// dispatcher.cc revision 11534
15661Sgblack@eecs.umich.edu/* 25661Sgblack@eecs.umich.edu * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 35661Sgblack@eecs.umich.edu * All rights reserved. 47087Snate@binkert.org * 57087Snate@binkert.org * For use for simulation and test purposes only 67087Snate@binkert.org * 77087Snate@binkert.org * Redistribution and use in source and binary forms, with or without 87087Snate@binkert.org * modification, are permitted provided that the following conditions are met: 97087Snate@binkert.org * 107087Snate@binkert.org * 1. Redistributions of source code must retain the above copyright notice, 117087Snate@binkert.org * this list of conditions and the following disclaimer. 125661Sgblack@eecs.umich.edu * 137087Snate@binkert.org * 2. Redistributions in binary form must reproduce the above copyright notice, 147087Snate@binkert.org * this list of conditions and the following disclaimer in the documentation 157087Snate@binkert.org * and/or other materials provided with the distribution. 167087Snate@binkert.org * 177087Snate@binkert.org * 3. Neither the name of the copyright holder nor the names of its contributors 187087Snate@binkert.org * may be used to endorse or promote products derived from this software 197087Snate@binkert.org * without specific prior written permission. 207087Snate@binkert.org * 215661Sgblack@eecs.umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 227087Snate@binkert.org * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 235661Sgblack@eecs.umich.edu * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 245661Sgblack@eecs.umich.edu * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 255661Sgblack@eecs.umich.edu * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 265661Sgblack@eecs.umich.edu * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 275661Sgblack@eecs.umich.edu * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 285661Sgblack@eecs.umich.edu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 295661Sgblack@eecs.umich.edu * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 305661Sgblack@eecs.umich.edu * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 315661Sgblack@eecs.umich.edu * POSSIBILITY OF SUCH DAMAGE. 325661Sgblack@eecs.umich.edu * 335661Sgblack@eecs.umich.edu * Author: Brad Beckmann, Marc Orr 345661Sgblack@eecs.umich.edu */ 355661Sgblack@eecs.umich.edu 365661Sgblack@eecs.umich.edu 375661Sgblack@eecs.umich.edu#include "gpu-compute/dispatcher.hh" 385661Sgblack@eecs.umich.edu 395661Sgblack@eecs.umich.edu#include "cpu/base.hh" 405661Sgblack@eecs.umich.edu#include "debug/GPUDisp.hh" 415661Sgblack@eecs.umich.edu#include "gpu-compute/cl_driver.hh" 425661Sgblack@eecs.umich.edu#include "gpu-compute/cl_event.hh" 435661Sgblack@eecs.umich.edu#include "gpu-compute/shader.hh" 445661Sgblack@eecs.umich.edu#include "gpu-compute/wavefront.hh" 455661Sgblack@eecs.umich.edu#include "mem/packet_access.hh" 465661Sgblack@eecs.umich.edu 475661Sgblack@eecs.umich.eduGpuDispatcher *GpuDispatcher::instance = nullptr; 485661Sgblack@eecs.umich.edu 495661Sgblack@eecs.umich.eduGpuDispatcher::GpuDispatcher(const Params *p) 505661Sgblack@eecs.umich.edu : DmaDevice(p), _masterId(p->system->getMasterId(name() + ".disp")), 515661Sgblack@eecs.umich.edu pioAddr(p->pio_addr), pioSize(4096), pioDelay(p->pio_latency), 525661Sgblack@eecs.umich.edu dispatchCount(0), dispatchActive(false), cpu(p->cpu), 535661Sgblack@eecs.umich.edu shader(p->shader_pointer), driver(p->cl_driver), 
tickEvent(this) 545661Sgblack@eecs.umich.edu{ 555661Sgblack@eecs.umich.edu shader->handshake(this); 565661Sgblack@eecs.umich.edu driver->handshake(this); 575661Sgblack@eecs.umich.edu 585661Sgblack@eecs.umich.edu ndRange.wg_disp_rem = false; 595661Sgblack@eecs.umich.edu ndRange.globalWgId = 0; 605661Sgblack@eecs.umich.edu 615661Sgblack@eecs.umich.edu schedule(&tickEvent, 0); 625661Sgblack@eecs.umich.edu 635661Sgblack@eecs.umich.edu // translation port for the dispatcher 645661Sgblack@eecs.umich.edu tlbPort = new TLBPort(csprintf("%s-port%d", name()), this); 655661Sgblack@eecs.umich.edu 665661Sgblack@eecs.umich.edu num_kernelLaunched 675661Sgblack@eecs.umich.edu .name(name() + ".num_kernel_launched") 685661Sgblack@eecs.umich.edu .desc("number of kernel launched") 695661Sgblack@eecs.umich.edu ; 705661Sgblack@eecs.umich.edu} 715661Sgblack@eecs.umich.edu 725661Sgblack@eecs.umich.eduGpuDispatcher *GpuDispatcherParams::create() 735661Sgblack@eecs.umich.edu{ 745661Sgblack@eecs.umich.edu GpuDispatcher *dispatcher = new GpuDispatcher(this); 755661Sgblack@eecs.umich.edu GpuDispatcher::setInstance(dispatcher); 765661Sgblack@eecs.umich.edu 775661Sgblack@eecs.umich.edu return GpuDispatcher::getInstance(); 785661Sgblack@eecs.umich.edu} 795661Sgblack@eecs.umich.edu 805661Sgblack@eecs.umich.eduvoid 815661Sgblack@eecs.umich.eduGpuDispatcher::serialize(CheckpointOut &cp) const 825661Sgblack@eecs.umich.edu{ 835661Sgblack@eecs.umich.edu Tick event_tick = 0; 845661Sgblack@eecs.umich.edu 855661Sgblack@eecs.umich.edu if (ndRange.wg_disp_rem) 865661Sgblack@eecs.umich.edu fatal("Checkpointing not supported during active workgroup execution"); 875661Sgblack@eecs.umich.edu 885661Sgblack@eecs.umich.edu if (tickEvent.scheduled()) 895661Sgblack@eecs.umich.edu event_tick = tickEvent.when(); 905661Sgblack@eecs.umich.edu 915661Sgblack@eecs.umich.edu SERIALIZE_SCALAR(event_tick); 925661Sgblack@eecs.umich.edu 935661Sgblack@eecs.umich.edu} 945661Sgblack@eecs.umich.edu 955661Sgblack@eecs.umich.eduvoid 
965661Sgblack@eecs.umich.eduGpuDispatcher::unserialize(CheckpointIn &cp) 975661Sgblack@eecs.umich.edu{ 985661Sgblack@eecs.umich.edu Tick event_tick; 995661Sgblack@eecs.umich.edu 1005661Sgblack@eecs.umich.edu if (tickEvent.scheduled()) 1015661Sgblack@eecs.umich.edu deschedule(&tickEvent); 1025661Sgblack@eecs.umich.edu 1035661Sgblack@eecs.umich.edu UNSERIALIZE_SCALAR(event_tick); 1045661Sgblack@eecs.umich.edu 1055661Sgblack@eecs.umich.edu if (event_tick) 1065661Sgblack@eecs.umich.edu schedule(&tickEvent, event_tick); 1075661Sgblack@eecs.umich.edu} 1085661Sgblack@eecs.umich.edu 1095661Sgblack@eecs.umich.eduAddrRangeList 1105661Sgblack@eecs.umich.eduGpuDispatcher::getAddrRanges() const 1115661Sgblack@eecs.umich.edu{ 1125661Sgblack@eecs.umich.edu AddrRangeList ranges; 1135661Sgblack@eecs.umich.edu 1145661Sgblack@eecs.umich.edu DPRINTF(GPUDisp, "dispatcher registering addr range at %#x size %#x\n", 1155661Sgblack@eecs.umich.edu pioAddr, pioSize); 1165661Sgblack@eecs.umich.edu 1175661Sgblack@eecs.umich.edu ranges.push_back(RangeSize(pioAddr, pioSize)); 1185661Sgblack@eecs.umich.edu 1195661Sgblack@eecs.umich.edu return ranges; 1205661Sgblack@eecs.umich.edu} 1215661Sgblack@eecs.umich.edu 1225661Sgblack@eecs.umich.eduTick 1235661Sgblack@eecs.umich.eduGpuDispatcher::read(PacketPtr pkt) 1245661Sgblack@eecs.umich.edu{ 1255661Sgblack@eecs.umich.edu assert(pkt->getAddr() >= pioAddr); 1265661Sgblack@eecs.umich.edu assert(pkt->getAddr() < pioAddr + pioSize); 1275661Sgblack@eecs.umich.edu 1285661Sgblack@eecs.umich.edu int offset = pkt->getAddr() - pioAddr; 1295661Sgblack@eecs.umich.edu pkt->allocate(); 1305661Sgblack@eecs.umich.edu 1315661Sgblack@eecs.umich.edu DPRINTF(GPUDisp, " read register %#x size=%d\n", offset, pkt->getSize()); 1325661Sgblack@eecs.umich.edu 1335661Sgblack@eecs.umich.edu if (offset < 8) { 1345661Sgblack@eecs.umich.edu assert(!offset); 1355661Sgblack@eecs.umich.edu assert(pkt->getSize() == 8); 1365661Sgblack@eecs.umich.edu 1375661Sgblack@eecs.umich.edu uint64_t 
retval = dispatchActive; 1385661Sgblack@eecs.umich.edu pkt->set(retval); 1395661Sgblack@eecs.umich.edu } else { 1405661Sgblack@eecs.umich.edu offset -= 8; 1415661Sgblack@eecs.umich.edu assert(offset + pkt->getSize() < sizeof(HsaQueueEntry)); 1425661Sgblack@eecs.umich.edu char *curTaskPtr = (char*)&curTask; 1435661Sgblack@eecs.umich.edu 1445661Sgblack@eecs.umich.edu memcpy(pkt->getPtr<const void*>(), curTaskPtr + offset, pkt->getSize()); 1455661Sgblack@eecs.umich.edu } 1465661Sgblack@eecs.umich.edu 1475661Sgblack@eecs.umich.edu pkt->makeAtomicResponse(); 1485661Sgblack@eecs.umich.edu 1495661Sgblack@eecs.umich.edu return pioDelay; 1505661Sgblack@eecs.umich.edu} 1515661Sgblack@eecs.umich.edu 1525661Sgblack@eecs.umich.eduTick 1535661Sgblack@eecs.umich.eduGpuDispatcher::write(PacketPtr pkt) 1545662Sgblack@eecs.umich.edu{ 1555661Sgblack@eecs.umich.edu assert(pkt->getAddr() >= pioAddr); 1565661Sgblack@eecs.umich.edu assert(pkt->getAddr() < pioAddr + pioSize); 1575661Sgblack@eecs.umich.edu 1585661Sgblack@eecs.umich.edu int offset = pkt->getAddr() - pioAddr; 1595661Sgblack@eecs.umich.edu 1605661Sgblack@eecs.umich.edu#if TRACING_ON 1615661Sgblack@eecs.umich.edu uint64_t data_val = 0; 1625661Sgblack@eecs.umich.edu 1635661Sgblack@eecs.umich.edu switch (pkt->getSize()) { 1645661Sgblack@eecs.umich.edu case 1: 1655661Sgblack@eecs.umich.edu data_val = pkt->get<uint8_t>(); 1665788Sgblack@eecs.umich.edu break; 1675661Sgblack@eecs.umich.edu case 2: 1685661Sgblack@eecs.umich.edu data_val = pkt->get<uint16_t>(); 1695661Sgblack@eecs.umich.edu break; 1705661Sgblack@eecs.umich.edu case 4: 1715661Sgblack@eecs.umich.edu data_val = pkt->get<uint32_t>(); 1725661Sgblack@eecs.umich.edu break; 1735661Sgblack@eecs.umich.edu case 8: 1745662Sgblack@eecs.umich.edu data_val = pkt->get<uint64_t>(); 1755662Sgblack@eecs.umich.edu break; 1765662Sgblack@eecs.umich.edu default: 1775662Sgblack@eecs.umich.edu DPRINTF(GPUDisp, "bad size %d\n", pkt->getSize()); 1785662Sgblack@eecs.umich.edu } 
1795662Sgblack@eecs.umich.edu 1805692Sgblack@eecs.umich.edu DPRINTF(GPUDisp, "write register %#x value %#x size=%d\n", offset, data_val, 1815662Sgblack@eecs.umich.edu pkt->getSize()); 1825662Sgblack@eecs.umich.edu#endif 1835662Sgblack@eecs.umich.edu if (!offset) { 1845663Sgblack@eecs.umich.edu static int nextId = 0; 1855663Sgblack@eecs.umich.edu 1865663Sgblack@eecs.umich.edu // The depends field of the qstruct, which was previously unused, is 1875663Sgblack@eecs.umich.edu // used to communicate with simulated application. 1885663Sgblack@eecs.umich.edu if (curTask.depends) { 1895663Sgblack@eecs.umich.edu HostState hs; 1905663Sgblack@eecs.umich.edu shader->ReadMem((uint64_t)(curTask.depends), &hs, 1915663Sgblack@eecs.umich.edu sizeof(HostState), 0); 1925663Sgblack@eecs.umich.edu 1935663Sgblack@eecs.umich.edu // update event start time (in nano-seconds) 1945663Sgblack@eecs.umich.edu uint64_t start = curTick() / 1000; 1955663Sgblack@eecs.umich.edu 1965663Sgblack@eecs.umich.edu shader->WriteMem((uint64_t)(&((_cl_event*)hs.event)->start), 1975663Sgblack@eecs.umich.edu &start, sizeof(uint64_t), 0); 1985663Sgblack@eecs.umich.edu } 1995663Sgblack@eecs.umich.edu 2005692Sgblack@eecs.umich.edu // launch kernel 2015663Sgblack@eecs.umich.edu ++num_kernelLaunched; 2025663Sgblack@eecs.umich.edu 2035663Sgblack@eecs.umich.edu NDRange *ndr = &(ndRangeMap[nextId]); 2045661Sgblack@eecs.umich.edu // copy dispatch info 2055661Sgblack@eecs.umich.edu ndr->q = curTask; 2065661Sgblack@eecs.umich.edu 2075661Sgblack@eecs.umich.edu // update the numDispTask polled by the runtime 2085661Sgblack@eecs.umich.edu accessUserVar(cpu, (uint64_t)(curTask.numDispLeft), 0, 1); 2095661Sgblack@eecs.umich.edu 2105661Sgblack@eecs.umich.edu ndr->numWgTotal = 1; 2115661Sgblack@eecs.umich.edu 2125661Sgblack@eecs.umich.edu for (int i = 0; i < 3; ++i) { 2135661Sgblack@eecs.umich.edu ndr->wgId[i] = 0; 2145661Sgblack@eecs.umich.edu ndr->numWg[i] = divCeil(curTask.gdSize[i], curTask.wgSize[i]); 
2155661Sgblack@eecs.umich.edu ndr->numWgTotal *= ndr->numWg[i]; 2165661Sgblack@eecs.umich.edu } 2175661Sgblack@eecs.umich.edu 2185661Sgblack@eecs.umich.edu ndr->numWgCompleted = 0; 2195663Sgblack@eecs.umich.edu ndr->globalWgId = 0; 2205663Sgblack@eecs.umich.edu ndr->wg_disp_rem = true; 2215663Sgblack@eecs.umich.edu ndr->execDone = false; 2225663Sgblack@eecs.umich.edu ndr->addrToNotify = (volatile bool*)curTask.addrToNotify; 2235663Sgblack@eecs.umich.edu ndr->numDispLeft = (volatile uint32_t*)curTask.numDispLeft; 2245663Sgblack@eecs.umich.edu ndr->dispatchId = nextId; 2255663Sgblack@eecs.umich.edu ndr->curCid = pkt->req->contextId(); 2265663Sgblack@eecs.umich.edu DPRINTF(GPUDisp, "launching kernel %d\n",nextId); 2275663Sgblack@eecs.umich.edu execIds.push(nextId); 2285663Sgblack@eecs.umich.edu ++nextId; 2295663Sgblack@eecs.umich.edu 2305663Sgblack@eecs.umich.edu dispatchActive = true; 2315663Sgblack@eecs.umich.edu 2325663Sgblack@eecs.umich.edu if (!tickEvent.scheduled()) { 2335661Sgblack@eecs.umich.edu schedule(&tickEvent, curTick() + shader->ticks(1)); 234 } 235 } else { 236 // populate current task struct 237 // first 64 bits are launch reg 238 offset -= 8; 239 assert(offset < sizeof(HsaQueueEntry)); 240 char *curTaskPtr = (char*)&curTask; 241 memcpy(curTaskPtr + offset, pkt->getPtr<const void*>(), pkt->getSize()); 242 } 243 244 pkt->makeAtomicResponse(); 245 246 return pioDelay; 247} 248 249 250BaseMasterPort& 251GpuDispatcher::getMasterPort(const std::string &if_name, PortID idx) 252{ 253 if (if_name == "translation_port") { 254 return *tlbPort; 255 } 256 257 return DmaDevice::getMasterPort(if_name, idx); 258} 259 260void 261GpuDispatcher::exec() 262{ 263 int fail_count = 0; 264 265 // There are potentially multiple outstanding kernel launches. 
266 // It is possible that the workgroups in a different kernel 267 // can fit on the GPU even if another kernel's workgroups cannot 268 DPRINTF(GPUDisp, "Launching %d Kernels\n", execIds.size()); 269 270 while (execIds.size() > fail_count) { 271 int execId = execIds.front(); 272 273 while (ndRangeMap[execId].wg_disp_rem) { 274 //update the thread context 275 shader->updateContext(ndRangeMap[execId].curCid); 276 277 // attempt to dispatch_workgroup 278 if (!shader->dispatch_workgroups(&ndRangeMap[execId])) { 279 // if we failed try the next kernel, 280 // it may have smaller workgroups. 281 // put it on the queue to rety latter 282 DPRINTF(GPUDisp, "kernel %d failed to launch\n", execId); 283 execIds.push(execId); 284 ++fail_count; 285 break; 286 } 287 } 288 // let's try the next kernel_id 289 execIds.pop(); 290 } 291 292 DPRINTF(GPUDisp, "Returning %d Kernels\n", doneIds.size()); 293 294 if (doneIds.size() && cpu) { 295 shader->hostWakeUp(cpu); 296 } 297 298 while (doneIds.size()) { 299 // wakeup the CPU if any Kernels completed this cycle 300 DPRINTF(GPUDisp, "WorkGroup %d completed\n", doneIds.front()); 301 doneIds.pop(); 302 } 303} 304 305void 306GpuDispatcher::notifyWgCompl(Wavefront *w) 307{ 308 int kern_id = w->kern_id; 309 DPRINTF(GPUDisp, "notify WgCompl %d\n",kern_id); 310 assert(ndRangeMap[kern_id].dispatchId == kern_id); 311 ndRangeMap[kern_id].numWgCompleted++; 312 313 if (ndRangeMap[kern_id].numWgCompleted == ndRangeMap[kern_id].numWgTotal) { 314 ndRangeMap[kern_id].execDone = true; 315 doneIds.push(kern_id); 316 317 if (ndRangeMap[kern_id].addrToNotify) { 318 accessUserVar(cpu, (uint64_t)(ndRangeMap[kern_id].addrToNotify), 1, 319 0); 320 } 321 322 accessUserVar(cpu, (uint64_t)(ndRangeMap[kern_id].numDispLeft), 0, -1); 323 324 // update event end time (in nano-seconds) 325 if (ndRangeMap[kern_id].q.depends) { 326 HostState *host_state = (HostState*)ndRangeMap[kern_id].q.depends; 327 uint64_t event; 328 shader->ReadMem((uint64_t)(&host_state->event), 
&event, 329 sizeof(uint64_t), 0); 330 331 uint64_t end = curTick() / 1000; 332 333 shader->WriteMem((uint64_t)(&((_cl_event*)event)->end), &end, 334 sizeof(uint64_t), 0); 335 } 336 } 337 338 if (!tickEvent.scheduled()) { 339 schedule(&tickEvent, curTick() + shader->ticks(1)); 340 } 341} 342 343void 344GpuDispatcher::scheduleDispatch() 345{ 346 if (!tickEvent.scheduled()) 347 schedule(&tickEvent, curTick() + shader->ticks(1)); 348} 349 350void 351GpuDispatcher::accessUserVar(BaseCPU *cpu, uint64_t addr, int val, int off) 352{ 353 if (cpu) { 354 if (off) { 355 shader->AccessMem(addr, &val, sizeof(int), 0, MemCmd::ReadReq, 356 true); 357 val += off; 358 } 359 360 shader->AccessMem(addr, &val, sizeof(int), 0, MemCmd::WriteReq, true); 361 } else { 362 panic("Cannot find host"); 363 } 364} 365 366GpuDispatcher::TickEvent::TickEvent(GpuDispatcher *_dispatcher) 367 : Event(CPU_Tick_Pri), dispatcher(_dispatcher) 368{ 369} 370 371void 372GpuDispatcher::TickEvent::process() 373{ 374 dispatcher->exec(); 375} 376 377const char* 378GpuDispatcher::TickEvent::description() const 379{ 380 return "GPU Dispatcher tick"; 381} 382 383// helper functions for driver to retrieve GPU attributes 384int 385GpuDispatcher::getNumCUs() 386{ 387 return shader->cuList.size(); 388} 389 390int 391GpuDispatcher::wfSize() const 392{ 393 return shader->cuList[0]->wfSize(); 394} 395 396void 397GpuDispatcher::setFuncargsSize(int funcargs_size) 398{ 399 shader->funcargs_size = funcargs_size; 400} 401