dispatcher.cc revision 11435
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Brad Beckmann, Marc Orr
 */
354661Sksewell@umich.edu
364661Sksewell@umich.edu
374661Sksewell@umich.edu#include "gpu-compute/dispatcher.hh"
384661Sksewell@umich.edu
394661Sksewell@umich.edu#include "cpu/base.hh"
404661Sksewell@umich.edu#include "debug/GPUDisp.hh"
414661Sksewell@umich.edu#include "gpu-compute/cl_driver.hh"
424661Sksewell@umich.edu#include "gpu-compute/cl_event.hh"
434661Sksewell@umich.edu#include "gpu-compute/shader.hh"
444661Sksewell@umich.edu#include "gpu-compute/wavefront.hh"
454661Sksewell@umich.edu#include "mem/packet_access.hh"
464661Sksewell@umich.edu
474661Sksewell@umich.eduGpuDispatcher *GpuDispatcher::instance = nullptr;
484661Sksewell@umich.edu
494661Sksewell@umich.eduGpuDispatcher::GpuDispatcher(const Params *p)
504661Sksewell@umich.edu    : DmaDevice(p), _masterId(p->system->getMasterId(name() + ".disp")),
514661Sksewell@umich.edu      pioAddr(p->pio_addr), pioSize(4096), pioDelay(p->pio_latency),
524661Sksewell@umich.edu      dispatchCount(0), dispatchActive(false), cpu(p->cpu),
534661Sksewell@umich.edu      shader(p->shader_pointer), driver(p->cl_driver), tickEvent(this)
544661Sksewell@umich.edu{
554661Sksewell@umich.edu    shader->handshake(this);
564661Sksewell@umich.edu    driver->handshake(this);
574661Sksewell@umich.edu
584661Sksewell@umich.edu    ndRange.wg_disp_rem = false;
594661Sksewell@umich.edu    ndRange.globalWgId = 0;
604661Sksewell@umich.edu
614661Sksewell@umich.edu    schedule(&tickEvent, 0);
624661Sksewell@umich.edu
634661Sksewell@umich.edu    // translation port for the dispatcher
644661Sksewell@umich.edu    tlbPort = new TLBPort(csprintf("%s-port%d", name()), this);
654661Sksewell@umich.edu
664661Sksewell@umich.edu    num_kernelLaunched
6710196SCurtis.Dunham@arm.com    .name(name() + ".num_kernel_launched")
684661Sksewell@umich.edu    .desc("number of kernel launched")
694661Sksewell@umich.edu    ;
704661Sksewell@umich.edu}
714661Sksewell@umich.edu
724661Sksewell@umich.eduGpuDispatcher *GpuDispatcherParams::create()
734661Sksewell@umich.edu{
744661Sksewell@umich.edu    GpuDispatcher *dispatcher = new GpuDispatcher(this);
754661Sksewell@umich.edu    GpuDispatcher::setInstance(dispatcher);
764661Sksewell@umich.edu
774661Sksewell@umich.edu    return GpuDispatcher::getInstance();
784661Sksewell@umich.edu}
794661Sksewell@umich.edu
804661Sksewell@umich.eduvoid
814661Sksewell@umich.eduGpuDispatcher::serialize(CheckpointOut &cp) const
8210474Sandreas.hansson@arm.com{
834661Sksewell@umich.edu    Tick event_tick = 0;
844661Sksewell@umich.edu
854661Sksewell@umich.edu    if (ndRange.wg_disp_rem)
864661Sksewell@umich.edu        fatal("Checkpointing not supported during active workgroup execution");
8710474Sandreas.hansson@arm.com
884661Sksewell@umich.edu    if (tickEvent.scheduled())
894661Sksewell@umich.edu        event_tick = tickEvent.when();
904661Sksewell@umich.edu
914661Sksewell@umich.edu    SERIALIZE_SCALAR(event_tick);
924661Sksewell@umich.edu
934661Sksewell@umich.edu}
944661Sksewell@umich.edu
954661Sksewell@umich.eduvoid
964661Sksewell@umich.eduGpuDispatcher::unserialize(CheckpointIn &cp)
974661Sksewell@umich.edu{
984661Sksewell@umich.edu    Tick event_tick;
994661Sksewell@umich.edu
10010196SCurtis.Dunham@arm.com    if (tickEvent.scheduled())
1014661Sksewell@umich.edu        deschedule(&tickEvent);
1024661Sksewell@umich.edu
1034661Sksewell@umich.edu    UNSERIALIZE_SCALAR(event_tick);
1044661Sksewell@umich.edu
1054661Sksewell@umich.edu    if (event_tick)
1064661Sksewell@umich.edu        schedule(&tickEvent, event_tick);
1074661Sksewell@umich.edu}
1084661Sksewell@umich.edu
1094661Sksewell@umich.eduAddrRangeList
1104661Sksewell@umich.eduGpuDispatcher::getAddrRanges() const
1114661Sksewell@umich.edu{
1124661Sksewell@umich.edu    AddrRangeList ranges;
1134661Sksewell@umich.edu
1144661Sksewell@umich.edu    DPRINTF(GPUDisp, "dispatcher registering addr range at %#x size %#x\n",
11510474Sandreas.hansson@arm.com            pioAddr, pioSize);
1164661Sksewell@umich.edu
1174661Sksewell@umich.edu    ranges.push_back(RangeSize(pioAddr, pioSize));
1184661Sksewell@umich.edu
1194661Sksewell@umich.edu    return ranges;
12010474Sandreas.hansson@arm.com}
1214661Sksewell@umich.edu
1224661Sksewell@umich.eduTick
1234661Sksewell@umich.eduGpuDispatcher::read(PacketPtr pkt)
1244661Sksewell@umich.edu{
1254661Sksewell@umich.edu    assert(pkt->getAddr() >= pioAddr);
1264661Sksewell@umich.edu    assert(pkt->getAddr() < pioAddr + pioSize);
1274661Sksewell@umich.edu
1284661Sksewell@umich.edu    int offset = pkt->getAddr() - pioAddr;
1294661Sksewell@umich.edu    pkt->allocate();
1304661Sksewell@umich.edu
1314661Sksewell@umich.edu    DPRINTF(GPUDisp, " read register %#x size=%d\n", offset, pkt->getSize());
1324661Sksewell@umich.edu
1334661Sksewell@umich.edu    if (offset < 8) {
1344661Sksewell@umich.edu        assert(!offset);
1354661Sksewell@umich.edu        assert(pkt->getSize() == 8);
1364661Sksewell@umich.edu
1374661Sksewell@umich.edu        uint64_t retval = dispatchActive;
1389554Sandreas.hansson@arm.com        pkt->set(retval);
1399554Sandreas.hansson@arm.com    } else {
1409554Sandreas.hansson@arm.com        offset -= 8;
1419554Sandreas.hansson@arm.com        assert(offset + pkt->getSize() < sizeof(HsaQueueEntry));
1429554Sandreas.hansson@arm.com        char *curTaskPtr = (char*)&curTask;
1439554Sandreas.hansson@arm.com
1444661Sksewell@umich.edu        memcpy(pkt->getPtr<const void*>(), curTaskPtr + offset, pkt->getSize());
1454661Sksewell@umich.edu    }
1464661Sksewell@umich.edu
1474661Sksewell@umich.edu    pkt->makeAtomicResponse();
1484661Sksewell@umich.edu
1498564Sgblack@eecs.umich.edu    return pioDelay;
15010196SCurtis.Dunham@arm.com}
1514661Sksewell@umich.edu
1528738Sgblack@eecs.umich.eduTick
1534661Sksewell@umich.eduGpuDispatcher::write(PacketPtr pkt)
1544661Sksewell@umich.edu{
1554661Sksewell@umich.edu    assert(pkt->getAddr() >= pioAddr);
1564661Sksewell@umich.edu    assert(pkt->getAddr() < pioAddr + pioSize);
1578564Sgblack@eecs.umich.edu
15810196SCurtis.Dunham@arm.com    int offset = pkt->getAddr() - pioAddr;
1594661Sksewell@umich.edu
1608738Sgblack@eecs.umich.edu#if TRACING_ON
1614661Sksewell@umich.edu    uint64_t data_val = 0;
1624661Sksewell@umich.edu
1634661Sksewell@umich.edu    switch (pkt->getSize()) {
1644661Sksewell@umich.edu      case 1:
1654661Sksewell@umich.edu        data_val = pkt->get<uint8_t>();
1664661Sksewell@umich.edu        break;
1674661Sksewell@umich.edu      case 2:
1684661Sksewell@umich.edu        data_val = pkt->get<uint16_t>();
1694661Sksewell@umich.edu        break;
1704661Sksewell@umich.edu      case 4:
1714661Sksewell@umich.edu        data_val = pkt->get<uint32_t>();
1724661Sksewell@umich.edu        break;
1734661Sksewell@umich.edu      case 8:
1744661Sksewell@umich.edu        data_val = pkt->get<uint64_t>();
1754661Sksewell@umich.edu        break;
1764661Sksewell@umich.edu      default:
1775222Sksewell@umich.edu        DPRINTF(GPUDisp, "bad size %d\n", pkt->getSize());
1785222Sksewell@umich.edu    }
1794661Sksewell@umich.edu
1804661Sksewell@umich.edu    DPRINTF(GPUDisp, "write register %#x value %#x size=%d\n", offset, data_val,
1814661Sksewell@umich.edu            pkt->getSize());
1824661Sksewell@umich.edu#endif
1834661Sksewell@umich.edu    if (!offset) {
1844661Sksewell@umich.edu        static int nextId = 0;
1854661Sksewell@umich.edu
1864661Sksewell@umich.edu        // The depends field of the qstruct, which was previously unused, is
1874661Sksewell@umich.edu        // used to communicate with simulated application.
1884661Sksewell@umich.edu        if (curTask.depends) {
1894661Sksewell@umich.edu            HostState hs;
1904661Sksewell@umich.edu            shader->ReadMem((uint64_t)(curTask.depends), &hs,
1914661Sksewell@umich.edu                            sizeof(HostState), 0);
1924661Sksewell@umich.edu
1934661Sksewell@umich.edu            // update event start time (in nano-seconds)
1944661Sksewell@umich.edu            uint64_t start = curTick() / 1000;
1954661Sksewell@umich.edu
1964661Sksewell@umich.edu            shader->WriteMem((uint64_t)(&((_cl_event*)hs.event)->start),
1974661Sksewell@umich.edu                             &start, sizeof(uint64_t), 0);
1984661Sksewell@umich.edu        }
1994661Sksewell@umich.edu
2004661Sksewell@umich.edu        // launch kernel
2014661Sksewell@umich.edu        ++num_kernelLaunched;
2024661Sksewell@umich.edu
2034661Sksewell@umich.edu        NDRange *ndr = &(ndRangeMap[nextId]);
2044661Sksewell@umich.edu        // copy dispatch info
2054661Sksewell@umich.edu        ndr->q = curTask;
2064661Sksewell@umich.edu
2074661Sksewell@umich.edu        // update the numDispTask polled by the runtime
2085222Sksewell@umich.edu        accessUserVar(cpu, (uint64_t)(curTask.numDispLeft), 0, 1);
2095222Sksewell@umich.edu
2104661Sksewell@umich.edu        ndr->numWgTotal = 1;
2114661Sksewell@umich.edu
2124661Sksewell@umich.edu        for (int i = 0; i < 3; ++i) {
2134661Sksewell@umich.edu            ndr->wgId[i] = 0;
2144661Sksewell@umich.edu            ndr->numWg[i] = divCeil(curTask.gdSize[i], curTask.wgSize[i]);
2154661Sksewell@umich.edu            ndr->numWgTotal *= ndr->numWg[i];
2164661Sksewell@umich.edu        }
2174661Sksewell@umich.edu
2184661Sksewell@umich.edu        ndr->numWgCompleted = 0;
2194661Sksewell@umich.edu        ndr->globalWgId = 0;
220        ndr->wg_disp_rem = true;
221        ndr->execDone = false;
222        ndr->addrToNotify = (volatile bool*)curTask.addrToNotify;
223        ndr->numDispLeft = (volatile uint32_t*)curTask.numDispLeft;
224        ndr->dispatchId = nextId;
225        ndr->curCid = pkt->req->contextId();
226        DPRINTF(GPUDisp, "launching kernel %d\n",nextId);
227        execIds.push(nextId);
228        ++nextId;
229
230        dispatchActive = true;
231
232        if (!tickEvent.scheduled()) {
233            schedule(&tickEvent, curTick() + shader->ticks(1));
234        }
235    } else {
236        // populate current task struct
237        // first 64 bits are launch reg
238        offset -= 8;
239        assert(offset < sizeof(HsaQueueEntry));
240        char *curTaskPtr = (char*)&curTask;
241        memcpy(curTaskPtr + offset, pkt->getPtr<const void*>(), pkt->getSize());
242    }
243
244    pkt->makeAtomicResponse();
245
246    return pioDelay;
247}
248
249
250BaseMasterPort&
251GpuDispatcher::getMasterPort(const std::string &if_name, PortID idx)
252{
253    if (if_name == "translation_port") {
254        return *tlbPort;
255    }
256
257    return DmaDevice::getMasterPort(if_name, idx);
258}
259
260void
261GpuDispatcher::exec()
262{
263    int fail_count = 0;
264
265    // There are potentially multiple outstanding kernel launches.
266    // It is possible that the workgroups in a different kernel
267    // can fit on the GPU even if another kernel's workgroups cannot
268    DPRINTF(GPUDisp, "Launching %d Kernels\n", execIds.size());
269
270    while (execIds.size() > fail_count) {
271        int execId = execIds.front();
272
273        while (ndRangeMap[execId].wg_disp_rem) {
274            //update the thread context
275            shader->updateContext(ndRangeMap[execId].curCid);
276
277            // attempt to dispatch_workgroup
278            if (!shader->dispatch_workgroups(&ndRangeMap[execId])) {
279                // if we failed try the next kernel,
280                // it may have smaller workgroups.
281                // put it on the queue to rety latter
282                DPRINTF(GPUDisp, "kernel %d failed to launch\n", execId);
283                execIds.push(execId);
284                ++fail_count;
285                break;
286            }
287        }
288        // let's try the next kernel_id
289        execIds.pop();
290    }
291
292    DPRINTF(GPUDisp, "Returning %d Kernels\n", doneIds.size());
293
294    if (doneIds.size() && cpu) {
295        shader->hostWakeUp(cpu);
296    }
297
298    while (doneIds.size()) {
299        // wakeup the CPU if any Kernels completed this cycle
300        DPRINTF(GPUDisp, "WorkGroup %d completed\n", doneIds.front());
301        doneIds.pop();
302    }
303}
304
305void
306GpuDispatcher::notifyWgCompl(Wavefront *w)
307{
308    int kern_id = w->kern_id;
309    DPRINTF(GPUDisp, "notify WgCompl %d\n",kern_id);
310    assert(ndRangeMap[kern_id].dispatchId == kern_id);
311    ndRangeMap[kern_id].numWgCompleted++;
312
313    if (ndRangeMap[kern_id].numWgCompleted == ndRangeMap[kern_id].numWgTotal) {
314        ndRangeMap[kern_id].execDone = true;
315        doneIds.push(kern_id);
316
317        if (ndRangeMap[kern_id].addrToNotify) {
318            accessUserVar(cpu, (uint64_t)(ndRangeMap[kern_id].addrToNotify), 1,
319                          0);
320        }
321
322        accessUserVar(cpu, (uint64_t)(ndRangeMap[kern_id].numDispLeft), 0, -1);
323
324        // update event end time (in nano-seconds)
325        if (ndRangeMap[kern_id].q.depends) {
326            HostState *host_state = (HostState*)ndRangeMap[kern_id].q.depends;
327            uint64_t event;
328            shader->ReadMem((uint64_t)(&host_state->event), &event,
329                            sizeof(uint64_t), 0);
330
331            uint64_t end = curTick() / 1000;
332
333            shader->WriteMem((uint64_t)(&((_cl_event*)event)->end), &end,
334                             sizeof(uint64_t), 0);
335        }
336    }
337
338    if (!tickEvent.scheduled()) {
339        schedule(&tickEvent, curTick() + shader->ticks(1));
340    }
341}
342
343void
344GpuDispatcher::scheduleDispatch()
345{
346    if (!tickEvent.scheduled())
347        schedule(&tickEvent, curTick() + shader->ticks(1));
348}
349
350void
351GpuDispatcher::accessUserVar(BaseCPU *cpu, uint64_t addr, int val, int off)
352{
353    if (cpu) {
354        if (off) {
355            shader->AccessMem(addr, &val, sizeof(int), 0, MemCmd::ReadReq,
356                              true);
357            val += off;
358        }
359
360        shader->AccessMem(addr, &val, sizeof(int), 0, MemCmd::WriteReq, true);
361    } else {
362        panic("Cannot find host");
363    }
364}
365
366GpuDispatcher::TickEvent::TickEvent(GpuDispatcher *_dispatcher)
367    : Event(CPU_Tick_Pri), dispatcher(_dispatcher)
368{
369}
370
371void
372GpuDispatcher::TickEvent::process()
373{
374    dispatcher->exec();
375}
376
377const char*
378GpuDispatcher::TickEvent::description() const
379{
380    return "GPU Dispatcher tick";
381}
382
383// helper functions for driver to retrieve GPU attributes
384int
385GpuDispatcher::getNumCUs()
386{
387    return shader->cuList.size();
388}
389
390void
391GpuDispatcher::setFuncargsSize(int funcargs_size)
392{
393    shader->funcargs_size = funcargs_size;
394}
395