cl_driver.cc revision 11851
/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Anthony Gutierrez
 */
35545SN/A
36545SN/A#include "gpu-compute/cl_driver.hh"
37545SN/A
38545SN/A#include "base/intmath.hh"
392665SN/A#include "cpu/thread_context.hh"
402665SN/A#include "gpu-compute/dispatcher.hh"
412665SN/A#include "gpu-compute/hsa_code.hh"
4211010Sandreas.sandberg@arm.com#include "gpu-compute/hsa_kernel_info.hh"
43545SN/A#include "gpu-compute/hsa_object.hh"
44545SN/A#include "params/ClDriver.hh"
459016Sandreas.hansson@arm.com#include "sim/process.hh"
469016Sandreas.hansson@arm.com#include "sim/syscall_emul_buf.hh"
47545SN/A
489166Sandreas.hansson@arm.comClDriver::ClDriver(ClDriverParams *p)
4911010Sandreas.sandberg@arm.com    : EmulatedDriver(p), hsaCode(0)
509166Sandreas.hansson@arm.com{
5111010Sandreas.sandberg@arm.com    for (const auto &codeFile : p->codefile)
529016Sandreas.hansson@arm.com        codeFiles.push_back(&codeFile);
534762SN/A
549342SAndreas.Sandberg@arm.com    maxFuncArgsSize = 0;
559814Sandreas.hansson@arm.com
562565SN/A    for (int i = 0; i < codeFiles.size(); ++i) {
5713892Sgabeblack@google.com        HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]);
5813892Sgabeblack@google.com
5910912Sandreas.sandberg@arm.com        for (int k = 0; k < obj->numKernels(); ++k) {
602384SN/A            assert(obj->getKernel(k));
619307Sandreas.hansson@arm.com            kernels.push_back(obj->getKernel(k));
622784SN/A            kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData);
639307Sandreas.hansson@arm.com            int kern_funcargs_size = kernels.back()->funcarg_size;
649307Sandreas.hansson@arm.com            maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ?
659307Sandreas.hansson@arm.com                kern_funcargs_size : maxFuncArgsSize;
669307Sandreas.hansson@arm.com        }
679307Sandreas.hansson@arm.com    }
689307Sandreas.hansson@arm.com
699307Sandreas.hansson@arm.com    int name_offs = 0;
702784SN/A    int code_offs = 0;
719307Sandreas.hansson@arm.com
729307Sandreas.hansson@arm.com    for (int i = 0; i < kernels.size(); ++i) {
739307Sandreas.hansson@arm.com        kernelInfo.push_back(HsaKernelInfo());
749307Sandreas.hansson@arm.com        HsaCode *k = kernels[i];
759307Sandreas.hansson@arm.com
769307Sandreas.hansson@arm.com        k->generateHsaKernelInfo(&kernelInfo[i]);
779307Sandreas.hansson@arm.com
789307Sandreas.hansson@arm.com        kernelInfo[i].name_offs = name_offs;
794435SN/A        kernelInfo[i].code_offs = code_offs;
809166Sandreas.hansson@arm.com
819166Sandreas.hansson@arm.com        name_offs += k->name().size() + 1;
829166Sandreas.hansson@arm.com        code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
839166Sandreas.hansson@arm.com    }
849166Sandreas.hansson@arm.com}
859166Sandreas.hansson@arm.com
869166Sandreas.hansson@arm.comvoid
879166Sandreas.hansson@arm.comClDriver::handshake(GpuDispatcher *_dispatcher)
889166Sandreas.hansson@arm.com{
899166Sandreas.hansson@arm.com    dispatcher = _dispatcher;
909166Sandreas.hansson@arm.com    dispatcher->setFuncargsSize(maxFuncArgsSize);
918948SN/A}
929307Sandreas.hansson@arm.com
939307Sandreas.hansson@arm.comint
949307Sandreas.hansson@arm.comClDriver::open(Process *p, ThreadContext *tc, int mode, int flags)
959307Sandreas.hansson@arm.com{
969307Sandreas.hansson@arm.com    int fd = p->allocFD(-1, filename, 0, 0, false);
979307Sandreas.hansson@arm.com    FDEntry *fde = p->getFDEntry(fd);
989307Sandreas.hansson@arm.com    fde->driver = this;
999307Sandreas.hansson@arm.com
1009307Sandreas.hansson@arm.com    return fd;
1019307Sandreas.hansson@arm.com}
1029307Sandreas.hansson@arm.com
1039307Sandreas.hansson@arm.comint
1049307Sandreas.hansson@arm.comClDriver::ioctl(Process *process, ThreadContext *tc, unsigned req)
1059307Sandreas.hansson@arm.com{
1069307Sandreas.hansson@arm.com    int index = 2;
1079307Sandreas.hansson@arm.com    Addr buf_addr = process->getSyscallArg(tc, index);
1089307Sandreas.hansson@arm.com
1099307Sandreas.hansson@arm.com    switch (req) {
1109307Sandreas.hansson@arm.com      case HSA_GET_SIZES:
1119307Sandreas.hansson@arm.com        {
11211010Sandreas.sandberg@arm.com            TypedBufferArg<HsaDriverSizes> sizes(buf_addr);
1139307Sandreas.hansson@arm.com            sizes->num_kernels = kernels.size();
11413892Sgabeblack@google.com            sizes->string_table_size = 0;
1159307Sandreas.hansson@arm.com            sizes->code_size = 0;
11611010Sandreas.sandberg@arm.com            sizes->readonly_size = 0;
11711010Sandreas.sandberg@arm.com
11811010Sandreas.sandberg@arm.com            if (kernels.size() > 0) {
11911010Sandreas.sandberg@arm.com                // all kernels will share the same read-only memory
12011010Sandreas.sandberg@arm.com                sizes->readonly_size =
12111010Sandreas.sandberg@arm.com                    kernels[0]->getSize(HsaCode::MemorySegment::READONLY);
12211010Sandreas.sandberg@arm.com                // check our assumption
12311010Sandreas.sandberg@arm.com                for (int i = 1; i<kernels.size(); ++i) {
1249307Sandreas.hansson@arm.com                    assert(sizes->readonly_size ==
1259307Sandreas.hansson@arm.com                    kernels[i]->getSize(HsaCode::MemorySegment::READONLY));
1269307Sandreas.hansson@arm.com                }
1279307Sandreas.hansson@arm.com            }
12812087Sspwilson2@wisc.edu
1299307Sandreas.hansson@arm.com            for (int i = 0; i < kernels.size(); ++i) {
1309307Sandreas.hansson@arm.com                HsaCode *k = kernels[i];
1319307Sandreas.hansson@arm.com                // add one for terminating '\0'
1329307Sandreas.hansson@arm.com                sizes->string_table_size += k->name().size() + 1;
1339307Sandreas.hansson@arm.com                sizes->code_size +=
1349307Sandreas.hansson@arm.com                    k->numInsts() * sizeof(TheGpuISA::RawMachInst);
1359307Sandreas.hansson@arm.com            }
1369307Sandreas.hansson@arm.com
13713930Sgiacomo.travaglini@arm.com            sizes.copyOut(tc->getMemProxy());
13813930Sgiacomo.travaglini@arm.com        }
13913930Sgiacomo.travaglini@arm.com        break;
14013930Sgiacomo.travaglini@arm.com
14113930Sgiacomo.travaglini@arm.com      case HSA_GET_KINFO:
14213930Sgiacomo.travaglini@arm.com        {
1439307Sandreas.hansson@arm.com            TypedBufferArg<HsaKernelInfo>
1449307Sandreas.hansson@arm.com                kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size());
14511169Sandreas.hansson@arm.com
14611169Sandreas.hansson@arm.com            for (int i = 0; i < kernels.size(); ++i) {
1472384SN/A                HsaKernelInfo *ki = &kinfo[i];
1489166Sandreas.hansson@arm.com                ki->name_offs = kernelInfo[i].name_offs;
1494435SN/A                ki->code_offs = kernelInfo[i].code_offs;
1509165Sandreas.hansson@arm.com                ki->sRegCount = kernelInfo[i].sRegCount;
1512489SN/A                ki->dRegCount = kernelInfo[i].dRegCount;
15213930Sgiacomo.travaglini@arm.com                ki->cRegCount = kernelInfo[i].cRegCount;
15313930Sgiacomo.travaglini@arm.com                ki->static_lds_size  = kernelInfo[i].static_lds_size;
1542565SN/A                ki->private_mem_size = kernelInfo[i].private_mem_size;
15513930Sgiacomo.travaglini@arm.com                ki->spill_mem_size   = kernelInfo[i].spill_mem_size;
15613930Sgiacomo.travaglini@arm.com            }
15713930Sgiacomo.travaglini@arm.com
15813930Sgiacomo.travaglini@arm.com            kinfo.copyOut(tc->getMemProxy());
15913930Sgiacomo.travaglini@arm.com        }
16013930Sgiacomo.travaglini@arm.com        break;
16113930Sgiacomo.travaglini@arm.com
16213930Sgiacomo.travaglini@arm.com      case HSA_GET_STRINGS:
1632565SN/A        {
1649166Sandreas.hansson@arm.com            int string_table_size = 0;
1652384SN/A            for (int i = 0; i < kernels.size(); ++i) {
16611168Sandreas.hansson@arm.com                HsaCode *k = kernels[i];
1672384SN/A                string_table_size += k->name().size() + 1;
1682384SN/A            }
169545SN/A
170545SN/A            BufferArg buf(buf_addr, string_table_size);
1714435SN/A            char *bufp = (char*)buf.bufferPtr();
1728851SN/A
173545SN/A            for (int i = 0; i < kernels.size(); ++i) {
174545SN/A                HsaCode *k = kernels[i];
1754762SN/A                const char *n = k->name().c_str();
1764762SN/A
1779166Sandreas.hansson@arm.com                // idiomatic string copy
1784762SN/A                while ((*bufp++ = *n++));
1798851SN/A            }
18013930Sgiacomo.travaglini@arm.com
18113930Sgiacomo.travaglini@arm.com            assert(bufp - (char *)buf.bufferPtr() == string_table_size);
18213930Sgiacomo.travaglini@arm.com
18313930Sgiacomo.travaglini@arm.com            buf.copyOut(tc->getMemProxy());
18413930Sgiacomo.travaglini@arm.com        }
18513930Sgiacomo.travaglini@arm.com        break;
18613930Sgiacomo.travaglini@arm.com
1878851SN/A      case HSA_GET_READONLY_DATA:
1884022SN/A        {
1898851SN/A            // we can pick any kernel --- they share the same
1904022SN/A            // readonly segment (this assumption is checked in GET_SIZES)
1912565SN/A            uint64_t size =
1928851SN/A                kernels.back()->getSize(HsaCode::MemorySegment::READONLY);
19313930Sgiacomo.travaglini@arm.com            BufferArg data(buf_addr, size);
19413930Sgiacomo.travaglini@arm.com            char *datap = (char *)data.bufferPtr();
19513930Sgiacomo.travaglini@arm.com            memcpy(datap,
19613930Sgiacomo.travaglini@arm.com                   kernels.back()->readonly_data,
19713930Sgiacomo.travaglini@arm.com                   size);
19813930Sgiacomo.travaglini@arm.com            data.copyOut(tc->getMemProxy());
19913930Sgiacomo.travaglini@arm.com        }
2008851SN/A        break;
2014263SN/A
2028851SN/A      case HSA_GET_CODE:
2034263SN/A        {
2042565SN/A            // set hsaCode pointer
2059307Sandreas.hansson@arm.com            hsaCode = buf_addr;
2068851SN/A            int code_size = 0;
20711169Sandreas.hansson@arm.com
2082565SN/A            for (int i = 0; i < kernels.size(); ++i) {
2099814Sandreas.hansson@arm.com                HsaCode *k = kernels[i];
2104263SN/A                code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
21113784Sgabeblack@google.com            }
21213784Sgabeblack@google.com
2132489SN/A            TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size);
214545SN/A            TheGpuISA::RawMachInst *bufp = buf;
215545SN/A
21611010Sandreas.sandberg@arm.com            int buf_idx = 0;
21711625Smichael.lebeane@amd.com
21811625Smichael.lebeane@amd.com            for (int i = 0; i < kernels.size(); ++i) {
21911625Smichael.lebeane@amd.com                HsaCode *k = kernels[i];
22011625Smichael.lebeane@amd.com
22111625Smichael.lebeane@amd.com                for (int j = 0; j < k->numInsts(); ++j) {
22211625Smichael.lebeane@amd.com                    bufp[buf_idx] = k->insts()->at(j);
22311625Smichael.lebeane@amd.com                    ++buf_idx;
22411625Smichael.lebeane@amd.com                }
22511625Smichael.lebeane@amd.com            }
22611625Smichael.lebeane@amd.com
22711625Smichael.lebeane@amd.com            buf.copyOut(tc->getMemProxy());
22811625Smichael.lebeane@amd.com        }
22911625Smichael.lebeane@amd.com        break;
23011625Smichael.lebeane@amd.com
23111625Smichael.lebeane@amd.com      case HSA_GET_CU_CNT:
23211625Smichael.lebeane@amd.com        {
23311625Smichael.lebeane@amd.com            BufferArg buf(buf_addr, sizeof(uint32_t));
23411625Smichael.lebeane@amd.com            *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs();
23511625Smichael.lebeane@amd.com            buf.copyOut(tc->getMemProxy());
23611625Smichael.lebeane@amd.com        }
23711625Smichael.lebeane@amd.com        break;
23811625Smichael.lebeane@amd.com
23911625Smichael.lebeane@amd.com      case HSA_GET_VSZ:
24011625Smichael.lebeane@amd.com        {
24111625Smichael.lebeane@amd.com            BufferArg buf(buf_addr, sizeof(uint32_t));
24211625Smichael.lebeane@amd.com            *((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize();
24311625Smichael.lebeane@amd.com            buf.copyOut(tc->getMemProxy());
24411625Smichael.lebeane@amd.com        }
24511625Smichael.lebeane@amd.com        break;
24611625Smichael.lebeane@amd.com      case HSA_GET_HW_STATIC_CONTEXT_SIZE:
24711625Smichael.lebeane@amd.com        {
24811625Smichael.lebeane@amd.com            BufferArg buf(buf_addr, sizeof(uint32_t));
24911625Smichael.lebeane@amd.com            *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize();
25011625Smichael.lebeane@amd.com            buf.copyOut(tc->getMemProxy());
25111625Smichael.lebeane@amd.com        }
25211625Smichael.lebeane@amd.com        break;
25311625Smichael.lebeane@amd.com
25411625Smichael.lebeane@amd.com      default:
25511625Smichael.lebeane@amd.com        fatal("ClDriver: bad ioctl %d\n", req);
25611625Smichael.lebeane@amd.com    }
25711625Smichael.lebeane@amd.com
25811625Smichael.lebeane@amd.com    return 0;
25911625Smichael.lebeane@amd.com}
26011625Smichael.lebeane@amd.com
26111625Smichael.lebeane@amd.comconst char*
26211625Smichael.lebeane@amd.comClDriver::codeOffToKernelName(uint64_t code_ptr)
26311625Smichael.lebeane@amd.com{
26411625Smichael.lebeane@amd.com    assert(hsaCode);
26511625Smichael.lebeane@amd.com    uint32_t code_offs = code_ptr - hsaCode;
26611625Smichael.lebeane@amd.com
26711625Smichael.lebeane@amd.com    for (int i = 0; i < kernels.size(); ++i) {
26811625Smichael.lebeane@amd.com        if (code_offs == kernelInfo[i].code_offs) {
26911625Smichael.lebeane@amd.com            return kernels[i]->name().c_str();
27011625Smichael.lebeane@amd.com        }
27111625Smichael.lebeane@amd.com    }
27211625Smichael.lebeane@amd.com
27311625Smichael.lebeane@amd.com    return nullptr;
27411625Smichael.lebeane@amd.com}
27511625Smichael.lebeane@amd.com
27611625Smichael.lebeane@amd.comClDriver*
27711625Smichael.lebeane@amd.comClDriverParams::create()
27811625Smichael.lebeane@amd.com{
27911625Smichael.lebeane@amd.com    return new ClDriver(this);
28011625Smichael.lebeane@amd.com}
28112131Sspwilson2@wisc.edu