1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Anthony Gutierrez
34 */
35
36#include "gpu-compute/cl_driver.hh"
37
38#include <memory>
39
40#include "base/intmath.hh"
41#include "cpu/thread_context.hh"
42#include "gpu-compute/dispatcher.hh"
43#include "gpu-compute/hsa_code.hh"
44#include "gpu-compute/hsa_kernel_info.hh"
45#include "gpu-compute/hsa_object.hh"
46#include "params/ClDriver.hh"
47#include "sim/process.hh"
48#include "sim/syscall_emul_buf.hh"
49
50ClDriver::ClDriver(ClDriverParams *p)
51    : EmulatedDriver(p), hsaCode(0)
52{
53    for (const auto &codeFile : p->codefile)
54        codeFiles.push_back(&codeFile);
55
56    maxFuncArgsSize = 0;
57
58    for (int i = 0; i < codeFiles.size(); ++i) {
59        HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]);
60
61        for (int k = 0; k < obj->numKernels(); ++k) {
62            assert(obj->getKernel(k));
63            kernels.push_back(obj->getKernel(k));
64            kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData);
65            int kern_funcargs_size = kernels.back()->funcarg_size;
66            maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ?
67                kern_funcargs_size : maxFuncArgsSize;
68        }
69    }
70
71    int name_offs = 0;
72    int code_offs = 0;
73
74    for (int i = 0; i < kernels.size(); ++i) {
75        kernelInfo.push_back(HsaKernelInfo());
76        HsaCode *k = kernels[i];
77
78        k->generateHsaKernelInfo(&kernelInfo[i]);
79
80        kernelInfo[i].name_offs = name_offs;
81        kernelInfo[i].code_offs = code_offs;
82
83        name_offs += k->name().size() + 1;
84        code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
85    }
86}
87
88void
89ClDriver::handshake(GpuDispatcher *_dispatcher)
90{
91    dispatcher = _dispatcher;
92    dispatcher->setFuncargsSize(maxFuncArgsSize);
93}
94
95int
96ClDriver::open(ThreadContext *tc, int mode, int flags)
97{
98    auto p = tc->getProcessPtr();
99    std::shared_ptr<DeviceFDEntry> fdp;
100    fdp = std::make_shared<DeviceFDEntry>(this, filename);
101    int tgt_fd = p->fds->allocFD(fdp);
102    return tgt_fd;
103}
104
105int
106ClDriver::ioctl(ThreadContext *tc, unsigned req)
107{
108    int index = 2;
109    auto process = tc->getProcessPtr();
110    Addr buf_addr = process->getSyscallArg(tc, index);
111
112    switch (req) {
113      case HSA_GET_SIZES:
114        {
115            TypedBufferArg<HsaDriverSizes> sizes(buf_addr);
116            sizes->num_kernels = kernels.size();
117            sizes->string_table_size = 0;
118            sizes->code_size = 0;
119            sizes->readonly_size = 0;
120
121            if (kernels.size() > 0) {
122                // all kernels will share the same read-only memory
123                sizes->readonly_size =
124                    kernels[0]->getSize(HsaCode::MemorySegment::READONLY);
125                // check our assumption
126                for (int i = 1; i<kernels.size(); ++i) {
127                    assert(sizes->readonly_size ==
128                    kernels[i]->getSize(HsaCode::MemorySegment::READONLY));
129                }
130            }
131
132            for (int i = 0; i < kernels.size(); ++i) {
133                HsaCode *k = kernels[i];
134                // add one for terminating '\0'
135                sizes->string_table_size += k->name().size() + 1;
136                sizes->code_size +=
137                    k->numInsts() * sizeof(TheGpuISA::RawMachInst);
138            }
139
140            sizes.copyOut(tc->getVirtProxy());
141        }
142        break;
143
144      case HSA_GET_KINFO:
145        {
146            TypedBufferArg<HsaKernelInfo>
147                kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size());
148
149            for (int i = 0; i < kernels.size(); ++i) {
150                HsaKernelInfo *ki = &kinfo[i];
151                ki->name_offs = kernelInfo[i].name_offs;
152                ki->code_offs = kernelInfo[i].code_offs;
153                ki->sRegCount = kernelInfo[i].sRegCount;
154                ki->dRegCount = kernelInfo[i].dRegCount;
155                ki->cRegCount = kernelInfo[i].cRegCount;
156                ki->static_lds_size  = kernelInfo[i].static_lds_size;
157                ki->private_mem_size = kernelInfo[i].private_mem_size;
158                ki->spill_mem_size   = kernelInfo[i].spill_mem_size;
159            }
160
161            kinfo.copyOut(tc->getVirtProxy());
162        }
163        break;
164
165      case HSA_GET_STRINGS:
166        {
167            int string_table_size = 0;
168            for (int i = 0; i < kernels.size(); ++i) {
169                HsaCode *k = kernels[i];
170                string_table_size += k->name().size() + 1;
171            }
172
173            BufferArg buf(buf_addr, string_table_size);
174            char *bufp = (char*)buf.bufferPtr();
175
176            for (int i = 0; i < kernels.size(); ++i) {
177                HsaCode *k = kernels[i];
178                const char *n = k->name().c_str();
179
180                // idiomatic string copy
181                while ((*bufp++ = *n++));
182            }
183
184            assert(bufp - (char *)buf.bufferPtr() == string_table_size);
185
186            buf.copyOut(tc->getVirtProxy());
187        }
188        break;
189
190      case HSA_GET_READONLY_DATA:
191        {
192            // we can pick any kernel --- they share the same
193            // readonly segment (this assumption is checked in GET_SIZES)
194            uint64_t size =
195                kernels.back()->getSize(HsaCode::MemorySegment::READONLY);
196            BufferArg data(buf_addr, size);
197            char *datap = (char *)data.bufferPtr();
198            memcpy(datap,
199                   kernels.back()->readonly_data,
200                   size);
201            data.copyOut(tc->getVirtProxy());
202        }
203        break;
204
205      case HSA_GET_CODE:
206        {
207            // set hsaCode pointer
208            hsaCode = buf_addr;
209            int code_size = 0;
210
211            for (int i = 0; i < kernels.size(); ++i) {
212                HsaCode *k = kernels[i];
213                code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
214            }
215
216            TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size);
217            TheGpuISA::RawMachInst *bufp = buf;
218
219            int buf_idx = 0;
220
221            for (int i = 0; i < kernels.size(); ++i) {
222                HsaCode *k = kernels[i];
223
224                for (int j = 0; j < k->numInsts(); ++j) {
225                    bufp[buf_idx] = k->insts()->at(j);
226                    ++buf_idx;
227                }
228            }
229
230            buf.copyOut(tc->getVirtProxy());
231        }
232        break;
233
234      case HSA_GET_CU_CNT:
235        {
236            BufferArg buf(buf_addr, sizeof(uint32_t));
237            *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs();
238            buf.copyOut(tc->getVirtProxy());
239        }
240        break;
241
242      case HSA_GET_VSZ:
243        {
244            BufferArg buf(buf_addr, sizeof(uint32_t));
245            *((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize();
246            buf.copyOut(tc->getVirtProxy());
247        }
248        break;
249      case HSA_GET_HW_STATIC_CONTEXT_SIZE:
250        {
251            BufferArg buf(buf_addr, sizeof(uint32_t));
252            *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize();
253            buf.copyOut(tc->getVirtProxy());
254        }
255        break;
256
257      default:
258        fatal("ClDriver: bad ioctl %d\n", req);
259    }
260
261    return 0;
262}
263
264const char*
265ClDriver::codeOffToKernelName(uint64_t code_ptr)
266{
267    assert(hsaCode);
268    uint32_t code_offs = code_ptr - hsaCode;
269
270    for (int i = 0; i < kernels.size(); ++i) {
271        if (code_offs == kernelInfo[i].code_offs) {
272            return kernels[i]->name().c_str();
273        }
274    }
275
276    return nullptr;
277}
278
279ClDriver*
280ClDriverParams::create()
281{
282    return new ClDriver(this);
283}
284