cl_driver.cc revision 11851:824055fe6b30
1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Anthony Gutierrez
34 */
35
36#include "gpu-compute/cl_driver.hh"
37
38#include "base/intmath.hh"
39#include "cpu/thread_context.hh"
40#include "gpu-compute/dispatcher.hh"
41#include "gpu-compute/hsa_code.hh"
42#include "gpu-compute/hsa_kernel_info.hh"
43#include "gpu-compute/hsa_object.hh"
44#include "params/ClDriver.hh"
45#include "sim/process.hh"
46#include "sim/syscall_emul_buf.hh"
47
48ClDriver::ClDriver(ClDriverParams *p)
49    : EmulatedDriver(p), hsaCode(0)
50{
51    for (const auto &codeFile : p->codefile)
52        codeFiles.push_back(&codeFile);
53
54    maxFuncArgsSize = 0;
55
56    for (int i = 0; i < codeFiles.size(); ++i) {
57        HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]);
58
59        for (int k = 0; k < obj->numKernels(); ++k) {
60            assert(obj->getKernel(k));
61            kernels.push_back(obj->getKernel(k));
62            kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData);
63            int kern_funcargs_size = kernels.back()->funcarg_size;
64            maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ?
65                kern_funcargs_size : maxFuncArgsSize;
66        }
67    }
68
69    int name_offs = 0;
70    int code_offs = 0;
71
72    for (int i = 0; i < kernels.size(); ++i) {
73        kernelInfo.push_back(HsaKernelInfo());
74        HsaCode *k = kernels[i];
75
76        k->generateHsaKernelInfo(&kernelInfo[i]);
77
78        kernelInfo[i].name_offs = name_offs;
79        kernelInfo[i].code_offs = code_offs;
80
81        name_offs += k->name().size() + 1;
82        code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
83    }
84}
85
86void
87ClDriver::handshake(GpuDispatcher *_dispatcher)
88{
89    dispatcher = _dispatcher;
90    dispatcher->setFuncargsSize(maxFuncArgsSize);
91}
92
93int
94ClDriver::open(Process *p, ThreadContext *tc, int mode, int flags)
95{
96    int fd = p->allocFD(-1, filename, 0, 0, false);
97    FDEntry *fde = p->getFDEntry(fd);
98    fde->driver = this;
99
100    return fd;
101}
102
103int
104ClDriver::ioctl(Process *process, ThreadContext *tc, unsigned req)
105{
106    int index = 2;
107    Addr buf_addr = process->getSyscallArg(tc, index);
108
109    switch (req) {
110      case HSA_GET_SIZES:
111        {
112            TypedBufferArg<HsaDriverSizes> sizes(buf_addr);
113            sizes->num_kernels = kernels.size();
114            sizes->string_table_size = 0;
115            sizes->code_size = 0;
116            sizes->readonly_size = 0;
117
118            if (kernels.size() > 0) {
119                // all kernels will share the same read-only memory
120                sizes->readonly_size =
121                    kernels[0]->getSize(HsaCode::MemorySegment::READONLY);
122                // check our assumption
123                for (int i = 1; i<kernels.size(); ++i) {
124                    assert(sizes->readonly_size ==
125                    kernels[i]->getSize(HsaCode::MemorySegment::READONLY));
126                }
127            }
128
129            for (int i = 0; i < kernels.size(); ++i) {
130                HsaCode *k = kernels[i];
131                // add one for terminating '\0'
132                sizes->string_table_size += k->name().size() + 1;
133                sizes->code_size +=
134                    k->numInsts() * sizeof(TheGpuISA::RawMachInst);
135            }
136
137            sizes.copyOut(tc->getMemProxy());
138        }
139        break;
140
141      case HSA_GET_KINFO:
142        {
143            TypedBufferArg<HsaKernelInfo>
144                kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size());
145
146            for (int i = 0; i < kernels.size(); ++i) {
147                HsaKernelInfo *ki = &kinfo[i];
148                ki->name_offs = kernelInfo[i].name_offs;
149                ki->code_offs = kernelInfo[i].code_offs;
150                ki->sRegCount = kernelInfo[i].sRegCount;
151                ki->dRegCount = kernelInfo[i].dRegCount;
152                ki->cRegCount = kernelInfo[i].cRegCount;
153                ki->static_lds_size  = kernelInfo[i].static_lds_size;
154                ki->private_mem_size = kernelInfo[i].private_mem_size;
155                ki->spill_mem_size   = kernelInfo[i].spill_mem_size;
156            }
157
158            kinfo.copyOut(tc->getMemProxy());
159        }
160        break;
161
162      case HSA_GET_STRINGS:
163        {
164            int string_table_size = 0;
165            for (int i = 0; i < kernels.size(); ++i) {
166                HsaCode *k = kernels[i];
167                string_table_size += k->name().size() + 1;
168            }
169
170            BufferArg buf(buf_addr, string_table_size);
171            char *bufp = (char*)buf.bufferPtr();
172
173            for (int i = 0; i < kernels.size(); ++i) {
174                HsaCode *k = kernels[i];
175                const char *n = k->name().c_str();
176
177                // idiomatic string copy
178                while ((*bufp++ = *n++));
179            }
180
181            assert(bufp - (char *)buf.bufferPtr() == string_table_size);
182
183            buf.copyOut(tc->getMemProxy());
184        }
185        break;
186
187      case HSA_GET_READONLY_DATA:
188        {
189            // we can pick any kernel --- they share the same
190            // readonly segment (this assumption is checked in GET_SIZES)
191            uint64_t size =
192                kernels.back()->getSize(HsaCode::MemorySegment::READONLY);
193            BufferArg data(buf_addr, size);
194            char *datap = (char *)data.bufferPtr();
195            memcpy(datap,
196                   kernels.back()->readonly_data,
197                   size);
198            data.copyOut(tc->getMemProxy());
199        }
200        break;
201
202      case HSA_GET_CODE:
203        {
204            // set hsaCode pointer
205            hsaCode = buf_addr;
206            int code_size = 0;
207
208            for (int i = 0; i < kernels.size(); ++i) {
209                HsaCode *k = kernels[i];
210                code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
211            }
212
213            TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size);
214            TheGpuISA::RawMachInst *bufp = buf;
215
216            int buf_idx = 0;
217
218            for (int i = 0; i < kernels.size(); ++i) {
219                HsaCode *k = kernels[i];
220
221                for (int j = 0; j < k->numInsts(); ++j) {
222                    bufp[buf_idx] = k->insts()->at(j);
223                    ++buf_idx;
224                }
225            }
226
227            buf.copyOut(tc->getMemProxy());
228        }
229        break;
230
231      case HSA_GET_CU_CNT:
232        {
233            BufferArg buf(buf_addr, sizeof(uint32_t));
234            *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs();
235            buf.copyOut(tc->getMemProxy());
236        }
237        break;
238
239      case HSA_GET_VSZ:
240        {
241            BufferArg buf(buf_addr, sizeof(uint32_t));
242            *((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize();
243            buf.copyOut(tc->getMemProxy());
244        }
245        break;
246      case HSA_GET_HW_STATIC_CONTEXT_SIZE:
247        {
248            BufferArg buf(buf_addr, sizeof(uint32_t));
249            *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize();
250            buf.copyOut(tc->getMemProxy());
251        }
252        break;
253
254      default:
255        fatal("ClDriver: bad ioctl %d\n", req);
256    }
257
258    return 0;
259}
260
261const char*
262ClDriver::codeOffToKernelName(uint64_t code_ptr)
263{
264    assert(hsaCode);
265    uint32_t code_offs = code_ptr - hsaCode;
266
267    for (int i = 0; i < kernels.size(); ++i) {
268        if (code_offs == kernelInfo[i].code_offs) {
269            return kernels[i]->name().c_str();
270        }
271    }
272
273    return nullptr;
274}
275
276ClDriver*
277ClDriverParams::create()
278{
279    return new ClDriver(this);
280}
281