// cl_driver.cc revision 11856
1/* 2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 
32 * 33 * Author: Anthony Gutierrez 34 */ 35 36#include "gpu-compute/cl_driver.hh" 37 38#include <memory> 39 40#include "base/intmath.hh" 41#include "cpu/thread_context.hh" 42#include "gpu-compute/dispatcher.hh" 43#include "gpu-compute/hsa_code.hh" 44#include "gpu-compute/hsa_kernel_info.hh" 45#include "gpu-compute/hsa_object.hh" 46#include "params/ClDriver.hh" 47#include "sim/process.hh" 48#include "sim/syscall_emul_buf.hh" 49 50ClDriver::ClDriver(ClDriverParams *p) 51 : EmulatedDriver(p), hsaCode(0) 52{ 53 for (const auto &codeFile : p->codefile) 54 codeFiles.push_back(&codeFile); 55 56 maxFuncArgsSize = 0; 57 58 for (int i = 0; i < codeFiles.size(); ++i) { 59 HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]); 60 61 for (int k = 0; k < obj->numKernels(); ++k) { 62 assert(obj->getKernel(k)); 63 kernels.push_back(obj->getKernel(k)); 64 kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData); 65 int kern_funcargs_size = kernels.back()->funcarg_size; 66 maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ? 
67 kern_funcargs_size : maxFuncArgsSize; 68 } 69 } 70 71 int name_offs = 0; 72 int code_offs = 0; 73 74 for (int i = 0; i < kernels.size(); ++i) { 75 kernelInfo.push_back(HsaKernelInfo()); 76 HsaCode *k = kernels[i]; 77 78 k->generateHsaKernelInfo(&kernelInfo[i]); 79 80 kernelInfo[i].name_offs = name_offs; 81 kernelInfo[i].code_offs = code_offs; 82 83 name_offs += k->name().size() + 1; 84 code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst); 85 } 86} 87 88void 89ClDriver::handshake(GpuDispatcher *_dispatcher) 90{ 91 dispatcher = _dispatcher; 92 dispatcher->setFuncargsSize(maxFuncArgsSize); 93} 94 95int 96ClDriver::open(Process *p, ThreadContext *tc, int mode, int flags) 97{ 98 std::shared_ptr<DeviceFDEntry> fdp; 99 fdp = std::make_shared<DeviceFDEntry>(this, filename); 100 int tgt_fd = p->fds->allocFD(fdp); 101 return tgt_fd; 102} 103 104int 105ClDriver::ioctl(Process *process, ThreadContext *tc, unsigned req) 106{ 107 int index = 2; 108 Addr buf_addr = process->getSyscallArg(tc, index); 109 110 switch (req) { 111 case HSA_GET_SIZES: 112 { 113 TypedBufferArg<HsaDriverSizes> sizes(buf_addr); 114 sizes->num_kernels = kernels.size(); 115 sizes->string_table_size = 0; 116 sizes->code_size = 0; 117 sizes->readonly_size = 0; 118 119 if (kernels.size() > 0) { 120 // all kernels will share the same read-only memory 121 sizes->readonly_size = 122 kernels[0]->getSize(HsaCode::MemorySegment::READONLY); 123 // check our assumption 124 for (int i = 1; i<kernels.size(); ++i) { 125 assert(sizes->readonly_size == 126 kernels[i]->getSize(HsaCode::MemorySegment::READONLY)); 127 } 128 } 129 130 for (int i = 0; i < kernels.size(); ++i) { 131 HsaCode *k = kernels[i]; 132 // add one for terminating '\0' 133 sizes->string_table_size += k->name().size() + 1; 134 sizes->code_size += 135 k->numInsts() * sizeof(TheGpuISA::RawMachInst); 136 } 137 138 sizes.copyOut(tc->getMemProxy()); 139 } 140 break; 141 142 case HSA_GET_KINFO: 143 { 144 TypedBufferArg<HsaKernelInfo> 145 
kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size()); 146 147 for (int i = 0; i < kernels.size(); ++i) { 148 HsaKernelInfo *ki = &kinfo[i]; 149 ki->name_offs = kernelInfo[i].name_offs; 150 ki->code_offs = kernelInfo[i].code_offs; 151 ki->sRegCount = kernelInfo[i].sRegCount; 152 ki->dRegCount = kernelInfo[i].dRegCount; 153 ki->cRegCount = kernelInfo[i].cRegCount; 154 ki->static_lds_size = kernelInfo[i].static_lds_size; 155 ki->private_mem_size = kernelInfo[i].private_mem_size; 156 ki->spill_mem_size = kernelInfo[i].spill_mem_size; 157 } 158 159 kinfo.copyOut(tc->getMemProxy()); 160 } 161 break; 162 163 case HSA_GET_STRINGS: 164 { 165 int string_table_size = 0; 166 for (int i = 0; i < kernels.size(); ++i) { 167 HsaCode *k = kernels[i]; 168 string_table_size += k->name().size() + 1; 169 } 170 171 BufferArg buf(buf_addr, string_table_size); 172 char *bufp = (char*)buf.bufferPtr(); 173 174 for (int i = 0; i < kernels.size(); ++i) { 175 HsaCode *k = kernels[i]; 176 const char *n = k->name().c_str(); 177 178 // idiomatic string copy 179 while ((*bufp++ = *n++)); 180 } 181 182 assert(bufp - (char *)buf.bufferPtr() == string_table_size); 183 184 buf.copyOut(tc->getMemProxy()); 185 } 186 break; 187 188 case HSA_GET_READONLY_DATA: 189 { 190 // we can pick any kernel --- they share the same 191 // readonly segment (this assumption is checked in GET_SIZES) 192 uint64_t size = 193 kernels.back()->getSize(HsaCode::MemorySegment::READONLY); 194 BufferArg data(buf_addr, size); 195 char *datap = (char *)data.bufferPtr(); 196 memcpy(datap, 197 kernels.back()->readonly_data, 198 size); 199 data.copyOut(tc->getMemProxy()); 200 } 201 break; 202 203 case HSA_GET_CODE: 204 { 205 // set hsaCode pointer 206 hsaCode = buf_addr; 207 int code_size = 0; 208 209 for (int i = 0; i < kernels.size(); ++i) { 210 HsaCode *k = kernels[i]; 211 code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst); 212 } 213 214 TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size); 215 
TheGpuISA::RawMachInst *bufp = buf; 216 217 int buf_idx = 0; 218 219 for (int i = 0; i < kernels.size(); ++i) { 220 HsaCode *k = kernels[i]; 221 222 for (int j = 0; j < k->numInsts(); ++j) { 223 bufp[buf_idx] = k->insts()->at(j); 224 ++buf_idx; 225 } 226 } 227 228 buf.copyOut(tc->getMemProxy()); 229 } 230 break; 231 232 case HSA_GET_CU_CNT: 233 { 234 BufferArg buf(buf_addr, sizeof(uint32_t)); 235 *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs(); 236 buf.copyOut(tc->getMemProxy()); 237 } 238 break; 239 240 case HSA_GET_VSZ: 241 { 242 BufferArg buf(buf_addr, sizeof(uint32_t)); 243 *((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize(); 244 buf.copyOut(tc->getMemProxy()); 245 } 246 break; 247 case HSA_GET_HW_STATIC_CONTEXT_SIZE: 248 { 249 BufferArg buf(buf_addr, sizeof(uint32_t)); 250 *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize(); 251 buf.copyOut(tc->getMemProxy()); 252 } 253 break; 254 255 default: 256 fatal("ClDriver: bad ioctl %d\n", req); 257 } 258 259 return 0; 260} 261 262const char* 263ClDriver::codeOffToKernelName(uint64_t code_ptr) 264{ 265 assert(hsaCode); 266 uint32_t code_offs = code_ptr - hsaCode; 267 268 for (int i = 0; i < kernels.size(); ++i) { 269 if (code_offs == kernelInfo[i].code_offs) { 270 return kernels[i]->name().c_str(); 271 } 272 } 273 274 return nullptr; 275} 276 277ClDriver* 278ClDriverParams::create() 279{ 280 return new ClDriver(this); 281} 282