// cl_driver.cc revision 11308
1/* 2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 
32 * 33 * Author: Anthony Gutierrez 34 */ 35 36#include "gpu-compute/cl_driver.hh" 37 38#include "base/intmath.hh" 39#include "cpu/thread_context.hh" 40#include "gpu-compute/dispatcher.hh" 41#include "gpu-compute/hsa_code.hh" 42#include "gpu-compute/hsa_kernel_info.hh" 43#include "gpu-compute/hsa_object.hh" 44#include "params/ClDriver.hh" 45#include "sim/process.hh" 46#include "sim/syscall_emul_buf.hh" 47 48ClDriver::ClDriver(ClDriverParams *p) 49 : EmulatedDriver(p), hsaCode(0) 50{ 51 for (const auto &codeFile : p->codefile) 52 codeFiles.push_back(&codeFile); 53 54 maxFuncArgsSize = 0; 55 56 for (int i = 0; i < codeFiles.size(); ++i) { 57 HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]); 58 59 for (int k = 0; k < obj->numKernels(); ++k) { 60 assert(obj->getKernel(k)); 61 kernels.push_back(obj->getKernel(k)); 62 kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData); 63 int kern_funcargs_size = kernels.back()->funcarg_size; 64 maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ? 
65 kern_funcargs_size : maxFuncArgsSize; 66 } 67 } 68 69 int name_offs = 0; 70 int code_offs = 0; 71 72 for (int i = 0; i < kernels.size(); ++i) { 73 kernelInfo.push_back(HsaKernelInfo()); 74 HsaCode *k = kernels[i]; 75 76 k->generateHsaKernelInfo(&kernelInfo[i]); 77 78 kernelInfo[i].name_offs = name_offs; 79 kernelInfo[i].code_offs = code_offs; 80 81 name_offs += k->name().size() + 1; 82 code_offs += k->numInsts() * sizeof(GPUStaticInst*); 83 } 84} 85 86void 87ClDriver::handshake(GpuDispatcher *_dispatcher) 88{ 89 dispatcher = _dispatcher; 90 dispatcher->setFuncargsSize(maxFuncArgsSize); 91} 92 93int 94ClDriver::open(LiveProcess *p, ThreadContext *tc, int mode, int flags) 95{ 96 int fd = p->allocFD(-1, filename, 0, 0, false); 97 FDEntry *fde = p->getFDEntry(fd); 98 fde->driver = this; 99 100 return fd; 101} 102 103int 104ClDriver::ioctl(LiveProcess *process, ThreadContext *tc, unsigned req) 105{ 106 int index = 2; 107 Addr buf_addr = process->getSyscallArg(tc, index); 108 109 switch (req) { 110 case HSA_GET_SIZES: 111 { 112 TypedBufferArg<HsaDriverSizes> sizes(buf_addr); 113 sizes->num_kernels = kernels.size(); 114 sizes->string_table_size = 0; 115 sizes->code_size = 0; 116 sizes->readonly_size = 0; 117 118 if (kernels.size() > 0) { 119 // all kernels will share the same read-only memory 120 sizes->readonly_size = 121 kernels[0]->getSize(HsaCode::MemorySegment::READONLY); 122 // check our assumption 123 for (int i = 1; i<kernels.size(); ++i) { 124 assert(sizes->readonly_size == 125 kernels[i]->getSize(HsaCode::MemorySegment::READONLY)); 126 } 127 } 128 129 for (int i = 0; i < kernels.size(); ++i) { 130 HsaCode *k = kernels[i]; 131 // add one for terminating '\0' 132 sizes->string_table_size += k->name().size() + 1; 133 sizes->code_size += k->numInsts() * sizeof(GPUStaticInst*); 134 } 135 136 sizes.copyOut(tc->getMemProxy()); 137 } 138 break; 139 140 case HSA_GET_KINFO: 141 { 142 TypedBufferArg<HsaKernelInfo> 143 kinfo(buf_addr, sizeof(HsaKernelInfo) * 
kernels.size()); 144 145 for (int i = 0; i < kernels.size(); ++i) { 146 HsaKernelInfo *ki = &kinfo[i]; 147 ki->name_offs = kernelInfo[i].name_offs; 148 ki->code_offs = kernelInfo[i].code_offs; 149 ki->sRegCount = kernelInfo[i].sRegCount; 150 ki->dRegCount = kernelInfo[i].dRegCount; 151 ki->cRegCount = kernelInfo[i].cRegCount; 152 ki->static_lds_size = kernelInfo[i].static_lds_size; 153 ki->private_mem_size = kernelInfo[i].private_mem_size; 154 ki->spill_mem_size = kernelInfo[i].spill_mem_size; 155 } 156 157 kinfo.copyOut(tc->getMemProxy()); 158 } 159 break; 160 161 case HSA_GET_STRINGS: 162 { 163 int string_table_size = 0; 164 for (int i = 0; i < kernels.size(); ++i) { 165 HsaCode *k = kernels[i]; 166 string_table_size += k->name().size() + 1; 167 } 168 169 BufferArg buf(buf_addr, string_table_size); 170 char *bufp = (char*)buf.bufferPtr(); 171 172 for (int i = 0; i < kernels.size(); ++i) { 173 HsaCode *k = kernels[i]; 174 const char *n = k->name().c_str(); 175 176 // idiomatic string copy 177 while ((*bufp++ = *n++)); 178 } 179 180 assert(bufp - (char *)buf.bufferPtr() == string_table_size); 181 182 buf.copyOut(tc->getMemProxy()); 183 } 184 break; 185 186 case HSA_GET_READONLY_DATA: 187 { 188 // we can pick any kernel --- they share the same 189 // readonly segment (this assumption is checked in GET_SIZES) 190 uint64_t size = 191 kernels.back()->getSize(HsaCode::MemorySegment::READONLY); 192 BufferArg data(buf_addr, size); 193 char *datap = (char *)data.bufferPtr(); 194 memcpy(datap, 195 kernels.back()->readonly_data, 196 size); 197 data.copyOut(tc->getMemProxy()); 198 } 199 break; 200 201 case HSA_GET_CODE: 202 { 203 // set hsaCode pointer 204 hsaCode = buf_addr; 205 int code_size = 0; 206 207 for (int i = 0; i < kernels.size(); ++i) { 208 HsaCode *k = kernels[i]; 209 code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst); 210 } 211 212 TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size); 213 TheGpuISA::RawMachInst *bufp = buf; 214 215 int 
buf_idx = 0; 216 217 for (int i = 0; i < kernels.size(); ++i) { 218 HsaCode *k = kernels[i]; 219 220 for (int j = 0; j < k->numInsts(); ++j) { 221 bufp[buf_idx] = k->insts()->at(j); 222 ++buf_idx; 223 } 224 } 225 226 buf.copyOut(tc->getMemProxy()); 227 } 228 break; 229 230 case HSA_GET_CU_CNT: 231 { 232 BufferArg buf(buf_addr, sizeof(uint32_t)); 233 *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs(); 234 buf.copyOut(tc->getMemProxy()); 235 } 236 break; 237 238 case HSA_GET_VSZ: 239 { 240 BufferArg buf(buf_addr, sizeof(uint32_t)); 241 *((uint32_t*)buf.bufferPtr()) = VSZ; 242 buf.copyOut(tc->getMemProxy()); 243 } 244 break; 245 246 default: 247 fatal("ClDriver: bad ioctl %d\n", req); 248 } 249 250 return 0; 251} 252 253const char* 254ClDriver::codeOffToKernelName(uint64_t code_ptr) 255{ 256 assert(hsaCode); 257 uint32_t code_offs = code_ptr - hsaCode; 258 259 for (int i = 0; i < kernels.size(); ++i) { 260 if (code_offs == kernelInfo[i].code_offs) { 261 return kernels[i]->name().c_str(); 262 } 263 } 264 265 return nullptr; 266} 267 268ClDriver* 269ClDriverParams::create() 270{ 271 return new ClDriver(this); 272} 273