1/* 2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Author: Anthony Gutierrez 34 */ 35 36#include "gpu-compute/cl_driver.hh" 37 38#include <memory> 39 40#include "base/intmath.hh" 41#include "cpu/thread_context.hh" 42#include "gpu-compute/dispatcher.hh" 43#include "gpu-compute/hsa_code.hh" 44#include "gpu-compute/hsa_kernel_info.hh" 45#include "gpu-compute/hsa_object.hh" 46#include "params/ClDriver.hh" 47#include "sim/process.hh" 48#include "sim/syscall_emul_buf.hh" 49 50ClDriver::ClDriver(ClDriverParams *p) 51 : EmulatedDriver(p), hsaCode(0) 52{ 53 for (const auto &codeFile : p->codefile) 54 codeFiles.push_back(&codeFile); 55 56 maxFuncArgsSize = 0; 57 58 for (int i = 0; i < codeFiles.size(); ++i) { 59 HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]); 60 61 for (int k = 0; k < obj->numKernels(); ++k) { 62 assert(obj->getKernel(k)); 63 kernels.push_back(obj->getKernel(k)); 64 kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData); 65 int kern_funcargs_size = kernels.back()->funcarg_size; 66 maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ? 67 kern_funcargs_size : maxFuncArgsSize; 68 } 69 } 70 71 int name_offs = 0; 72 int code_offs = 0; 73 74 for (int i = 0; i < kernels.size(); ++i) { 75 kernelInfo.push_back(HsaKernelInfo()); 76 HsaCode *k = kernels[i]; 77 78 k->generateHsaKernelInfo(&kernelInfo[i]); 79 80 kernelInfo[i].name_offs = name_offs; 81 kernelInfo[i].code_offs = code_offs; 82 83 name_offs += k->name().size() + 1; 84 code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst); 85 } 86} 87 88void 89ClDriver::handshake(GpuDispatcher *_dispatcher) 90{ 91 dispatcher = _dispatcher; 92 dispatcher->setFuncargsSize(maxFuncArgsSize); 93} 94 95int 96ClDriver::open(ThreadContext *tc, int mode, int flags) 97{ 98 auto p = tc->getProcessPtr(); 99 std::shared_ptr<DeviceFDEntry> fdp; 100 fdp = std::make_shared<DeviceFDEntry>(this, filename); 101 int tgt_fd = p->fds->allocFD(fdp); 102 return tgt_fd; 103} 104 105int 106ClDriver::ioctl(ThreadContext *tc, unsigned req) 107{ 108 int index = 2; 109 auto process = tc->getProcessPtr(); 110 Addr buf_addr = process->getSyscallArg(tc, index); 111 112 switch (req) { 113 case HSA_GET_SIZES: 114 { 115 TypedBufferArg<HsaDriverSizes> sizes(buf_addr); 116 sizes->num_kernels = kernels.size(); 117 sizes->string_table_size = 0; 118 sizes->code_size = 0; 119 sizes->readonly_size = 0; 120 121 if (kernels.size() > 0) { 122 // all kernels will share the same read-only memory 123 sizes->readonly_size = 124 kernels[0]->getSize(HsaCode::MemorySegment::READONLY); 125 // check our assumption 126 for (int i = 1; i<kernels.size(); ++i) { 127 assert(sizes->readonly_size == 128 kernels[i]->getSize(HsaCode::MemorySegment::READONLY)); 129 } 130 } 131 132 for (int i = 0; i < kernels.size(); ++i) { 133 HsaCode *k = kernels[i]; 134 // add one for terminating '\0' 135 sizes->string_table_size += k->name().size() + 1; 136 sizes->code_size += 137 k->numInsts() * sizeof(TheGpuISA::RawMachInst); 138 } 139 140 sizes.copyOut(tc->getVirtProxy()); 141 } 142 break; 143 144 case HSA_GET_KINFO: 145 { 146 TypedBufferArg<HsaKernelInfo> 147 kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size()); 148 149 for (int i = 0; i < kernels.size(); ++i) { 150 HsaKernelInfo *ki = &kinfo[i]; 151 ki->name_offs = kernelInfo[i].name_offs; 152 ki->code_offs = kernelInfo[i].code_offs; 153 ki->sRegCount = kernelInfo[i].sRegCount; 154 ki->dRegCount = kernelInfo[i].dRegCount; 155 ki->cRegCount = kernelInfo[i].cRegCount; 156 ki->static_lds_size = kernelInfo[i].static_lds_size; 157 ki->private_mem_size = kernelInfo[i].private_mem_size; 158 ki->spill_mem_size = kernelInfo[i].spill_mem_size; 159 } 160 161 kinfo.copyOut(tc->getVirtProxy()); 162 } 163 break; 164 165 case HSA_GET_STRINGS: 166 { 167 int string_table_size = 0; 168 for (int i = 0; i < kernels.size(); ++i) { 169 HsaCode *k = kernels[i]; 170 string_table_size += k->name().size() + 1; 171 } 172 173 BufferArg buf(buf_addr, string_table_size); 174 char *bufp = (char*)buf.bufferPtr(); 175 176 for (int i = 0; i < kernels.size(); ++i) { 177 HsaCode *k = kernels[i]; 178 const char *n = k->name().c_str(); 179 180 // idiomatic string copy 181 while ((*bufp++ = *n++)); 182 } 183 184 assert(bufp - (char *)buf.bufferPtr() == string_table_size); 185 186 buf.copyOut(tc->getVirtProxy()); 187 } 188 break; 189 190 case HSA_GET_READONLY_DATA: 191 { 192 // we can pick any kernel --- they share the same 193 // readonly segment (this assumption is checked in GET_SIZES) 194 uint64_t size = 195 kernels.back()->getSize(HsaCode::MemorySegment::READONLY); 196 BufferArg data(buf_addr, size); 197 char *datap = (char *)data.bufferPtr(); 198 memcpy(datap, 199 kernels.back()->readonly_data, 200 size); 201 data.copyOut(tc->getVirtProxy()); 202 } 203 break; 204 205 case HSA_GET_CODE: 206 { 207 // set hsaCode pointer 208 hsaCode = buf_addr; 209 int code_size = 0; 210 211 for (int i = 0; i < kernels.size(); ++i) { 212 HsaCode *k = kernels[i]; 213 code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst); 214 } 215 216 TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size); 217 TheGpuISA::RawMachInst *bufp = buf; 218 219 int buf_idx = 0; 220 221 for (int i = 0; i < kernels.size(); ++i) { 222 HsaCode *k = kernels[i]; 223 224 for (int j = 0; j < k->numInsts(); ++j) { 225 bufp[buf_idx] = k->insts()->at(j); 226 ++buf_idx; 227 } 228 } 229 230 buf.copyOut(tc->getVirtProxy()); 231 } 232 break; 233 234 case HSA_GET_CU_CNT: 235 { 236 BufferArg buf(buf_addr, sizeof(uint32_t)); 237 *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs(); 238 buf.copyOut(tc->getVirtProxy()); 239 } 240 break; 241 242 case HSA_GET_VSZ: 243 { 244 BufferArg buf(buf_addr, sizeof(uint32_t)); 245 *((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize(); 246 buf.copyOut(tc->getVirtProxy()); 247 } 248 break; 249 case HSA_GET_HW_STATIC_CONTEXT_SIZE: 250 { 251 BufferArg buf(buf_addr, sizeof(uint32_t)); 252 *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize(); 253 buf.copyOut(tc->getVirtProxy()); 254 } 255 break; 256 257 default: 258 fatal("ClDriver: bad ioctl %d\n", req); 259 } 260 261 return 0; 262} 263 264const char* 265ClDriver::codeOffToKernelName(uint64_t code_ptr) 266{ 267 assert(hsaCode); 268 uint32_t code_offs = code_ptr - hsaCode; 269 270 for (int i = 0; i < kernels.size(); ++i) { 271 if (code_offs == kernelInfo[i].code_offs) { 272 return kernels[i]->name().c_str(); 273 } 274 } 275 276 return nullptr; 277} 278 279ClDriver* 280ClDriverParams::create() 281{ 282 return new ClDriver(this); 283} 284