// cl_driver.cc revision 11851
1545SN/A/* 213930Sgiacomo.travaglini@arm.com * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 38948SN/A * All rights reserved. 48948SN/A * 58948SN/A * For use for simulation and test purposes only 68948SN/A * 78948SN/A * Redistribution and use in source and binary forms, with or without 88948SN/A * modification, are permitted provided that the following conditions are met: 98948SN/A * 108948SN/A * 1. Redistributions of source code must retain the above copyright notice, 118948SN/A * this list of conditions and the following disclaimer. 128948SN/A * 138948SN/A * 2. Redistributions in binary form must reproduce the above copyright notice, 141762SN/A * this list of conditions and the following disclaimer in the documentation 15545SN/A * and/or other materials provided with the distribution. 16545SN/A * 17545SN/A * 3. Neither the name of the copyright holder nor the names of its contributors 18545SN/A * may be used to endorse or promote products derived from this software 19545SN/A * without specific prior written permission. 20545SN/A * 21545SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22545SN/A * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23545SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24545SN/A * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25545SN/A * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26545SN/A * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27545SN/A * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28545SN/A * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29545SN/A * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30545SN/A * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31545SN/A * POSSIBILITY OF SUCH DAMAGE. 
32545SN/A * 33545SN/A * Author: Anthony Gutierrez 34545SN/A */ 35545SN/A 36545SN/A#include "gpu-compute/cl_driver.hh" 37545SN/A 38545SN/A#include "base/intmath.hh" 392665SN/A#include "cpu/thread_context.hh" 402665SN/A#include "gpu-compute/dispatcher.hh" 412665SN/A#include "gpu-compute/hsa_code.hh" 4211010Sandreas.sandberg@arm.com#include "gpu-compute/hsa_kernel_info.hh" 43545SN/A#include "gpu-compute/hsa_object.hh" 44545SN/A#include "params/ClDriver.hh" 459016Sandreas.hansson@arm.com#include "sim/process.hh" 469016Sandreas.hansson@arm.com#include "sim/syscall_emul_buf.hh" 47545SN/A 489166Sandreas.hansson@arm.comClDriver::ClDriver(ClDriverParams *p) 4911010Sandreas.sandberg@arm.com : EmulatedDriver(p), hsaCode(0) 509166Sandreas.hansson@arm.com{ 5111010Sandreas.sandberg@arm.com for (const auto &codeFile : p->codefile) 529016Sandreas.hansson@arm.com codeFiles.push_back(&codeFile); 534762SN/A 549342SAndreas.Sandberg@arm.com maxFuncArgsSize = 0; 559814Sandreas.hansson@arm.com 562565SN/A for (int i = 0; i < codeFiles.size(); ++i) { 5713892Sgabeblack@google.com HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]); 5813892Sgabeblack@google.com 5910912Sandreas.sandberg@arm.com for (int k = 0; k < obj->numKernels(); ++k) { 602384SN/A assert(obj->getKernel(k)); 619307Sandreas.hansson@arm.com kernels.push_back(obj->getKernel(k)); 622784SN/A kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData); 639307Sandreas.hansson@arm.com int kern_funcargs_size = kernels.back()->funcarg_size; 649307Sandreas.hansson@arm.com maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ? 
659307Sandreas.hansson@arm.com kern_funcargs_size : maxFuncArgsSize; 669307Sandreas.hansson@arm.com } 679307Sandreas.hansson@arm.com } 689307Sandreas.hansson@arm.com 699307Sandreas.hansson@arm.com int name_offs = 0; 702784SN/A int code_offs = 0; 719307Sandreas.hansson@arm.com 729307Sandreas.hansson@arm.com for (int i = 0; i < kernels.size(); ++i) { 739307Sandreas.hansson@arm.com kernelInfo.push_back(HsaKernelInfo()); 749307Sandreas.hansson@arm.com HsaCode *k = kernels[i]; 759307Sandreas.hansson@arm.com 769307Sandreas.hansson@arm.com k->generateHsaKernelInfo(&kernelInfo[i]); 779307Sandreas.hansson@arm.com 789307Sandreas.hansson@arm.com kernelInfo[i].name_offs = name_offs; 794435SN/A kernelInfo[i].code_offs = code_offs; 809166Sandreas.hansson@arm.com 819166Sandreas.hansson@arm.com name_offs += k->name().size() + 1; 829166Sandreas.hansson@arm.com code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst); 839166Sandreas.hansson@arm.com } 849166Sandreas.hansson@arm.com} 859166Sandreas.hansson@arm.com 869166Sandreas.hansson@arm.comvoid 879166Sandreas.hansson@arm.comClDriver::handshake(GpuDispatcher *_dispatcher) 889166Sandreas.hansson@arm.com{ 899166Sandreas.hansson@arm.com dispatcher = _dispatcher; 909166Sandreas.hansson@arm.com dispatcher->setFuncargsSize(maxFuncArgsSize); 918948SN/A} 929307Sandreas.hansson@arm.com 939307Sandreas.hansson@arm.comint 949307Sandreas.hansson@arm.comClDriver::open(Process *p, ThreadContext *tc, int mode, int flags) 959307Sandreas.hansson@arm.com{ 969307Sandreas.hansson@arm.com int fd = p->allocFD(-1, filename, 0, 0, false); 979307Sandreas.hansson@arm.com FDEntry *fde = p->getFDEntry(fd); 989307Sandreas.hansson@arm.com fde->driver = this; 999307Sandreas.hansson@arm.com 1009307Sandreas.hansson@arm.com return fd; 1019307Sandreas.hansson@arm.com} 1029307Sandreas.hansson@arm.com 1039307Sandreas.hansson@arm.comint 1049307Sandreas.hansson@arm.comClDriver::ioctl(Process *process, ThreadContext *tc, unsigned req) 1059307Sandreas.hansson@arm.com{ 
1069307Sandreas.hansson@arm.com int index = 2; 1079307Sandreas.hansson@arm.com Addr buf_addr = process->getSyscallArg(tc, index); 1089307Sandreas.hansson@arm.com 1099307Sandreas.hansson@arm.com switch (req) { 1109307Sandreas.hansson@arm.com case HSA_GET_SIZES: 1119307Sandreas.hansson@arm.com { 11211010Sandreas.sandberg@arm.com TypedBufferArg<HsaDriverSizes> sizes(buf_addr); 1139307Sandreas.hansson@arm.com sizes->num_kernels = kernels.size(); 11413892Sgabeblack@google.com sizes->string_table_size = 0; 1159307Sandreas.hansson@arm.com sizes->code_size = 0; 11611010Sandreas.sandberg@arm.com sizes->readonly_size = 0; 11711010Sandreas.sandberg@arm.com 11811010Sandreas.sandberg@arm.com if (kernels.size() > 0) { 11911010Sandreas.sandberg@arm.com // all kernels will share the same read-only memory 12011010Sandreas.sandberg@arm.com sizes->readonly_size = 12111010Sandreas.sandberg@arm.com kernels[0]->getSize(HsaCode::MemorySegment::READONLY); 12211010Sandreas.sandberg@arm.com // check our assumption 12311010Sandreas.sandberg@arm.com for (int i = 1; i<kernels.size(); ++i) { 1249307Sandreas.hansson@arm.com assert(sizes->readonly_size == 1259307Sandreas.hansson@arm.com kernels[i]->getSize(HsaCode::MemorySegment::READONLY)); 1269307Sandreas.hansson@arm.com } 1279307Sandreas.hansson@arm.com } 12812087Sspwilson2@wisc.edu 1299307Sandreas.hansson@arm.com for (int i = 0; i < kernels.size(); ++i) { 1309307Sandreas.hansson@arm.com HsaCode *k = kernels[i]; 1319307Sandreas.hansson@arm.com // add one for terminating '\0' 1329307Sandreas.hansson@arm.com sizes->string_table_size += k->name().size() + 1; 1339307Sandreas.hansson@arm.com sizes->code_size += 1349307Sandreas.hansson@arm.com k->numInsts() * sizeof(TheGpuISA::RawMachInst); 1359307Sandreas.hansson@arm.com } 1369307Sandreas.hansson@arm.com 13713930Sgiacomo.travaglini@arm.com sizes.copyOut(tc->getMemProxy()); 13813930Sgiacomo.travaglini@arm.com } 13913930Sgiacomo.travaglini@arm.com break; 14013930Sgiacomo.travaglini@arm.com 
14113930Sgiacomo.travaglini@arm.com case HSA_GET_KINFO: 14213930Sgiacomo.travaglini@arm.com { 1439307Sandreas.hansson@arm.com TypedBufferArg<HsaKernelInfo> 1449307Sandreas.hansson@arm.com kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size()); 14511169Sandreas.hansson@arm.com 14611169Sandreas.hansson@arm.com for (int i = 0; i < kernels.size(); ++i) { 1472384SN/A HsaKernelInfo *ki = &kinfo[i]; 1489166Sandreas.hansson@arm.com ki->name_offs = kernelInfo[i].name_offs; 1494435SN/A ki->code_offs = kernelInfo[i].code_offs; 1509165Sandreas.hansson@arm.com ki->sRegCount = kernelInfo[i].sRegCount; 1512489SN/A ki->dRegCount = kernelInfo[i].dRegCount; 15213930Sgiacomo.travaglini@arm.com ki->cRegCount = kernelInfo[i].cRegCount; 15313930Sgiacomo.travaglini@arm.com ki->static_lds_size = kernelInfo[i].static_lds_size; 1542565SN/A ki->private_mem_size = kernelInfo[i].private_mem_size; 15513930Sgiacomo.travaglini@arm.com ki->spill_mem_size = kernelInfo[i].spill_mem_size; 15613930Sgiacomo.travaglini@arm.com } 15713930Sgiacomo.travaglini@arm.com 15813930Sgiacomo.travaglini@arm.com kinfo.copyOut(tc->getMemProxy()); 15913930Sgiacomo.travaglini@arm.com } 16013930Sgiacomo.travaglini@arm.com break; 16113930Sgiacomo.travaglini@arm.com 16213930Sgiacomo.travaglini@arm.com case HSA_GET_STRINGS: 1632565SN/A { 1649166Sandreas.hansson@arm.com int string_table_size = 0; 1652384SN/A for (int i = 0; i < kernels.size(); ++i) { 16611168Sandreas.hansson@arm.com HsaCode *k = kernels[i]; 1672384SN/A string_table_size += k->name().size() + 1; 1682384SN/A } 169545SN/A 170545SN/A BufferArg buf(buf_addr, string_table_size); 1714435SN/A char *bufp = (char*)buf.bufferPtr(); 1728851SN/A 173545SN/A for (int i = 0; i < kernels.size(); ++i) { 174545SN/A HsaCode *k = kernels[i]; 1754762SN/A const char *n = k->name().c_str(); 1764762SN/A 1779166Sandreas.hansson@arm.com // idiomatic string copy 1784762SN/A while ((*bufp++ = *n++)); 1798851SN/A } 18013930Sgiacomo.travaglini@arm.com 18113930Sgiacomo.travaglini@arm.com 
assert(bufp - (char *)buf.bufferPtr() == string_table_size); 18213930Sgiacomo.travaglini@arm.com 18313930Sgiacomo.travaglini@arm.com buf.copyOut(tc->getMemProxy()); 18413930Sgiacomo.travaglini@arm.com } 18513930Sgiacomo.travaglini@arm.com break; 18613930Sgiacomo.travaglini@arm.com 1878851SN/A case HSA_GET_READONLY_DATA: 1884022SN/A { 1898851SN/A // we can pick any kernel --- they share the same 1904022SN/A // readonly segment (this assumption is checked in GET_SIZES) 1912565SN/A uint64_t size = 1928851SN/A kernels.back()->getSize(HsaCode::MemorySegment::READONLY); 19313930Sgiacomo.travaglini@arm.com BufferArg data(buf_addr, size); 19413930Sgiacomo.travaglini@arm.com char *datap = (char *)data.bufferPtr(); 19513930Sgiacomo.travaglini@arm.com memcpy(datap, 19613930Sgiacomo.travaglini@arm.com kernels.back()->readonly_data, 19713930Sgiacomo.travaglini@arm.com size); 19813930Sgiacomo.travaglini@arm.com data.copyOut(tc->getMemProxy()); 19913930Sgiacomo.travaglini@arm.com } 2008851SN/A break; 2014263SN/A 2028851SN/A case HSA_GET_CODE: 2034263SN/A { 2042565SN/A // set hsaCode pointer 2059307Sandreas.hansson@arm.com hsaCode = buf_addr; 2068851SN/A int code_size = 0; 20711169Sandreas.hansson@arm.com 2082565SN/A for (int i = 0; i < kernels.size(); ++i) { 2099814Sandreas.hansson@arm.com HsaCode *k = kernels[i]; 2104263SN/A code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst); 21113784Sgabeblack@google.com } 21213784Sgabeblack@google.com 2132489SN/A TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size); 214545SN/A TheGpuISA::RawMachInst *bufp = buf; 215545SN/A 21611010Sandreas.sandberg@arm.com int buf_idx = 0; 21711625Smichael.lebeane@amd.com 21811625Smichael.lebeane@amd.com for (int i = 0; i < kernels.size(); ++i) { 21911625Smichael.lebeane@amd.com HsaCode *k = kernels[i]; 22011625Smichael.lebeane@amd.com 22111625Smichael.lebeane@amd.com for (int j = 0; j < k->numInsts(); ++j) { 22211625Smichael.lebeane@amd.com bufp[buf_idx] = k->insts()->at(j); 
22311625Smichael.lebeane@amd.com ++buf_idx; 22411625Smichael.lebeane@amd.com } 22511625Smichael.lebeane@amd.com } 22611625Smichael.lebeane@amd.com 22711625Smichael.lebeane@amd.com buf.copyOut(tc->getMemProxy()); 22811625Smichael.lebeane@amd.com } 22911625Smichael.lebeane@amd.com break; 23011625Smichael.lebeane@amd.com 23111625Smichael.lebeane@amd.com case HSA_GET_CU_CNT: 23211625Smichael.lebeane@amd.com { 23311625Smichael.lebeane@amd.com BufferArg buf(buf_addr, sizeof(uint32_t)); 23411625Smichael.lebeane@amd.com *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs(); 23511625Smichael.lebeane@amd.com buf.copyOut(tc->getMemProxy()); 23611625Smichael.lebeane@amd.com } 23711625Smichael.lebeane@amd.com break; 23811625Smichael.lebeane@amd.com 23911625Smichael.lebeane@amd.com case HSA_GET_VSZ: 24011625Smichael.lebeane@amd.com { 24111625Smichael.lebeane@amd.com BufferArg buf(buf_addr, sizeof(uint32_t)); 24211625Smichael.lebeane@amd.com *((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize(); 24311625Smichael.lebeane@amd.com buf.copyOut(tc->getMemProxy()); 24411625Smichael.lebeane@amd.com } 24511625Smichael.lebeane@amd.com break; 24611625Smichael.lebeane@amd.com case HSA_GET_HW_STATIC_CONTEXT_SIZE: 24711625Smichael.lebeane@amd.com { 24811625Smichael.lebeane@amd.com BufferArg buf(buf_addr, sizeof(uint32_t)); 24911625Smichael.lebeane@amd.com *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize(); 25011625Smichael.lebeane@amd.com buf.copyOut(tc->getMemProxy()); 25111625Smichael.lebeane@amd.com } 25211625Smichael.lebeane@amd.com break; 25311625Smichael.lebeane@amd.com 25411625Smichael.lebeane@amd.com default: 25511625Smichael.lebeane@amd.com fatal("ClDriver: bad ioctl %d\n", req); 25611625Smichael.lebeane@amd.com } 25711625Smichael.lebeane@amd.com 25811625Smichael.lebeane@amd.com return 0; 25911625Smichael.lebeane@amd.com} 26011625Smichael.lebeane@amd.com 26111625Smichael.lebeane@amd.comconst char* 26211625Smichael.lebeane@amd.comClDriver::codeOffToKernelName(uint64_t 
code_ptr) 26311625Smichael.lebeane@amd.com{ 26411625Smichael.lebeane@amd.com assert(hsaCode); 26511625Smichael.lebeane@amd.com uint32_t code_offs = code_ptr - hsaCode; 26611625Smichael.lebeane@amd.com 26711625Smichael.lebeane@amd.com for (int i = 0; i < kernels.size(); ++i) { 26811625Smichael.lebeane@amd.com if (code_offs == kernelInfo[i].code_offs) { 26911625Smichael.lebeane@amd.com return kernels[i]->name().c_str(); 27011625Smichael.lebeane@amd.com } 27111625Smichael.lebeane@amd.com } 27211625Smichael.lebeane@amd.com 27311625Smichael.lebeane@amd.com return nullptr; 27411625Smichael.lebeane@amd.com} 27511625Smichael.lebeane@amd.com 27611625Smichael.lebeane@amd.comClDriver* 27711625Smichael.lebeane@amd.comClDriverParams::create() 27811625Smichael.lebeane@amd.com{ 27911625Smichael.lebeane@amd.com return new ClDriver(this); 28011625Smichael.lebeane@amd.com} 28112131Sspwilson2@wisc.edu