// shader.cc — gem5 GPU compute Shader model (upstream revision 11386)
111308Santhony.gutierrez@amd.com/* 211308Santhony.gutierrez@amd.com * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 311308Santhony.gutierrez@amd.com * All rights reserved. 411308Santhony.gutierrez@amd.com * 511308Santhony.gutierrez@amd.com * For use for simulation and test purposes only 611308Santhony.gutierrez@amd.com * 711308Santhony.gutierrez@amd.com * Redistribution and use in source and binary forms, with or without 811308Santhony.gutierrez@amd.com * modification, are permitted provided that the following conditions are met: 911308Santhony.gutierrez@amd.com * 1011308Santhony.gutierrez@amd.com * 1. Redistributions of source code must retain the above copyright notice, 1111308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer. 1211308Santhony.gutierrez@amd.com * 1311308Santhony.gutierrez@amd.com * 2. Redistributions in binary form must reproduce the above copyright notice, 1411308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer in the documentation 1511308Santhony.gutierrez@amd.com * and/or other materials provided with the distribution. 1611308Santhony.gutierrez@amd.com * 1711308Santhony.gutierrez@amd.com * 3. Neither the name of the copyright holder nor the names of its contributors 1811308Santhony.gutierrez@amd.com * may be used to endorse or promote products derived from this software 1911308Santhony.gutierrez@amd.com * without specific prior written permission. 2011308Santhony.gutierrez@amd.com * 2111308Santhony.gutierrez@amd.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 2211308Santhony.gutierrez@amd.com * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2311308Santhony.gutierrez@amd.com * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2411308Santhony.gutierrez@amd.com * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 2511308Santhony.gutierrez@amd.com * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2611308Santhony.gutierrez@amd.com * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2711308Santhony.gutierrez@amd.com * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2811308Santhony.gutierrez@amd.com * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 2911308Santhony.gutierrez@amd.com * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3011308Santhony.gutierrez@amd.com * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3111308Santhony.gutierrez@amd.com * POSSIBILITY OF SUCH DAMAGE. 3211308Santhony.gutierrez@amd.com * 3311308Santhony.gutierrez@amd.com * Author: Steve Reinhardt 3411308Santhony.gutierrez@amd.com */ 3511308Santhony.gutierrez@amd.com 3611308Santhony.gutierrez@amd.com#include "gpu-compute/shader.hh" 3711308Santhony.gutierrez@amd.com 3811308Santhony.gutierrez@amd.com#include <limits> 3911308Santhony.gutierrez@amd.com 4011308Santhony.gutierrez@amd.com#include "arch/x86/linux/linux.hh" 4111308Santhony.gutierrez@amd.com#include "base/chunk_generator.hh" 4211308Santhony.gutierrez@amd.com#include "debug/GPUDisp.hh" 4311308Santhony.gutierrez@amd.com#include "debug/GPUMem.hh" 4411308Santhony.gutierrez@amd.com#include "debug/HSAIL.hh" 4511308Santhony.gutierrez@amd.com#include "gpu-compute/dispatcher.hh" 4611308Santhony.gutierrez@amd.com#include "gpu-compute/gpu_static_inst.hh" 4711308Santhony.gutierrez@amd.com#include "gpu-compute/qstruct.hh" 4811308Santhony.gutierrez@amd.com#include "gpu-compute/wavefront.hh" 4911308Santhony.gutierrez@amd.com#include "mem/packet.hh" 5011308Santhony.gutierrez@amd.com#include "mem/ruby/system/RubySystem.hh" 5111308Santhony.gutierrez@amd.com#include "sim/sim_exit.hh" 5211308Santhony.gutierrez@amd.com 5311308Santhony.gutierrez@amd.comShader::Shader(const 
Params *p) : SimObject(p), 5411308Santhony.gutierrez@amd.com clock(p->clk_domain->clockPeriod()), cpuThread(nullptr), gpuTc(nullptr), 5511308Santhony.gutierrez@amd.com cpuPointer(p->cpu_pointer), tickEvent(this), timingSim(p->timing), 5611308Santhony.gutierrez@amd.com hsail_mode(SIMT), impl_kern_boundary_sync(p->impl_kern_boundary_sync), 5711308Santhony.gutierrez@amd.com separate_acquire_release(p->separate_acquire_release), coissue_return(1), 5811308Santhony.gutierrez@amd.com trace_vgpr_all(1), n_cu((p->CUs).size()), n_wf(p->n_wf), 5911308Santhony.gutierrez@amd.com globalMemSize(p->globalmem), nextSchedCu(0), sa_n(0), tick_cnt(0), 6011308Santhony.gutierrez@amd.com box_tick_cnt(0), start_tick_cnt(0) 6111308Santhony.gutierrez@amd.com{ 6211308Santhony.gutierrez@amd.com 6311308Santhony.gutierrez@amd.com cuList.resize(n_cu); 6411308Santhony.gutierrez@amd.com 6511308Santhony.gutierrez@amd.com for (int i = 0; i < n_cu; ++i) { 6611308Santhony.gutierrez@amd.com cuList[i] = p->CUs[i]; 6711308Santhony.gutierrez@amd.com assert(i == cuList[i]->cu_id); 6811308Santhony.gutierrez@amd.com cuList[i]->shader = this; 6911308Santhony.gutierrez@amd.com } 7011308Santhony.gutierrez@amd.com} 7111308Santhony.gutierrez@amd.com 7211308Santhony.gutierrez@amd.comAddr 7311308Santhony.gutierrez@amd.comShader::mmap(int length) 7411308Santhony.gutierrez@amd.com{ 7511308Santhony.gutierrez@amd.com 7611308Santhony.gutierrez@amd.com Addr start; 7711308Santhony.gutierrez@amd.com 7811308Santhony.gutierrez@amd.com // round up length to the next page 7911308Santhony.gutierrez@amd.com length = roundUp(length, TheISA::PageBytes); 8011308Santhony.gutierrez@amd.com 8111386Ssteve.reinhardt@amd.com Process *proc = gpuTc->getProcessPtr(); 8211386Ssteve.reinhardt@amd.com 8311386Ssteve.reinhardt@amd.com if (proc->mmapGrowsDown()) { 8411308Santhony.gutierrez@amd.com DPRINTF(HSAIL, "GROWS DOWN"); 8511386Ssteve.reinhardt@amd.com start = proc->mmap_end - length; 8611386Ssteve.reinhardt@amd.com proc->mmap_end = start; 
8711308Santhony.gutierrez@amd.com } else { 8811308Santhony.gutierrez@amd.com DPRINTF(HSAIL, "GROWS UP"); 8911386Ssteve.reinhardt@amd.com start = proc->mmap_end; 9011386Ssteve.reinhardt@amd.com proc->mmap_end += length; 9111308Santhony.gutierrez@amd.com 9211308Santhony.gutierrez@amd.com // assertion to make sure we don't overwrite the stack (it grows down) 9311386Ssteve.reinhardt@amd.com assert(proc->mmap_end < proc->stack_base - proc->max_stack_size); 9411308Santhony.gutierrez@amd.com } 9511308Santhony.gutierrez@amd.com 9611308Santhony.gutierrez@amd.com DPRINTF(HSAIL,"Shader::mmap start= %#x, %#x\n", start, length); 9711308Santhony.gutierrez@amd.com 9811386Ssteve.reinhardt@amd.com proc->allocateMem(start, length); 9911308Santhony.gutierrez@amd.com 10011308Santhony.gutierrez@amd.com return start; 10111308Santhony.gutierrez@amd.com} 10211308Santhony.gutierrez@amd.com 10311308Santhony.gutierrez@amd.comvoid 10411308Santhony.gutierrez@amd.comShader::init() 10511308Santhony.gutierrez@amd.com{ 10611308Santhony.gutierrez@amd.com // grab the threadContext of the thread running on the CPU 10711308Santhony.gutierrez@amd.com assert(cpuPointer); 10811308Santhony.gutierrez@amd.com gpuTc = cpuPointer->getContext(0); 10911308Santhony.gutierrez@amd.com assert(gpuTc); 11011308Santhony.gutierrez@amd.com} 11111308Santhony.gutierrez@amd.com 11211308Santhony.gutierrez@amd.comShader::~Shader() 11311308Santhony.gutierrez@amd.com{ 11411308Santhony.gutierrez@amd.com for (int j = 0; j < n_cu; ++j) 11511308Santhony.gutierrez@amd.com delete cuList[j]; 11611308Santhony.gutierrez@amd.com} 11711308Santhony.gutierrez@amd.com 11811308Santhony.gutierrez@amd.comvoid 11911308Santhony.gutierrez@amd.comShader::updateThreadContext(int tid) { 12011308Santhony.gutierrez@amd.com // thread context of the thread which dispatched work 12111308Santhony.gutierrez@amd.com assert(cpuPointer); 12211308Santhony.gutierrez@amd.com gpuTc = cpuPointer->getContext(tid); 12311308Santhony.gutierrez@amd.com assert(gpuTc); 
12411308Santhony.gutierrez@amd.com} 12511308Santhony.gutierrez@amd.com 12611308Santhony.gutierrez@amd.comvoid 12711308Santhony.gutierrez@amd.comShader::hostWakeUp(BaseCPU *cpu) { 12811308Santhony.gutierrez@amd.com if (cpuPointer == cpu) { 12911308Santhony.gutierrez@amd.com if (gpuTc->status() == ThreadContext::Suspended) 13011308Santhony.gutierrez@amd.com cpu->activateContext(gpuTc->threadId()); 13111308Santhony.gutierrez@amd.com } else { 13211308Santhony.gutierrez@amd.com //Make sure both dispatcher and shader are trying to 13311308Santhony.gutierrez@amd.com //wakeup same host. Hack here to enable kernel launch 13411308Santhony.gutierrez@amd.com //from multiple CPUs 13511308Santhony.gutierrez@amd.com panic("Dispatcher wants to wakeup a different host"); 13611308Santhony.gutierrez@amd.com } 13711308Santhony.gutierrez@amd.com} 13811308Santhony.gutierrez@amd.com 13911308Santhony.gutierrez@amd.comShader* 14011308Santhony.gutierrez@amd.comShaderParams::create() 14111308Santhony.gutierrez@amd.com{ 14211308Santhony.gutierrez@amd.com return new Shader(this); 14311308Santhony.gutierrez@amd.com} 14411308Santhony.gutierrez@amd.com 14511308Santhony.gutierrez@amd.comvoid 14611308Santhony.gutierrez@amd.comShader::exec() 14711308Santhony.gutierrez@amd.com{ 14811308Santhony.gutierrez@amd.com tick_cnt = curTick(); 14911308Santhony.gutierrez@amd.com box_tick_cnt = curTick() - start_tick_cnt; 15011308Santhony.gutierrez@amd.com 15111308Santhony.gutierrez@amd.com // apply any scheduled adds 15211308Santhony.gutierrez@amd.com for (int i = 0; i < sa_n; ++i) { 15311308Santhony.gutierrez@amd.com if (sa_when[i] <= tick_cnt) { 15411308Santhony.gutierrez@amd.com *sa_val[i] += sa_x[i]; 15511308Santhony.gutierrez@amd.com sa_val.erase(sa_val.begin() + i); 15611308Santhony.gutierrez@amd.com sa_x.erase(sa_x.begin() + i); 15711308Santhony.gutierrez@amd.com sa_when.erase(sa_when.begin() + i); 15811308Santhony.gutierrez@amd.com --sa_n; 15911308Santhony.gutierrez@amd.com --i; 
16011308Santhony.gutierrez@amd.com } 16111308Santhony.gutierrez@amd.com } 16211308Santhony.gutierrez@amd.com 16311308Santhony.gutierrez@amd.com // clock all of the cu's 16411308Santhony.gutierrez@amd.com for (int i = 0; i < n_cu; ++i) 16511308Santhony.gutierrez@amd.com cuList[i]->exec(); 16611308Santhony.gutierrez@amd.com} 16711308Santhony.gutierrez@amd.com 16811308Santhony.gutierrez@amd.combool 16911308Santhony.gutierrez@amd.comShader::dispatch_workgroups(NDRange *ndr) 17011308Santhony.gutierrez@amd.com{ 17111308Santhony.gutierrez@amd.com bool scheduledSomething = false; 17211308Santhony.gutierrez@amd.com int cuCount = 0; 17311308Santhony.gutierrez@amd.com int curCu = nextSchedCu; 17411308Santhony.gutierrez@amd.com 17511308Santhony.gutierrez@amd.com while (cuCount < n_cu) { 17611308Santhony.gutierrez@amd.com //Every time we try a CU, update nextSchedCu 17711308Santhony.gutierrez@amd.com nextSchedCu = (nextSchedCu + 1) % n_cu; 17811308Santhony.gutierrez@amd.com 17911308Santhony.gutierrez@amd.com // dispatch workgroup iff the following two conditions are met: 18011308Santhony.gutierrez@amd.com // (a) wg_rem is true - there are unassigned workgroups in the grid 18111308Santhony.gutierrez@amd.com // (b) there are enough free slots in cu cuList[i] for this wg 18211308Santhony.gutierrez@amd.com if (ndr->wg_disp_rem && cuList[curCu]->ReadyWorkgroup(ndr)) { 18311308Santhony.gutierrez@amd.com scheduledSomething = true; 18411308Santhony.gutierrez@amd.com DPRINTF(GPUDisp, "Dispatching a workgroup to CU %d\n", curCu); 18511308Santhony.gutierrez@amd.com 18611308Santhony.gutierrez@amd.com // ticks() member function translates cycles to simulation ticks. 
18711308Santhony.gutierrez@amd.com if (!tickEvent.scheduled()) { 18811308Santhony.gutierrez@amd.com schedule(tickEvent, curTick() + this->ticks(1)); 18911308Santhony.gutierrez@amd.com } 19011308Santhony.gutierrez@amd.com 19111308Santhony.gutierrez@amd.com cuList[curCu]->StartWorkgroup(ndr); 19211308Santhony.gutierrez@amd.com ndr->wgId[0]++; 19311308Santhony.gutierrez@amd.com ndr->globalWgId++; 19411308Santhony.gutierrez@amd.com if (ndr->wgId[0] * ndr->q.wgSize[0] >= ndr->q.gdSize[0]) { 19511308Santhony.gutierrez@amd.com ndr->wgId[0] = 0; 19611308Santhony.gutierrez@amd.com ndr->wgId[1]++; 19711308Santhony.gutierrez@amd.com 19811308Santhony.gutierrez@amd.com if (ndr->wgId[1] * ndr->q.wgSize[1] >= ndr->q.gdSize[1]) { 19911308Santhony.gutierrez@amd.com ndr->wgId[1] = 0; 20011308Santhony.gutierrez@amd.com ndr->wgId[2]++; 20111308Santhony.gutierrez@amd.com 20211308Santhony.gutierrez@amd.com if (ndr->wgId[2] * ndr->q.wgSize[2] >= ndr->q.gdSize[2]) { 20311308Santhony.gutierrez@amd.com ndr->wg_disp_rem = false; 20411308Santhony.gutierrez@amd.com break; 20511308Santhony.gutierrez@amd.com } 20611308Santhony.gutierrez@amd.com } 20711308Santhony.gutierrez@amd.com } 20811308Santhony.gutierrez@amd.com } 20911308Santhony.gutierrez@amd.com 21011308Santhony.gutierrez@amd.com ++cuCount; 21111308Santhony.gutierrez@amd.com curCu = nextSchedCu; 21211308Santhony.gutierrez@amd.com } 21311308Santhony.gutierrez@amd.com 21411308Santhony.gutierrez@amd.com return scheduledSomething; 21511308Santhony.gutierrez@amd.com} 21611308Santhony.gutierrez@amd.com 21711308Santhony.gutierrez@amd.comvoid 21811308Santhony.gutierrez@amd.comShader::handshake(GpuDispatcher *_dispatcher) 21911308Santhony.gutierrez@amd.com{ 22011308Santhony.gutierrez@amd.com dispatcher = _dispatcher; 22111308Santhony.gutierrez@amd.com} 22211308Santhony.gutierrez@amd.com 22311308Santhony.gutierrez@amd.comvoid 22411308Santhony.gutierrez@amd.comShader::doFunctionalAccess(RequestPtr req, MemCmd cmd, void *data, 
22511308Santhony.gutierrez@amd.com bool suppress_func_errors, int cu_id) 22611308Santhony.gutierrez@amd.com{ 22711308Santhony.gutierrez@amd.com unsigned block_size = RubySystem::getBlockSizeBytes(); 22811308Santhony.gutierrez@amd.com unsigned size = req->getSize(); 22911308Santhony.gutierrez@amd.com 23011308Santhony.gutierrez@amd.com Addr tmp_addr; 23111308Santhony.gutierrez@amd.com BaseTLB::Mode trans_mode; 23211308Santhony.gutierrez@amd.com 23311308Santhony.gutierrez@amd.com if (cmd == MemCmd::ReadReq) { 23411308Santhony.gutierrez@amd.com trans_mode = BaseTLB::Read; 23511308Santhony.gutierrez@amd.com } else if (cmd == MemCmd::WriteReq) { 23611308Santhony.gutierrez@amd.com trans_mode = BaseTLB::Write; 23711308Santhony.gutierrez@amd.com } else { 23811308Santhony.gutierrez@amd.com fatal("unexcepted MemCmd\n"); 23911308Santhony.gutierrez@amd.com } 24011308Santhony.gutierrez@amd.com 24111308Santhony.gutierrez@amd.com tmp_addr = req->getVaddr(); 24211308Santhony.gutierrez@amd.com Addr split_addr = roundDown(tmp_addr + size - 1, block_size); 24311308Santhony.gutierrez@amd.com 24411308Santhony.gutierrez@amd.com assert(split_addr <= tmp_addr || split_addr - tmp_addr < block_size); 24511308Santhony.gutierrez@amd.com 24611308Santhony.gutierrez@amd.com // Misaligned access 24711308Santhony.gutierrez@amd.com if (split_addr > tmp_addr) { 24811308Santhony.gutierrez@amd.com RequestPtr req1, req2; 24911308Santhony.gutierrez@amd.com req->splitOnVaddr(split_addr, req1, req2); 25011308Santhony.gutierrez@amd.com 25111308Santhony.gutierrez@amd.com 25211308Santhony.gutierrez@amd.com PacketPtr pkt1 = new Packet(req2, cmd); 25311308Santhony.gutierrez@amd.com PacketPtr pkt2 = new Packet(req1, cmd); 25411308Santhony.gutierrez@amd.com 25511308Santhony.gutierrez@amd.com functionalTLBAccess(pkt1, cu_id, trans_mode); 25611308Santhony.gutierrez@amd.com functionalTLBAccess(pkt2, cu_id, trans_mode); 25711308Santhony.gutierrez@amd.com 25811308Santhony.gutierrez@amd.com PacketPtr new_pkt1 = new 
Packet(pkt1->req, cmd); 25911308Santhony.gutierrez@amd.com PacketPtr new_pkt2 = new Packet(pkt2->req, cmd); 26011308Santhony.gutierrez@amd.com 26111308Santhony.gutierrez@amd.com new_pkt1->dataStatic(data); 26211308Santhony.gutierrez@amd.com new_pkt2->dataStatic((uint8_t*)data + req1->getSize()); 26311308Santhony.gutierrez@amd.com 26411308Santhony.gutierrez@amd.com if (suppress_func_errors) { 26511308Santhony.gutierrez@amd.com new_pkt1->setSuppressFuncError(); 26611308Santhony.gutierrez@amd.com new_pkt2->setSuppressFuncError(); 26711308Santhony.gutierrez@amd.com } 26811308Santhony.gutierrez@amd.com 26911308Santhony.gutierrez@amd.com // fixme: this should be cuList[cu_id] if cu_id != n_cu 27011308Santhony.gutierrez@amd.com // The latter requires a memPort in the dispatcher 27111308Santhony.gutierrez@amd.com cuList[0]->memPort[0]->sendFunctional(new_pkt1); 27211308Santhony.gutierrez@amd.com cuList[0]->memPort[0]->sendFunctional(new_pkt2); 27311308Santhony.gutierrez@amd.com 27411308Santhony.gutierrez@amd.com delete new_pkt1; 27511308Santhony.gutierrez@amd.com delete new_pkt2; 27611308Santhony.gutierrez@amd.com delete pkt1; 27711308Santhony.gutierrez@amd.com delete pkt2; 27811308Santhony.gutierrez@amd.com } else { 27911308Santhony.gutierrez@amd.com PacketPtr pkt = new Packet(req, cmd); 28011308Santhony.gutierrez@amd.com functionalTLBAccess(pkt, cu_id, trans_mode); 28111308Santhony.gutierrez@amd.com PacketPtr new_pkt = new Packet(pkt->req, cmd); 28211308Santhony.gutierrez@amd.com new_pkt->dataStatic(data); 28311308Santhony.gutierrez@amd.com 28411308Santhony.gutierrez@amd.com if (suppress_func_errors) { 28511308Santhony.gutierrez@amd.com new_pkt->setSuppressFuncError(); 28611308Santhony.gutierrez@amd.com }; 28711308Santhony.gutierrez@amd.com 28811308Santhony.gutierrez@amd.com // fixme: this should be cuList[cu_id] if cu_id != n_cu 28911308Santhony.gutierrez@amd.com // The latter requires a memPort in the dispatcher 29011308Santhony.gutierrez@amd.com 
cuList[0]->memPort[0]->sendFunctional(new_pkt); 29111308Santhony.gutierrez@amd.com 29211308Santhony.gutierrez@amd.com delete new_pkt; 29311308Santhony.gutierrez@amd.com delete pkt; 29411308Santhony.gutierrez@amd.com } 29511308Santhony.gutierrez@amd.com} 29611308Santhony.gutierrez@amd.com 29711308Santhony.gutierrez@amd.combool 29811308Santhony.gutierrez@amd.comShader::busy() 29911308Santhony.gutierrez@amd.com{ 30011308Santhony.gutierrez@amd.com for (int i_cu = 0; i_cu < n_cu; ++i_cu) { 30111308Santhony.gutierrez@amd.com if (!cuList[i_cu]->isDone()) { 30211308Santhony.gutierrez@amd.com return true; 30311308Santhony.gutierrez@amd.com } 30411308Santhony.gutierrez@amd.com } 30511308Santhony.gutierrez@amd.com 30611308Santhony.gutierrez@amd.com return false; 30711308Santhony.gutierrez@amd.com} 30811308Santhony.gutierrez@amd.com 30911308Santhony.gutierrez@amd.comvoid 31011308Santhony.gutierrez@amd.comShader::ScheduleAdd(uint32_t *val,Tick when,int x) 31111308Santhony.gutierrez@amd.com{ 31211308Santhony.gutierrez@amd.com sa_val.push_back(val); 31311308Santhony.gutierrez@amd.com sa_when.push_back(tick_cnt + when); 31411308Santhony.gutierrez@amd.com sa_x.push_back(x); 31511308Santhony.gutierrez@amd.com ++sa_n; 31611308Santhony.gutierrez@amd.com} 31711308Santhony.gutierrez@amd.com 31811308Santhony.gutierrez@amd.comShader::TickEvent::TickEvent(Shader *_shader) 31911308Santhony.gutierrez@amd.com : Event(CPU_Tick_Pri), shader(_shader) 32011308Santhony.gutierrez@amd.com{ 32111308Santhony.gutierrez@amd.com} 32211308Santhony.gutierrez@amd.com 32311308Santhony.gutierrez@amd.com 32411308Santhony.gutierrez@amd.comvoid 32511308Santhony.gutierrez@amd.comShader::TickEvent::process() 32611308Santhony.gutierrez@amd.com{ 32711308Santhony.gutierrez@amd.com if (shader->busy()) { 32811308Santhony.gutierrez@amd.com shader->exec(); 32911308Santhony.gutierrez@amd.com shader->schedule(this, curTick() + shader->ticks(1)); 33011308Santhony.gutierrez@amd.com } 33111308Santhony.gutierrez@amd.com} 
33211308Santhony.gutierrez@amd.com 33311308Santhony.gutierrez@amd.comconst char* 33411308Santhony.gutierrez@amd.comShader::TickEvent::description() const 33511308Santhony.gutierrez@amd.com{ 33611308Santhony.gutierrez@amd.com return "Shader tick"; 33711308Santhony.gutierrez@amd.com} 33811308Santhony.gutierrez@amd.com 33911308Santhony.gutierrez@amd.comvoid 34011308Santhony.gutierrez@amd.comShader::AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, 34111308Santhony.gutierrez@amd.com MemCmd cmd, bool suppress_func_errors) 34211308Santhony.gutierrez@amd.com{ 34311308Santhony.gutierrez@amd.com uint8_t *data_buf = (uint8_t*)ptr; 34411308Santhony.gutierrez@amd.com 34511308Santhony.gutierrez@amd.com for (ChunkGenerator gen(address, size, RubySystem::getBlockSizeBytes()); 34611308Santhony.gutierrez@amd.com !gen.done(); gen.next()) { 34711308Santhony.gutierrez@amd.com Request *req = new Request(0, gen.addr(), gen.size(), 0, 34811308Santhony.gutierrez@amd.com cuList[0]->masterId(), 0, 0, 0); 34911308Santhony.gutierrez@amd.com 35011308Santhony.gutierrez@amd.com doFunctionalAccess(req, cmd, data_buf, suppress_func_errors, cu_id); 35111308Santhony.gutierrez@amd.com data_buf += gen.size(); 35211308Santhony.gutierrez@amd.com delete req; 35311308Santhony.gutierrez@amd.com } 35411308Santhony.gutierrez@amd.com} 35511308Santhony.gutierrez@amd.com 35611308Santhony.gutierrez@amd.comvoid 35711308Santhony.gutierrez@amd.comShader::ReadMem(uint64_t address, void *ptr, uint32_t size, int cu_id) 35811308Santhony.gutierrez@amd.com{ 35911308Santhony.gutierrez@amd.com AccessMem(address, ptr, size, cu_id, MemCmd::ReadReq, false); 36011308Santhony.gutierrez@amd.com} 36111308Santhony.gutierrez@amd.com 36211308Santhony.gutierrez@amd.comvoid 36311308Santhony.gutierrez@amd.comShader::ReadMem(uint64_t address, void *ptr, uint32_t size, int cu_id, 36411308Santhony.gutierrez@amd.com bool suppress_func_errors) 36511308Santhony.gutierrez@amd.com{ 36611308Santhony.gutierrez@amd.com 
AccessMem(address, ptr, size, cu_id, MemCmd::ReadReq, suppress_func_errors); 36711308Santhony.gutierrez@amd.com} 36811308Santhony.gutierrez@amd.com 36911308Santhony.gutierrez@amd.comvoid 37011308Santhony.gutierrez@amd.comShader::WriteMem(uint64_t address, void *ptr,uint32_t size, int cu_id) 37111308Santhony.gutierrez@amd.com{ 37211308Santhony.gutierrez@amd.com AccessMem(address, ptr, size, cu_id, MemCmd::WriteReq, false); 37311308Santhony.gutierrez@amd.com} 37411308Santhony.gutierrez@amd.com 37511308Santhony.gutierrez@amd.comvoid 37611308Santhony.gutierrez@amd.comShader::WriteMem(uint64_t address, void *ptr, uint32_t size, int cu_id, 37711308Santhony.gutierrez@amd.com bool suppress_func_errors) 37811308Santhony.gutierrez@amd.com{ 37911308Santhony.gutierrez@amd.com AccessMem(address, ptr, size, cu_id, MemCmd::WriteReq, 38011308Santhony.gutierrez@amd.com suppress_func_errors); 38111308Santhony.gutierrez@amd.com} 38211308Santhony.gutierrez@amd.com 38311308Santhony.gutierrez@amd.com/* 38411308Santhony.gutierrez@amd.com * Send a packet through the appropriate TLB functional port. 38511308Santhony.gutierrez@amd.com * If cu_id=n_cu, then this is the dispatcher's TLB. 38611308Santhony.gutierrez@amd.com * Otherwise it's the TLB of the cu_id compute unit. 38711308Santhony.gutierrez@amd.com */ 38811308Santhony.gutierrez@amd.comvoid 38911308Santhony.gutierrez@amd.comShader::functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode) 39011308Santhony.gutierrez@amd.com{ 39111308Santhony.gutierrez@amd.com // update senderState. 
Need to know the gpuTc and the TLB mode 39211308Santhony.gutierrez@amd.com pkt->senderState = 39311308Santhony.gutierrez@amd.com new TheISA::GpuTLB::TranslationState(mode, gpuTc, false); 39411308Santhony.gutierrez@amd.com 39511308Santhony.gutierrez@amd.com if (cu_id == n_cu) { 39611308Santhony.gutierrez@amd.com dispatcher->tlbPort->sendFunctional(pkt); 39711308Santhony.gutierrez@amd.com } else { 39811308Santhony.gutierrez@amd.com // even when the perLaneTLB flag is turned on 39911308Santhony.gutierrez@amd.com // it's ok tp send all accesses through lane 0 40011308Santhony.gutierrez@amd.com // since the lane # is not known here, 40111308Santhony.gutierrez@amd.com // This isn't important since these are functional accesses. 40211308Santhony.gutierrez@amd.com cuList[cu_id]->tlbPort[0]->sendFunctional(pkt); 40311308Santhony.gutierrez@amd.com } 40411308Santhony.gutierrez@amd.com 40511308Santhony.gutierrez@amd.com /* safe_cast the senderState */ 40611308Santhony.gutierrez@amd.com TheISA::GpuTLB::TranslationState *sender_state = 40711308Santhony.gutierrez@amd.com safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState); 40811308Santhony.gutierrez@amd.com 40911308Santhony.gutierrez@amd.com delete sender_state->tlbEntry; 41011308Santhony.gutierrez@amd.com delete pkt->senderState; 41111308Santhony.gutierrez@amd.com} 412