shader.cc (11435:0f1b46dde3fa) shader.cc (11698:d1ad31187fa5)
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#include "gpu-compute/shader.hh"
37
38#include <limits>
39
40#include "arch/x86/linux/linux.hh"
41#include "base/chunk_generator.hh"
42#include "debug/GPUDisp.hh"
43#include "debug/GPUMem.hh"
44#include "debug/HSAIL.hh"
45#include "gpu-compute/dispatcher.hh"
46#include "gpu-compute/gpu_static_inst.hh"
47#include "gpu-compute/qstruct.hh"
48#include "gpu-compute/wavefront.hh"
49#include "mem/packet.hh"
50#include "mem/ruby/system/RubySystem.hh"
51#include "sim/sim_exit.hh"
52
53Shader::Shader(const Params *p) : SimObject(p),
54 clock(p->clk_domain->clockPeriod()), cpuThread(nullptr), gpuTc(nullptr),
55 cpuPointer(p->cpu_pointer), tickEvent(this), timingSim(p->timing),
56 hsail_mode(SIMT), impl_kern_boundary_sync(p->impl_kern_boundary_sync),
57 separate_acquire_release(p->separate_acquire_release), coissue_return(1),
58 trace_vgpr_all(1), n_cu((p->CUs).size()), n_wf(p->n_wf),
59 globalMemSize(p->globalmem), nextSchedCu(0), sa_n(0), tick_cnt(0),
60 box_tick_cnt(0), start_tick_cnt(0)
61{
62
63 cuList.resize(n_cu);
64
65 for (int i = 0; i < n_cu; ++i) {
66 cuList[i] = p->CUs[i];
67 assert(i == cuList[i]->cu_id);
68 cuList[i]->shader = this;
69 }
70}
71
72Addr
73Shader::mmap(int length)
74{
75
76 Addr start;
77
78 // round up length to the next page
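    // (e.g., assuming 4 KiB x86 pages, a 6000-byte request is rounded up to
    //  8192 bytes, i.e. two full pages)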
79 length = roundUp(length, TheISA::PageBytes);
80
81 Process *proc = gpuTc->getProcessPtr();
82
83 if (proc->mmapGrowsDown()) {
 84        DPRINTF(HSAIL, "GROWS DOWN\n");
85 start = proc->mmap_end - length;
86 proc->mmap_end = start;
87 } else {
 88        DPRINTF(HSAIL, "GROWS UP\n");
89 start = proc->mmap_end;
90 proc->mmap_end += length;
91
92 // assertion to make sure we don't overwrite the stack (it grows down)
93 assert(proc->mmap_end < proc->stack_base - proc->max_stack_size);
94 }
95
 96    DPRINTF(HSAIL, "Shader::mmap start = %#x, length = %#x\n", start, length);
97
98 proc->allocateMem(start, length);
99
100 return start;
101}
102
103void
104Shader::init()
105{
106 // grab the threadContext of the thread running on the CPU
107 assert(cpuPointer);
108 gpuTc = cpuPointer->getContext(0);
109 assert(gpuTc);
110}
111
112Shader::~Shader()
113{
114 for (int j = 0; j < n_cu; ++j)
115 delete cuList[j];
116}
117
118void
119Shader::updateContext(int cid) {
120 // context of the thread which dispatched work
121 assert(cpuPointer);
122 gpuTc = cpuPointer->getContext(cid);
123 assert(gpuTc);
124}
125
126void
127Shader::hostWakeUp(BaseCPU *cpu) {
128 if (cpuPointer == cpu) {
129 if (gpuTc->status() == ThreadContext::Suspended)
130 cpu->activateContext(gpuTc->threadId());
131 } else {
 132        // Make sure both the dispatcher and the shader are trying to
 133        // wake up the same host. This is a hack to enable kernel
 134        // launches from multiple CPUs.
135 panic("Dispatcher wants to wakeup a different host");
136 }
137}
138
139Shader*
140ShaderParams::create()
141{
142 return new Shader(this);
143}
144
145void
146Shader::exec()
147{
148 tick_cnt = curTick();
149 box_tick_cnt = curTick() - start_tick_cnt;
150
151 // apply any scheduled adds
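    // sa_val/sa_x/sa_when are parallel vectors filled by ScheduleAdd(): once
    // the scheduled tick has been reached, add sa_x[i] to *sa_val[i] and erase
    // the entry (hence the index adjustment below)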
152 for (int i = 0; i < sa_n; ++i) {
153 if (sa_when[i] <= tick_cnt) {
154 *sa_val[i] += sa_x[i];
155 sa_val.erase(sa_val.begin() + i);
156 sa_x.erase(sa_x.begin() + i);
157 sa_when.erase(sa_when.begin() + i);
158 --sa_n;
159 --i;
160 }
161 }
162
 163    // clock all of the CUs
164 for (int i = 0; i < n_cu; ++i)
165 cuList[i]->exec();
166}
167
168bool
169Shader::dispatch_workgroups(NDRange *ndr)
170{
171 bool scheduledSomething = false;
172 int cuCount = 0;
173 int curCu = nextSchedCu;
174
175 while (cuCount < n_cu) {
176 //Every time we try a CU, update nextSchedCu
177 nextSchedCu = (nextSchedCu + 1) % n_cu;
178
 179        // dispatch a workgroup iff the following two conditions are met:
 180        // (a) wg_disp_rem is true - there are unassigned workgroups in the grid
 181        // (b) cuList[curCu] has enough free slots for this workgroup
182 if (ndr->wg_disp_rem && cuList[curCu]->ReadyWorkgroup(ndr)) {
183 scheduledSomething = true;
184 DPRINTF(GPUDisp, "Dispatching a workgroup to CU %d\n", curCu);
185
186 // ticks() member function translates cycles to simulation ticks.
187 if (!tickEvent.scheduled()) {
188 schedule(tickEvent, curTick() + this->ticks(1));
189 }
190
191 cuList[curCu]->StartWorkgroup(ndr);
192 ndr->wgId[0]++;
193 ndr->globalWgId++;
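            // advance the 3-D workgroup ID like an odometer: when a dimension
            // wraps past the grid size, reset it and carry into the next one;
            // once all three dimensions wrap, no workgroups remain to dispatch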
194 if (ndr->wgId[0] * ndr->q.wgSize[0] >= ndr->q.gdSize[0]) {
195 ndr->wgId[0] = 0;
196 ndr->wgId[1]++;
197
198 if (ndr->wgId[1] * ndr->q.wgSize[1] >= ndr->q.gdSize[1]) {
199 ndr->wgId[1] = 0;
200 ndr->wgId[2]++;
201
202 if (ndr->wgId[2] * ndr->q.wgSize[2] >= ndr->q.gdSize[2]) {
203 ndr->wg_disp_rem = false;
204 break;
205 }
206 }
207 }
208 }
209
210 ++cuCount;
211 curCu = nextSchedCu;
212 }
213
214 return scheduledSomething;
215}
216
217void
218Shader::handshake(GpuDispatcher *_dispatcher)
219{
220 dispatcher = _dispatcher;
221}
222
223void
224Shader::doFunctionalAccess(RequestPtr req, MemCmd cmd, void *data,
225 bool suppress_func_errors, int cu_id)
226{
227 unsigned block_size = RubySystem::getBlockSizeBytes();
227 int block_size = cuList.at(cu_id)->cacheLineSize();
228 unsigned size = req->getSize();
229
230 Addr tmp_addr;
231 BaseTLB::Mode trans_mode;
232
233 if (cmd == MemCmd::ReadReq) {
234 trans_mode = BaseTLB::Read;
235 } else if (cmd == MemCmd::WriteReq) {
236 trans_mode = BaseTLB::Write;
237 } else {
 238        fatal("unexpected MemCmd\n");
239 }
240
241 tmp_addr = req->getVaddr();
242 Addr split_addr = roundDown(tmp_addr + size - 1, block_size);
243
244 assert(split_addr <= tmp_addr || split_addr - tmp_addr < block_size);
245
246 // Misaligned access
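    // (illustrative example, assuming a 64-byte block: an 8-byte access at
    //  vaddr 0x7c ends at 0x83, so split_addr = 0x80 > 0x7c and the request
    //  is split into a 4-byte piece at 0x7c and a 4-byte piece at 0x80)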
247 if (split_addr > tmp_addr) {
248 RequestPtr req1, req2;
249 req->splitOnVaddr(split_addr, req1, req2);
250
251
252 PacketPtr pkt1 = new Packet(req2, cmd);
253 PacketPtr pkt2 = new Packet(req1, cmd);
254
255 functionalTLBAccess(pkt1, cu_id, trans_mode);
256 functionalTLBAccess(pkt2, cu_id, trans_mode);
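        // the functional TLB accesses translate the requests in place, so the
        // packets rebuilt from pkt1->req / pkt2->req below are expected to
        // carry the translated (physical) addresses for the functional sends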
257
258 PacketPtr new_pkt1 = new Packet(pkt1->req, cmd);
259 PacketPtr new_pkt2 = new Packet(pkt2->req, cmd);
260
261 new_pkt1->dataStatic(data);
262 new_pkt2->dataStatic((uint8_t*)data + req1->getSize());
263
264 if (suppress_func_errors) {
265 new_pkt1->setSuppressFuncError();
266 new_pkt2->setSuppressFuncError();
267 }
268
 269        // fixme: this should be cuList[cu_id] when cu_id != n_cu; the
 270        // dispatcher case (cu_id == n_cu) would require a memPort there
271 cuList[0]->memPort[0]->sendFunctional(new_pkt1);
272 cuList[0]->memPort[0]->sendFunctional(new_pkt2);
273
274 delete new_pkt1;
275 delete new_pkt2;
276 delete pkt1;
277 delete pkt2;
278 } else {
279 PacketPtr pkt = new Packet(req, cmd);
280 functionalTLBAccess(pkt, cu_id, trans_mode);
281 PacketPtr new_pkt = new Packet(pkt->req, cmd);
282 new_pkt->dataStatic(data);
283
284 if (suppress_func_errors) {
285 new_pkt->setSuppressFuncError();
 286        }
287
 288        // fixme: this should be cuList[cu_id] when cu_id != n_cu; the
 289        // dispatcher case (cu_id == n_cu) would require a memPort there
290 cuList[0]->memPort[0]->sendFunctional(new_pkt);
291
292 delete new_pkt;
293 delete pkt;
294 }
295}
296
297bool
298Shader::busy()
299{
300 for (int i_cu = 0; i_cu < n_cu; ++i_cu) {
301 if (!cuList[i_cu]->isDone()) {
302 return true;
303 }
304 }
305
306 return false;
307}
308
309void
310Shader::ScheduleAdd(uint32_t *val,Tick when,int x)
311{
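    // register a deferred add: *val is incremented by x once 'when' ticks past
    // the shader's current tick_cnt have elapsed (applied in Shader::exec())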
312 sa_val.push_back(val);
313 sa_when.push_back(tick_cnt + when);
314 sa_x.push_back(x);
315 ++sa_n;
316}
317
318Shader::TickEvent::TickEvent(Shader *_shader)
319 : Event(CPU_Tick_Pri), shader(_shader)
320{
321}
322
323
324void
325Shader::TickEvent::process()
326{
327 if (shader->busy()) {
328 shader->exec();
329 shader->schedule(this, curTick() + shader->ticks(1));
330 }
331}
332
333const char*
334Shader::TickEvent::description() const
335{
336 return "Shader tick";
337}
338
339void
340Shader::AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
341 MemCmd cmd, bool suppress_func_errors)
342{
343 uint8_t *data_buf = (uint8_t*)ptr;
344
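    // ChunkGenerator splits [address, address + size) into pieces that never
    // cross a block-size boundary, so each doFunctionalAccess() call below
    // stays within a single cache line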
345 for (ChunkGenerator gen(address, size, RubySystem::getBlockSizeBytes());
345 for (ChunkGenerator gen(address, size, cuList.at(cu_id)->cacheLineSize());
346 !gen.done(); gen.next()) {
347 Request *req = new Request(0, gen.addr(), gen.size(), 0,
348 cuList[0]->masterId(), 0, 0, 0);
349
350 doFunctionalAccess(req, cmd, data_buf, suppress_func_errors, cu_id);
351 data_buf += gen.size();
352 delete req;
353 }
354}
355
356void
357Shader::ReadMem(uint64_t address, void *ptr, uint32_t size, int cu_id)
358{
359 AccessMem(address, ptr, size, cu_id, MemCmd::ReadReq, false);
360}
361
362void
363Shader::ReadMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
364 bool suppress_func_errors)
365{
366 AccessMem(address, ptr, size, cu_id, MemCmd::ReadReq, suppress_func_errors);
367}
368
369void
370Shader::WriteMem(uint64_t address, void *ptr,uint32_t size, int cu_id)
371{
372 AccessMem(address, ptr, size, cu_id, MemCmd::WriteReq, false);
373}
374
375void
376Shader::WriteMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
377 bool suppress_func_errors)
378{
379 AccessMem(address, ptr, size, cu_id, MemCmd::WriteReq,
380 suppress_func_errors);
381}
382
383/*
384 * Send a packet through the appropriate TLB functional port.
385 * If cu_id=n_cu, then this is the dispatcher's TLB.
386 * Otherwise it's the TLB of the cu_id compute unit.
387 */
388void
389Shader::functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode)
390{
391 // update senderState. Need to know the gpuTc and the TLB mode
392 pkt->senderState =
393 new TheISA::GpuTLB::TranslationState(mode, gpuTc, false);
394
395 if (cu_id == n_cu) {
396 dispatcher->tlbPort->sendFunctional(pkt);
397 } else {
 398        // Even when the perLaneTLB flag is turned on it's OK to send
 399        // all accesses through lane 0, since the lane # is not known
 400        // here. This doesn't matter because these are functional
 401        // accesses.
402 cuList[cu_id]->tlbPort[0]->sendFunctional(pkt);
403 }
404
405 /* safe_cast the senderState */
406 TheISA::GpuTLB::TranslationState *sender_state =
407 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
408
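    // the TLB allocated the translation entry held in the sender state during
    // the functional access; free it along with the sender state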
409 delete sender_state->tlbEntry;
410 delete pkt->senderState;
411}