shader.cc: revision 12748:ae5ce8e42de7 vs. revision 12749:223c83ed9979
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Steve Reinhardt
34 */
35
36#include "gpu-compute/shader.hh"
37
38#include <limits>
39
40#include "arch/x86/linux/linux.hh"
41#include "base/chunk_generator.hh"
42#include "debug/GPUDisp.hh"
43#include "debug/GPUMem.hh"
44#include "debug/HSAIL.hh"
45#include "gpu-compute/dispatcher.hh"
46#include "gpu-compute/gpu_static_inst.hh"
47#include "gpu-compute/qstruct.hh"
48#include "gpu-compute/wavefront.hh"
49#include "mem/packet.hh"
50#include "mem/ruby/system/RubySystem.hh"
51#include "sim/sim_exit.hh"
52
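// Construct the shader: latch the clock period from the clock domain, take
// the compute units from the params, and point each CU back at this shader
// (the CU list is expected to be indexed by cu_id).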
53Shader::Shader(const Params *p)
54 : ClockedObject(p), clock(p->clk_domain->clockPeriod()),
55 cpuThread(nullptr), gpuTc(nullptr), cpuPointer(p->cpu_pointer),
56 tickEvent([this]{ processTick(); }, "Shader tick",
57 false, Event::CPU_Tick_Pri),
58 timingSim(p->timing), hsail_mode(SIMT),
59 impl_kern_boundary_sync(p->impl_kern_boundary_sync),
60 separate_acquire_release(p->separate_acquire_release), coissue_return(1),
61 trace_vgpr_all(1), n_cu((p->CUs).size()), n_wf(p->n_wf),
62 globalMemSize(p->globalmem), nextSchedCu(0), sa_n(0), tick_cnt(0),
63 box_tick_cnt(0), start_tick_cnt(0)
64{
65
66 cuList.resize(n_cu);
67
68 for (int i = 0; i < n_cu; ++i) {
69 cuList[i] = p->CUs[i];
70 assert(i == cuList[i]->cu_id);
71 cuList[i]->shader = this;
72 }
73}
74
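// Reserve 'length' bytes (rounded up to a whole page) in the host process's
// mmap region, growing the region in whichever direction the process uses,
// and back the range with memory via Process::allocateMem().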
75Addr
76Shader::mmap(int length)
77{
78
79 Addr start;
80
81 // round up length to the next page
82 length = roundUp(length, TheISA::PageBytes);
83
84 Process *proc = gpuTc->getProcessPtr();
85 auto mem_state = proc->memState;
86
87 if (proc->mmapGrowsDown()) {
88 DPRINTF(HSAIL, "GROWS DOWN");
89 start = mem_state->getMmapEnd() - length;
90 mem_state->setMmapEnd(start);
91 } else {
92 DPRINTF(HSAIL, "GROWS UP");
93 start = mem_state->getMmapEnd();
94 mem_state->setMmapEnd(start + length);
95
96 // assertion to make sure we don't overwrite the stack (it grows down)
97 assert(mem_state->getStackBase() - mem_state->getMaxStackSize() >
98 mem_state->getMmapEnd());
99 }
100
 101 DPRINTF(HSAIL, "Shader::mmap start = %#x, length = %#x\n", start, length);
102
103 proc->allocateMem(start, length);
104
105 return start;
106}
107
108void
109Shader::init()
110{
111 // grab the threadContext of the thread running on the CPU
112 assert(cpuPointer);
113 gpuTc = cpuPointer->getContext(0);
114 assert(gpuTc);
115}
116
117Shader::~Shader()
118{
119 for (int j = 0; j < n_cu; ++j)
120 delete cuList[j];
121}
122
123void
124Shader::updateContext(int cid) {
125 // context of the thread which dispatched work
126 assert(cpuPointer);
127 gpuTc = cpuPointer->getContext(cid);
128 assert(gpuTc);
129}
130
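// Wake the host CPU thread that owns this shader's thread context if it is
// suspended; only the CPU this shader is bound to may be woken here.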
131void
132Shader::hostWakeUp(BaseCPU *cpu) {
133 if (cpuPointer == cpu) {
134 if (gpuTc->status() == ThreadContext::Suspended)
135 cpu->activateContext(gpuTc->threadId());
136 } else {
 137 // Make sure both the dispatcher and the shader are trying to
 138 // wake up the same host. This is a hack to enable kernel launch
 139 // from multiple CPUs.
140 panic("Dispatcher wants to wakeup a different host");
141 }
142}
143
144Shader*
145ShaderParams::create()
146{
147 return new Shader(this);
148}
149
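// One shader step: record the current tick, apply any ScheduleAdd() updates
// whose time has arrived, then call exec() on every compute unit.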
150void
151Shader::exec()
152{
153 tick_cnt = curTick();
154 box_tick_cnt = curTick() - start_tick_cnt;
155
156 // apply any scheduled adds
157 for (int i = 0; i < sa_n; ++i) {
158 if (sa_when[i] <= tick_cnt) {
159 *sa_val[i] += sa_x[i];
160 sa_val.erase(sa_val.begin() + i);
161 sa_x.erase(sa_x.begin() + i);
162 sa_when.erase(sa_when.begin() + i);
163 --sa_n;
164 --i;
165 }
166 }
167
 168 // clock all of the CUs
169 for (int i = 0; i < n_cu; ++i)
170 cuList[i]->exec();
171}
172
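// Hand out workgroups of the NDRange round-robin over the compute units,
// starting at nextSchedCu: while unassigned workgroups remain, each CU that
// reports ReadyWorkgroup() receives one, and the 3-D workgroup ID is advanced
// until the grid is exhausted. Returns true if anything was dispatched.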
173bool
174Shader::dispatch_workgroups(NDRange *ndr)
175{
176 bool scheduledSomething = false;
177 int cuCount = 0;
178 int curCu = nextSchedCu;
179
180 while (cuCount < n_cu) {
181 //Every time we try a CU, update nextSchedCu
182 nextSchedCu = (nextSchedCu + 1) % n_cu;
183
184 // dispatch workgroup iff the following two conditions are met:
 185 // (a) wg_disp_rem is true - there are unassigned workgroups in the grid
 186 // (b) there are enough free slots in cuList[curCu] for this wg
187 if (ndr->wg_disp_rem && cuList[curCu]->ReadyWorkgroup(ndr)) {
188 scheduledSomething = true;
189 DPRINTF(GPUDisp, "Dispatching a workgroup to CU %d\n", curCu);
190
191 // ticks() member function translates cycles to simulation ticks.
192 if (!tickEvent.scheduled()) {
193 schedule(tickEvent, curTick() + this->ticks(1));
194 }
195
196 cuList[curCu]->StartWorkgroup(ndr);
197 ndr->wgId[0]++;
198 ndr->globalWgId++;
199 if (ndr->wgId[0] * ndr->q.wgSize[0] >= ndr->q.gdSize[0]) {
200 ndr->wgId[0] = 0;
201 ndr->wgId[1]++;
202
203 if (ndr->wgId[1] * ndr->q.wgSize[1] >= ndr->q.gdSize[1]) {
204 ndr->wgId[1] = 0;
205 ndr->wgId[2]++;
206
207 if (ndr->wgId[2] * ndr->q.wgSize[2] >= ndr->q.gdSize[2]) {
208 ndr->wg_disp_rem = false;
209 break;
210 }
211 }
212 }
213 }
214
215 ++cuCount;
216 curCu = nextSchedCu;
217 }
218
219 return scheduledSomething;
220}
221
222void
223Shader::handshake(GpuDispatcher *_dispatcher)
224{
225 dispatcher = _dispatcher;
226}
227
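// Issue a functional (non-timing) access for 'req'. The virtual address is
// translated through the GPU TLB first; accesses that straddle a cache-line
// boundary are split into two requests and sent separately.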
228void
// 12748:ae5ce8e42de7
229Shader::doFunctionalAccess(RequestPtr req, MemCmd cmd, void *data,
// 12749:223c83ed9979
229Shader::doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
230 bool suppress_func_errors, int cu_id)
231{
232 int block_size = cuList.at(cu_id)->cacheLineSize();
233 unsigned size = req->getSize();
234
235 Addr tmp_addr;
236 BaseTLB::Mode trans_mode;
237
238 if (cmd == MemCmd::ReadReq) {
239 trans_mode = BaseTLB::Read;
240 } else if (cmd == MemCmd::WriteReq) {
241 trans_mode = BaseTLB::Write;
242 } else {
243 fatal("unexcepted MemCmd\n");
244 }
245
246 tmp_addr = req->getVaddr();
247 Addr split_addr = roundDown(tmp_addr + size - 1, block_size);
248
249 assert(split_addr <= tmp_addr || split_addr - tmp_addr < block_size);
250
251 // Misaligned access
252 if (split_addr > tmp_addr) {
253 RequestPtr req1, req2;
254 req->splitOnVaddr(split_addr, req1, req2);
255
256
257 PacketPtr pkt1 = new Packet(req2, cmd);
258 PacketPtr pkt2 = new Packet(req1, cmd);
259
260 functionalTLBAccess(pkt1, cu_id, trans_mode);
261 functionalTLBAccess(pkt2, cu_id, trans_mode);
262
263 PacketPtr new_pkt1 = new Packet(pkt1->req, cmd);
264 PacketPtr new_pkt2 = new Packet(pkt2->req, cmd);
265
266 new_pkt1->dataStatic(data);
267 new_pkt2->dataStatic((uint8_t*)data + req1->getSize());
268
269 if (suppress_func_errors) {
270 new_pkt1->setSuppressFuncError();
271 new_pkt2->setSuppressFuncError();
272 }
273
 274 // FIXME: this should use cuList[cu_id] when cu_id != n_cu;
 275 // the cu_id == n_cu (dispatcher) case requires a memPort in the dispatcher
276 cuList[0]->memPort[0]->sendFunctional(new_pkt1);
277 cuList[0]->memPort[0]->sendFunctional(new_pkt2);
278
279 delete new_pkt1;
280 delete new_pkt2;
281 delete pkt1;
282 delete pkt2;
283 } else {
284 PacketPtr pkt = new Packet(req, cmd);
285 functionalTLBAccess(pkt, cu_id, trans_mode);
286 PacketPtr new_pkt = new Packet(pkt->req, cmd);
287 new_pkt->dataStatic(data);
288
289 if (suppress_func_errors) {
290 new_pkt->setSuppressFuncError();
 291 }
292
 293 // FIXME: this should use cuList[cu_id] when cu_id != n_cu;
 294 // the cu_id == n_cu (dispatcher) case requires a memPort in the dispatcher
295 cuList[0]->memPort[0]->sendFunctional(new_pkt);
296
297 delete new_pkt;
298 delete pkt;
299 }
300}
301
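// The shader is busy while any compute unit still has outstanding work.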
302bool
303Shader::busy()
304{
305 for (int i_cu = 0; i_cu < n_cu; ++i_cu) {
306 if (!cuList[i_cu]->isDone()) {
307 return true;
308 }
309 }
310
311 return false;
312}
313
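// Register a deferred increment: 'when' ticks after the shader's current
// tick count, add 'x' to '*val'. The update itself is applied in exec().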
314void
 315Shader::ScheduleAdd(uint32_t *val, Tick when, int x)
316{
317 sa_val.push_back(val);
318 sa_when.push_back(tick_cnt + when);
319 sa_x.push_back(x);
320 ++sa_n;
321}
322
323
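// Tick handler: while the shader is busy, run one exec() step and reschedule
// the tick event one cycle later.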
324void
325Shader::processTick()
326{
327 if (busy()) {
328 exec();
329 schedule(tickEvent, curTick() + ticks(1));
330 }
331}
332
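// Functionally read or write 'size' bytes at 'address', splitting the access
// into cache-line-sized chunks and issuing each chunk through
// doFunctionalAccess().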
333void
334Shader::AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
335 MemCmd cmd, bool suppress_func_errors)
336{
337 uint8_t *data_buf = (uint8_t*)ptr;
338
339 for (ChunkGenerator gen(address, size, cuList.at(cu_id)->cacheLineSize());
340 !gen.done(); gen.next()) {
// 12748:ae5ce8e42de7
 341 RequestPtr req = new Request(0, gen.addr(), gen.size(), 0,
 342 cuList[0]->masterId(), 0, 0, 0);
 343
 344 doFunctionalAccess(req, cmd, data_buf, suppress_func_errors, cu_id);
 345 data_buf += gen.size();
 346 delete req;
 347 }
// 12749:223c83ed9979
 341
 342 RequestPtr req = std::make_shared<Request>(
 343 0, gen.addr(), gen.size(), 0,
 344 cuList[0]->masterId(), 0, 0, nullptr);
 345
 346 doFunctionalAccess(req, cmd, data_buf, suppress_func_errors, cu_id);
 347 data_buf += gen.size();
 348 }
348}
349
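// Thin wrappers around AccessMem() for functional reads and writes, with and
// without suppression of functional-access errors.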
350void
351Shader::ReadMem(uint64_t address, void *ptr, uint32_t size, int cu_id)
352{
353 AccessMem(address, ptr, size, cu_id, MemCmd::ReadReq, false);
354}
355
356void
357Shader::ReadMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
358 bool suppress_func_errors)
359{
360 AccessMem(address, ptr, size, cu_id, MemCmd::ReadReq, suppress_func_errors);
361}
362
363void
364Shader::WriteMem(uint64_t address, void *ptr,uint32_t size, int cu_id)
365{
366 AccessMem(address, ptr, size, cu_id, MemCmd::WriteReq, false);
367}
368
369void
370Shader::WriteMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
371 bool suppress_func_errors)
372{
373 AccessMem(address, ptr, size, cu_id, MemCmd::WriteReq,
374 suppress_func_errors);
375}
376
377/*
378 * Send a packet through the appropriate TLB functional port.
379 * If cu_id=n_cu, then this is the dispatcher's TLB.
380 * Otherwise it's the TLB of the cu_id compute unit.
381 */
382void
383Shader::functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode)
384{
385 // update senderState. Need to know the gpuTc and the TLB mode
386 pkt->senderState =
387 new TheISA::GpuTLB::TranslationState(mode, gpuTc, false);
388
389 if (cu_id == n_cu) {
390 dispatcher->tlbPort->sendFunctional(pkt);
391 } else {
392 // even when the perLaneTLB flag is turned on
 393 // it's ok to send all accesses through lane 0
 394 // since the lane # is not known here.
 395 // This isn't important since these are functional accesses.
396 cuList[cu_id]->tlbPort[0]->sendFunctional(pkt);
397 }
398
399 /* safe_cast the senderState */
400 TheISA::GpuTLB::TranslationState *sender_state =
401 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
402
403 delete sender_state->tlbEntry;
404 delete pkt->senderState;
405}