gpu-ruby.py (12581:a8f1d31d3492) gpu-ruby.py (13718:89e8bcc7253b)
1#
2# Copyright (c) 2015 Advanced Micro Devices, Inc.
3# All rights reserved.
4#
5# For use for simulation and test purposes only
6#
7# Redistribution and use in source and binary forms, with or without
8# modification, are permitted provided that the following conditions are met:

--- 183 unchanged lines hidden (view full) ---

192
193# Switching off per-lane TLB by default
194per_lane = False
195if options.TLB_config == "perLane":
196 per_lane = True
197
198# List of compute units; one GPU can have multiple compute units
199compute_units = []
1#
2# Copyright (c) 2015 Advanced Micro Devices, Inc.
3# All rights reserved.
4#
5# For use for simulation and test purposes only
6#
7# Redistribution and use in source and binary forms, with or without
8# modification, are permitted provided that the following conditions are met:

--- 183 unchanged lines hidden (view full) ---

192
193# Switching off per-lane TLB by default
194per_lane = False
195if options.TLB_config == "perLane":
196 per_lane = True
197
198# List of compute units; one GPU can have multiple compute units
199compute_units = []
200for i in xrange(n_cu):
200for i in range(n_cu):
201 compute_units.append(ComputeUnit(cu_id = i, perLaneTLB = per_lane,
202 num_SIMDs = options.simds_per_cu,
203 wfSize = options.wf_size,
204 spbypass_pipe_length = \
205 options.sp_bypass_path_length,
206 dpbypass_pipe_length = \
207 options.dp_bypass_path_length,
208 issue_period = options.issue_period,

--- 13 unchanged lines hidden (view full) ---

222 localMemBarrier = options.LocalMemBarrier,
223 countPages = options.countPages,
224 localDataStore = \
225 LdsState(banks = options.numLdsBanks,
226 bankConflictPenalty = \
227 options.ldsBankConflictPenalty)))
228 wavefronts = []
229 vrfs = []
201 compute_units.append(ComputeUnit(cu_id = i, perLaneTLB = per_lane,
202 num_SIMDs = options.simds_per_cu,
203 wfSize = options.wf_size,
204 spbypass_pipe_length = \
205 options.sp_bypass_path_length,
206 dpbypass_pipe_length = \
207 options.dp_bypass_path_length,
208 issue_period = options.issue_period,

--- 13 unchanged lines hidden (view full) ---

222 localMemBarrier = options.LocalMemBarrier,
223 countPages = options.countPages,
224 localDataStore = \
225 LdsState(banks = options.numLdsBanks,
226 bankConflictPenalty = \
227 options.ldsBankConflictPenalty)))
228 wavefronts = []
229 vrfs = []
230 for j in xrange(options.simds_per_cu):
231 for k in xrange(shader.n_wf):
230 for j in range(options.simds_per_cu):
231 for k in range(int(shader.n_wf)):
232 wavefronts.append(Wavefront(simdId = j, wf_slot_id = k))
233 vrfs.append(VectorRegisterFile(simd_id=j,
234 num_regs_per_simd=options.vreg_file_size))
235 compute_units[-1].wavefronts = wavefronts
236 compute_units[-1].vector_register_file = vrfs
237 if options.TLB_prefetch:
238 compute_units[-1].prefetch_depth = options.TLB_prefetch
239 compute_units[-1].prefetch_prev_type = options.pf_type

--- 63 unchanged lines hidden (view full) ---

303# the index as below, but note that this assumes there is one sequencer
304# per compute unit and one sequencer per SQC for the math to work out
305# correctly.
306gpu_port_idx = len(system.ruby._cpu_ports) \
307 - options.num_compute_units - options.num_sqc
308gpu_port_idx = gpu_port_idx - options.num_cp * 2
309
310wavefront_size = options.wf_size
232 wavefronts.append(Wavefront(simdId = j, wf_slot_id = k))
233 vrfs.append(VectorRegisterFile(simd_id=j,
234 num_regs_per_simd=options.vreg_file_size))
235 compute_units[-1].wavefronts = wavefronts
236 compute_units[-1].vector_register_file = vrfs
237 if options.TLB_prefetch:
238 compute_units[-1].prefetch_depth = options.TLB_prefetch
239 compute_units[-1].prefetch_prev_type = options.pf_type

--- 63 unchanged lines hidden (view full) ---

303# the index as below, but note that this assumes there is one sequencer
304# per compute unit and one sequencer per SQC for the math to work out
305# correctly.
306gpu_port_idx = len(system.ruby._cpu_ports) \
307 - options.num_compute_units - options.num_sqc
308gpu_port_idx = gpu_port_idx - options.num_cp * 2
309
310wavefront_size = options.wf_size
311for i in xrange(n_cu):
311for i in range(n_cu):
312 # The pipeline issues wavefront_size number of uncoalesced requests
313 # in one GPU issue cycle. Hence wavefront_size mem ports.
312 # The pipeline issues wavefront_size number of uncoalesced requests
313 # in one GPU issue cycle. Hence wavefront_size mem ports.
314 for j in xrange(wavefront_size):
314 for j in range(wavefront_size):
315 system.cpu[shader_idx].CUs[i].memory_port[j] = \
316 system.ruby._cpu_ports[gpu_port_idx].slave[j]
317 gpu_port_idx += 1
318
315 system.cpu[shader_idx].CUs[i].memory_port[j] = \
316 system.ruby._cpu_ports[gpu_port_idx].slave[j]
317 gpu_port_idx += 1
318
319for i in xrange(n_cu):
319for i in range(n_cu):
320 if i > 0 and not i % options.cu_per_sqc:
321 gpu_port_idx += 1
322 system.cpu[shader_idx].CUs[i].sqc_port = \
323 system.ruby._cpu_ports[gpu_port_idx].slave
324gpu_port_idx = gpu_port_idx + 1
325
326# Current regression tests do not support the command processor
327assert(options.num_cp == 0)

--- 24 unchanged lines hidden ---
320 if i > 0 and not i % options.cu_per_sqc:
321 gpu_port_idx += 1
322 system.cpu[shader_idx].CUs[i].sqc_port = \
323 system.ruby._cpu_ports[gpu_port_idx].slave
324gpu_port_idx = gpu_port_idx + 1
325
326# Current regression tests do not support the command processor
327assert(options.num_cp == 0)

--- 24 unchanged lines hidden ---