gpu-ruby.py (12581:a8f1d31d3492) | gpu-ruby.py (13718:89e8bcc7253b) |
---|---|
1# 2# Copyright (c) 2015 Advanced Micro Devices, Inc. 3# All rights reserved. 4# 5# For use for simulation and test purposes only 6# 7# Redistribution and use in source and binary forms, with or without 8# modification, are permitted provided that the following conditions are met: --- 183 unchanged lines hidden (view full) --- 192 193# Switching off per-lane TLB by default 194per_lane = False 195if options.TLB_config == "perLane": 196 per_lane = True 197 198# List of compute units; one GPU can have multiple compute units 199compute_units = [] | 1# 2# Copyright (c) 2015 Advanced Micro Devices, Inc. 3# All rights reserved. 4# 5# For use for simulation and test purposes only 6# 7# Redistribution and use in source and binary forms, with or without 8# modification, are permitted provided that the following conditions are met: --- 183 unchanged lines hidden (view full) --- 192 193# Switching off per-lane TLB by default 194per_lane = False 195if options.TLB_config == "perLane": 196 per_lane = True 197 198# List of compute units; one GPU can have multiple compute units 199compute_units = [] |
200for i in xrange(n_cu): | 200for i in range(n_cu): |
201 compute_units.append(ComputeUnit(cu_id = i, perLaneTLB = per_lane, 202 num_SIMDs = options.simds_per_cu, 203 wfSize = options.wf_size, 204 spbypass_pipe_length = \ 205 options.sp_bypass_path_length, 206 dpbypass_pipe_length = \ 207 options.dp_bypass_path_length, 208 issue_period = options.issue_period, --- 13 unchanged lines hidden (view full) --- 222 localMemBarrier = options.LocalMemBarrier, 223 countPages = options.countPages, 224 localDataStore = \ 225 LdsState(banks = options.numLdsBanks, 226 bankConflictPenalty = \ 227 options.ldsBankConflictPenalty))) 228 wavefronts = [] 229 vrfs = [] | 201 compute_units.append(ComputeUnit(cu_id = i, perLaneTLB = per_lane, 202 num_SIMDs = options.simds_per_cu, 203 wfSize = options.wf_size, 204 spbypass_pipe_length = \ 205 options.sp_bypass_path_length, 206 dpbypass_pipe_length = \ 207 options.dp_bypass_path_length, 208 issue_period = options.issue_period, --- 13 unchanged lines hidden (view full) --- 222 localMemBarrier = options.LocalMemBarrier, 223 countPages = options.countPages, 224 localDataStore = \ 225 LdsState(banks = options.numLdsBanks, 226 bankConflictPenalty = \ 227 options.ldsBankConflictPenalty))) 228 wavefronts = [] 229 vrfs = [] |
230 for j in xrange(options.simds_per_cu): 231 for k in xrange(shader.n_wf): | 230 for j in range(options.simds_per_cu): 231 for k in range(int(shader.n_wf)): |
232 wavefronts.append(Wavefront(simdId = j, wf_slot_id = k)) 233 vrfs.append(VectorRegisterFile(simd_id=j, 234 num_regs_per_simd=options.vreg_file_size)) 235 compute_units[-1].wavefronts = wavefronts 236 compute_units[-1].vector_register_file = vrfs 237 if options.TLB_prefetch: 238 compute_units[-1].prefetch_depth = options.TLB_prefetch 239 compute_units[-1].prefetch_prev_type = options.pf_type --- 63 unchanged lines hidden (view full) --- 303# the index as below, but note that this assumes there is one sequencer 304# per compute unit and one sequencer per SQC for the math to work out 305# correctly. 306gpu_port_idx = len(system.ruby._cpu_ports) \ 307 - options.num_compute_units - options.num_sqc 308gpu_port_idx = gpu_port_idx - options.num_cp * 2 309 310wavefront_size = options.wf_size | 232 wavefronts.append(Wavefront(simdId = j, wf_slot_id = k)) 233 vrfs.append(VectorRegisterFile(simd_id=j, 234 num_regs_per_simd=options.vreg_file_size)) 235 compute_units[-1].wavefronts = wavefronts 236 compute_units[-1].vector_register_file = vrfs 237 if options.TLB_prefetch: 238 compute_units[-1].prefetch_depth = options.TLB_prefetch 239 compute_units[-1].prefetch_prev_type = options.pf_type --- 63 unchanged lines hidden (view full) --- 303# the index as below, but note that this assumes there is one sequencer 304# per compute unit and one sequencer per SQC for the math to work out 305# correctly. 306gpu_port_idx = len(system.ruby._cpu_ports) \ 307 - options.num_compute_units - options.num_sqc 308gpu_port_idx = gpu_port_idx - options.num_cp * 2 309 310wavefront_size = options.wf_size |
311for i in xrange(n_cu): | 311for i in range(n_cu): |
312 # The pipeline issues wavefront_size number of uncoalesced requests 313 # in one GPU issue cycle. Hence wavefront_size mem ports. | 312 # The pipeline issues wavefront_size number of uncoalesced requests 313 # in one GPU issue cycle. Hence wavefront_size mem ports. |
314 for j in xrange(wavefront_size): | 314 for j in range(wavefront_size): |
315 system.cpu[shader_idx].CUs[i].memory_port[j] = \ 316 system.ruby._cpu_ports[gpu_port_idx].slave[j] 317 gpu_port_idx += 1 318 | 315 system.cpu[shader_idx].CUs[i].memory_port[j] = \ 316 system.ruby._cpu_ports[gpu_port_idx].slave[j] 317 gpu_port_idx += 1 318 |
319for i in xrange(n_cu): | 319for i in range(n_cu): |
320 if i > 0 and not i % options.cu_per_sqc: 321 gpu_port_idx += 1 322 system.cpu[shader_idx].CUs[i].sqc_port = \ 323 system.ruby._cpu_ports[gpu_port_idx].slave 324gpu_port_idx = gpu_port_idx + 1 325 326# Current regression tests do not support the command processor 327assert(options.num_cp == 0) --- 24 unchanged lines hidden --- | 320 if i > 0 and not i % options.cu_per_sqc: 321 gpu_port_idx += 1 322 system.cpu[shader_idx].CUs[i].sqc_port = \ 323 system.ruby._cpu_ports[gpu_port_idx].slave 324gpu_port_idx = gpu_port_idx + 1 325 326# Current regression tests do not support the command processor 327assert(options.num_cp == 0) --- 24 unchanged lines hidden --- |