gpu-ruby.py (11308:7d8836fd043d) gpu-ruby.py (11310:b4bbf540d1a7)
1#
2# Copyright (c) 2015 Advanced Micro Devices, Inc.
3# All rights reserved.
4#
5# For use for simulation and test purposes only
6#
7# Redistribution and use in source and binary forms, with or without
8# modification, are permitted provided that the following conditions are met:

--- 78 unchanged lines hidden (view full) ---

87parser = optparse.OptionParser()
88Options.addCommonOptions(parser)
89Options.addSEOptions(parser)
90
91parser.add_option("-k", "--kernel-files",
92 help="file(s) containing GPU kernel code (colon separated)")
93parser.add_option("-u", "--num-compute-units", type="int", default=2,
94 help="number of GPU compute units"),
1#
2# Copyright (c) 2015 Advanced Micro Devices, Inc.
3# All rights reserved.
4#
5# For use for simulation and test purposes only
6#
7# Redistribution and use in source and binary forms, with or without
8# modification, are permitted provided that the following conditions are met:

--- 78 unchanged lines hidden (view full) ---

87parser = optparse.OptionParser()
88Options.addCommonOptions(parser)
89Options.addSEOptions(parser)
90
91parser.add_option("-k", "--kernel-files",
92 help="file(s) containing GPU kernel code (colon separated)")
93parser.add_option("-u", "--num-compute-units", type="int", default=2,
94 help="number of GPU compute units"),
95parser.add_option("--numCPs", type="int", default=0,
95parser.add_option("--num-cp", type="int", default=0,
96 help="Number of GPU Command Processors (CP)")
97parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \
98 "per CU")
99parser.add_option("--cu-per-sqc", type="int", default=4, help="number of CUs" \
100 "sharing an SQC (icache, and thus icache TLB)")
101parser.add_option("--wf-size", type="int", default=64,
102 help="Wavefront size(in workitems)")
103parser.add_option("--wfs-per-simd", type="int", default=8, help="Number of " \

--- 199 unchanged lines hidden (view full) ---

303# Because of the peculiarities of the CP core, you may have 1 CPU but 2
304# sequencers and thus 2 _cpu_ports created. Your GPUs shouldn't be
305# hooked up until after the CP. To make this script generic, figure out
306# the index as below, but note that this assumes there is one sequencer
307# per compute unit and one sequencer per SQC for the math to work out
308# correctly.
309gpu_port_idx = len(system.ruby._cpu_ports) \
310 - options.num_compute_units - options.num_sqc
96 help="Number of GPU Command Processors (CP)")
97parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \
98 "per CU")
99parser.add_option("--cu-per-sqc", type="int", default=4, help="number of CUs" \
100 "sharing an SQC (icache, and thus icache TLB)")
101parser.add_option("--wf-size", type="int", default=64,
102 help="Wavefront size(in workitems)")
103parser.add_option("--wfs-per-simd", type="int", default=8, help="Number of " \

--- 199 unchanged lines hidden (view full) ---

303# Because of the peculiarities of the CP core, you may have 1 CPU but 2
304# sequencers and thus 2 _cpu_ports created. Your GPUs shouldn't be
305# hooked up until after the CP. To make this script generic, figure out
306# the index as below, but note that this assumes there is one sequencer
307# per compute unit and one sequencer per SQC for the math to work out
308# correctly.
309gpu_port_idx = len(system.ruby._cpu_ports) \
310 - options.num_compute_units - options.num_sqc
311gpu_port_idx = gpu_port_idx - options.numCPs * 2
311gpu_port_idx = gpu_port_idx - options.num_cp * 2
312
# Wire each compute unit's memory ports to Ruby: memory_port[j] of CU i is
# bound to slave[j] of the Ruby CPU port at gpu_port_idx, for every j below
# the wavefront size taken from --wf-size.
# NOTE(review): indentation is not visible in this listing; the increment
# after the inner loop presumably runs once per CU (one port per CU) - confirm.
313wavefront_size = options.wf_size
314for i in xrange(n_cu):
315 # The pipeline issues wavefront_size number of uncoalesced requests
316 # in one GPU issue cycle. Hence wavefront_size mem ports.
317 for j in xrange(wavefront_size):
318 system.cpu[shader_idx].CUs[i].memory_port[j] = \
319 system.ruby._cpu_ports[gpu_port_idx].slave[j]
320 gpu_port_idx += 1
321
# Wire each compute unit's sqc_port. The port index advances only when i is a
# non-zero multiple of --cu-per-sqc, so every run of cu_per_sqc consecutive
# CUs is bound to the same Ruby port.
322for i in xrange(n_cu):
323 if i > 0 and not i % options.cu_per_sqc:
324 gpu_port_idx += 1
325 system.cpu[shader_idx].CUs[i].sqc_port = \
326 system.ruby._cpu_ports[gpu_port_idx].slave
# Step past the last port used by the loop above.
327gpu_port_idx = gpu_port_idx + 1
328
312
# Wire each compute unit's memory ports to Ruby: memory_port[j] of CU i is
# bound to slave[j] of the Ruby CPU port at gpu_port_idx, for every j below
# the wavefront size taken from --wf-size.
# NOTE(review): indentation is not visible in this listing; the increment
# after the inner loop presumably runs once per CU (one port per CU) - confirm.
313wavefront_size = options.wf_size
314for i in xrange(n_cu):
315 # The pipeline issues wavefront_size number of uncoalesced requests
316 # in one GPU issue cycle. Hence wavefront_size mem ports.
317 for j in xrange(wavefront_size):
318 system.cpu[shader_idx].CUs[i].memory_port[j] = \
319 system.ruby._cpu_ports[gpu_port_idx].slave[j]
320 gpu_port_idx += 1
321
# Wire each compute unit's sqc_port. The port index advances only when i is a
# non-zero multiple of --cu-per-sqc, so every run of cu_per_sqc consecutive
# CUs is bound to the same Ruby port.
322for i in xrange(n_cu):
323 if i > 0 and not i % options.cu_per_sqc:
324 gpu_port_idx += 1
325 system.cpu[shader_idx].CUs[i].sqc_port = \
326 system.ruby._cpu_ports[gpu_port_idx].slave
# Step past the last port used by the loop above.
327gpu_port_idx = gpu_port_idx + 1
328
329assert(options.numCPs == 0)
329# Current regression tests do not support the command processor
330assert(options.num_cp == 0)
330
331# connect dispatcher to the system.piobus
332dispatcher.pio = system.piobus.master
333dispatcher.dma = system.piobus.slave
334
335################# Connect the CPU and GPU via GPU Dispatcher ###################
336# CPU rings the GPU doorbell to notify a pending task
337# using this interface.

--- 16 unchanged lines hidden ---
331
332# connect dispatcher to the system.piobus
333dispatcher.pio = system.piobus.master
334dispatcher.dma = system.piobus.slave
335
336################# Connect the CPU and GPU via GPU Dispatcher ###################
337# CPU rings the GPU doorbell to notify a pending task
338# using this interface.

--- 16 unchanged lines hidden ---