gpu-ruby.py (11308:7d8836fd043d) | gpu-ruby.py (11310:b4bbf540d1a7) |
---|---|
1# 2# Copyright (c) 2015 Advanced Micro Devices, Inc. 3# All rights reserved. 4# 5# For use for simulation and test purposes only 6# 7# Redistribution and use in source and binary forms, with or without 8# modification, are permitted provided that the following conditions are met: --- 78 unchanged lines hidden (view full) --- 87parser = optparse.OptionParser() 88Options.addCommonOptions(parser) 89Options.addSEOptions(parser) 90 91parser.add_option("-k", "--kernel-files", 92 help="file(s) containing GPU kernel code (colon separated)") 93parser.add_option("-u", "--num-compute-units", type="int", default=2, 94 help="number of GPU compute units"), | 1# 2# Copyright (c) 2015 Advanced Micro Devices, Inc. 3# All rights reserved. 4# 5# For use for simulation and test purposes only 6# 7# Redistribution and use in source and binary forms, with or without 8# modification, are permitted provided that the following conditions are met: --- 78 unchanged lines hidden (view full) --- 87parser = optparse.OptionParser() 88Options.addCommonOptions(parser) 89Options.addSEOptions(parser) 90 91parser.add_option("-k", "--kernel-files", 92 help="file(s) containing GPU kernel code (colon separated)") 93parser.add_option("-u", "--num-compute-units", type="int", default=2, 94 help="number of GPU compute units"), |
95parser.add_option("--numCPs", type="int", default=0, | 95parser.add_option("--num-cp", type="int", default=0, |
96 help="Number of GPU Command Processors (CP)") 97parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \ 98 "per CU") 99parser.add_option("--cu-per-sqc", type="int", default=4, help="number of CUs" \ 100 "sharing an SQC (icache, and thus icache TLB)") 101parser.add_option("--wf-size", type="int", default=64, 102 help="Wavefront size(in workitems)") 103parser.add_option("--wfs-per-simd", type="int", default=8, help="Number of " \ --- 199 unchanged lines hidden (view full) --- 303# Because of the peculiarities of the CP core, you may have 1 CPU but 2 304# sequencers and thus 2 _cpu_ports created. Your GPUs shouldn't be 305# hooked up until after the CP. To make this script generic, figure out 306# the index as below, but note that this assumes there is one sequencer 307# per compute unit and one sequencer per SQC for the math to work out 308# correctly. 309gpu_port_idx = len(system.ruby._cpu_ports) \ 310 - options.num_compute_units - options.num_sqc | 96 help="Number of GPU Command Processors (CP)") 97parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \ 98 "per CU") 99parser.add_option("--cu-per-sqc", type="int", default=4, help="number of CUs" \ 100 "sharing an SQC (icache, and thus icache TLB)") 101parser.add_option("--wf-size", type="int", default=64, 102 help="Wavefront size(in workitems)") 103parser.add_option("--wfs-per-simd", type="int", default=8, help="Number of " \ --- 199 unchanged lines hidden (view full) --- 303# Because of the peculiarities of the CP core, you may have 1 CPU but 2 304# sequencers and thus 2 _cpu_ports created. Your GPUs shouldn't be 305# hooked up until after the CP. To make this script generic, figure out 306# the index as below, but note that this assumes there is one sequencer 307# per compute unit and one sequencer per SQC for the math to work out 308# correctly. 309gpu_port_idx = len(system.ruby._cpu_ports) \ 310 - options.num_compute_units - options.num_sqc |
311gpu_port_idx = gpu_port_idx - options.numCPs * 2 | 311gpu_port_idx = gpu_port_idx - options.num_cp * 2 |
312 313wavefront_size = options.wf_size 314for i in xrange(n_cu): 315 # The pipeline issues wavefront_size number of uncoalesced requests 316 # in one GPU issue cycle. Hence wavefront_size mem ports. 317 for j in xrange(wavefront_size): 318 system.cpu[shader_idx].CUs[i].memory_port[j] = \ 319 system.ruby._cpu_ports[gpu_port_idx].slave[j] 320 gpu_port_idx += 1 321 322for i in xrange(n_cu): 323 if i > 0 and not i % options.cu_per_sqc: 324 gpu_port_idx += 1 325 system.cpu[shader_idx].CUs[i].sqc_port = \ 326 system.ruby._cpu_ports[gpu_port_idx].slave 327gpu_port_idx = gpu_port_idx + 1 328 | 312 313wavefront_size = options.wf_size 314for i in xrange(n_cu): 315 # The pipeline issues wavefront_size number of uncoalesced requests 316 # in one GPU issue cycle. Hence wavefront_size mem ports. 317 for j in xrange(wavefront_size): 318 system.cpu[shader_idx].CUs[i].memory_port[j] = \ 319 system.ruby._cpu_ports[gpu_port_idx].slave[j] 320 gpu_port_idx += 1 321 322for i in xrange(n_cu): 323 if i > 0 and not i % options.cu_per_sqc: 324 gpu_port_idx += 1 325 system.cpu[shader_idx].CUs[i].sqc_port = \ 326 system.ruby._cpu_ports[gpu_port_idx].slave 327gpu_port_idx = gpu_port_idx + 1 328 |
329assert(options.numCPs == 0) | 329# Current regression tests do not support the command processor 330assert(options.num_cp == 0) |
330 331# connect dispatcher to the system.piobus 332dispatcher.pio = system.piobus.master 333dispatcher.dma = system.piobus.slave 334 335################# Connect the CPU and GPU via GPU Dispatcher ################### 336# CPU rings the GPU doorbell to notify a pending task 337# using this interface. --- 16 unchanged lines hidden --- | 331 332# connect dispatcher to the system.piobus 333dispatcher.pio = system.piobus.master 334dispatcher.dma = system.piobus.slave 335 336################# Connect the CPU and GPU via GPU Dispatcher ################### 337# CPU rings the GPU doorbell to notify a pending task 338# using this interface. --- 16 unchanged lines hidden --- |