#
# Copyright (c) 2015 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: Brad Beckmann
#

from __future__ import print_function

import m5
from m5.objects import *
from m5.defines import buildEnv
from m5.util import addToPath
import os, optparse, sys, math, glob

m5.util.addToPath('../configs/')

from ruby import Ruby
from common import Options
from common import GPUTLBOptions, GPUTLBConfig

########################## Script Options ########################
def setOption(parser, opt_str, value = 1):
    """Overwrite the stored value of an option registered with the parser.

    parser  -- optparse.OptionParser whose ``values`` object is updated
    opt_str -- option string as registered with the parser (e.g. "--foo")
    value   -- object stored on the option's destination attribute

    Raises Exception if opt_str is not a registered option.
    """
    # check to make sure the option actually exists
    if not parser.has_option(opt_str):
        raise Exception("cannot find %s in list of possible options" % opt_str)

    opt = parser.get_option(opt_str)
    # set the value on the destination attribute directly instead of
    # building and exec'ing source text
    setattr(parser.values, opt.dest, value)

def getOption(parser, opt_str):
    """Return the stored value of an option registered with the parser.

    parser  -- optparse.OptionParser whose ``values`` object is read
    opt_str -- option string as registered with the parser (e.g. "--foo")

    Raises Exception if opt_str is not a registered option.
    """
    # check to make sure the option actually exists
    if not parser.has_option(opt_str):
        raise Exception("cannot find %s in list of possible options" % opt_str)

    opt = parser.get_option(opt_str)
    # get the value; exec() cannot bind a function local under Python 3,
    # so read the destination attribute directly
    return getattr(parser.values, opt.dest)

def run_test(root):
    """gpu test requires a specialized run_test implementation to set up the
    mmio space."""

    # instantiate configuration
    m5.instantiate()

    # Now that the system has been constructed, setup the mmio space
    root.system.cpu[0].workload[0].map(0x10000000, 0x200000000, 4096)

    # simulate until program terminates
    # NOTE(review): maxtick is not defined in this file; presumably it is
    # supplied by the namespace of the harness that runs this script --
    # confirm.
    exit_event = m5.simulate(maxtick)
    print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())

parser = optparse.OptionParser()
Options.addCommonOptions(parser)
Options.addSEOptions(parser)

parser.add_option("-k", "--kernel-files",
                  help="file(s) containing GPU kernel code (colon separated)")
parser.add_option("-u", "--num-compute-units", type="int", default=2,
                  help="number of GPU compute units")
parser.add_option("--num-cp", type="int", default=0,
                  help="Number of GPU Command Processors (CP)")
parser.add_option("--simds-per-cu", type="int", default=4,
                  help="SIMD units per CU")
parser.add_option("--cu-per-sqc", type="int", default=4,
                  help="number of CUs sharing an SQC "
                  "(icache, and thus icache TLB)")
parser.add_option("--wf-size", type="int", default=64,
                  help="Wavefront size(in workitems)")
parser.add_option("--wfs-per-simd", type="int", default=8,
                  help="Number of WF slots per SIMD")
parser.add_option("--sp-bypass-path-length", type="int", default=4,
                  help="Number of stages of bypass path in vector ALU for "
                  "Single Precision ops")
parser.add_option("--dp-bypass-path-length", type="int", default=4,
                  help="Number of stages of bypass path in vector ALU for "
                  "Double Precision ops")
parser.add_option("--issue-period", type="int", default=4,
                  help="Number of cycles per vector instruction issue period")
parser.add_option("--glbmem-wr-bus-width", type="int", default=32,
                  help="VGPR to Coalescer (Global Memory) data bus width "
                  "in bytes")
parser.add_option("--glbmem-rd-bus-width", type="int", default=32,
                  help="Coalescer to VGPR (Global Memory) data bus width "
                  "in bytes")
parser.add_option("--shr-mem-pipes-per-cu", type="int", default=1,
                  help="Number of Shared Memory pipelines per CU")
parser.add_option("--glb-mem-pipes-per-cu", type="int", default=1,
                  help="Number of Global Memory pipelines per CU")
parser.add_option("--vreg-file-size", type="int", default=2048,
                  help="number of physical vector registers per SIMD")
parser.add_option("--bw-scalor", type="int", default=0,
                  help="bandwidth scalor for scalability analysis")
parser.add_option("--CPUClock", type="string", default="2GHz",
                  help="CPU clock")
parser.add_option("--GPUClock", type="string", default="1GHz",
                  help="GPU clock")
parser.add_option("--cpu-voltage", action="store", type="string",
                  default='1.0V',
                  help = """CPU voltage domain""")
parser.add_option("--gpu-voltage", action="store", type="string",
                  default='1.0V',
                  help = """GPU voltage domain""")
# Remaining GPU pipeline, debug, TLB-prefetch and LDS options.
parser.add_option("--CUExecPolicy", type="string", default="OLDEST-FIRST",
                  help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
parser.add_option("--xact-cas-mode", action="store_true",
                  help="enable load_compare mode (transactional CAS)")
parser.add_option("--SegFaultDebug",action="store_true",
                  help="checks for GPU seg fault before TLB access")
parser.add_option("--LocalMemBarrier",action="store_true",
                  help="Barrier does not wait for writethroughs to complete")
parser.add_option("--countPages", action="store_true",
                  help="Count Page Accesses and output in per-CU output files")
# The two halves of this help text used to be joined without a space.
parser.add_option("--TLB-prefetch", type="int",
                  help="prefetch depth for TLBs")
parser.add_option("--pf-type", type="string", help="type of prefetch: "
                  "PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
parser.add_option("--pf-stride", type="int", help="set prefetch stride")
parser.add_option("--numLdsBanks", type="int", default=32,
                  help="number of physical banks per LDS module")
parser.add_option("--ldsBankConflictPenalty", type="int", default=1,
                  help="number of cycles per LDS bank conflict")

# Add the ruby specific and protocol specific options
Ruby.define_options(parser)

GPUTLBOptions.tlb_options(parser)

options, args = parser.parse_args()

# The GPU cache coherence protocols only work with the backing store, so
# the option is switched on after parsing.
setOption(parser, "--access-backing-store")

# The SQC (GPU I-cache) is currently shared by several compute units (CUs).
# The protocol also works when the SQC is not shared; the count is derived
# here so the user does not have to set it explicitly (sharing the SQC is
# assumed to be the common usage).
n_cu = options.num_compute_units
num_sqc = int(math.ceil(float(n_cu) / options.cu_per_sqc))
options.num_sqc = num_sqc # pass this to Ruby

########################## Creating the GPU system ########################
# shader is the GPU
gpu_voltage_domain = VoltageDomain(voltage=options.gpu_voltage)
gpu_clk_domain = SrcClockDomain(clock=options.GPUClock,
                                voltage_domain=gpu_voltage_domain)
shader = Shader(n_wf=options.wfs_per_simd,
                clk_domain=gpu_clk_domain,
                timing=True)

# GPU_RfO (Read For Ownership) implements the SC/TSO memory model; the other
# GPU protocols implement release consistency on the GPU side. Every GPU
# protocol other than GPU_RfO must therefore make its writes visible to
# global memory, and read from global memory, at kernel boundaries. The
# impl_kern_boundary_sync flag tells the pipeline whether to initiate that
# acquire/release operation: True means the pipeline initiates an
# acquire/release at each kernel boundary.
shader.impl_kern_boundary_sync = (buildEnv['PROTOCOL'] != 'GPU_RfO')

# Per-lane TLB is off unless explicitly requested
per_lane = (options.TLB_config == "perLane")

# List of compute units; one GPU can have multiple compute units
compute_units = []
for cu_idx in range(n_cu):
    cu = ComputeUnit(
        cu_id=cu_idx,
        perLaneTLB=per_lane,
        num_SIMDs=options.simds_per_cu,
        wfSize=options.wf_size,
        spbypass_pipe_length=options.sp_bypass_path_length,
        dpbypass_pipe_length=options.dp_bypass_path_length,
        issue_period=options.issue_period,
        coalescer_to_vrf_bus_width=options.glbmem_rd_bus_width,
        vrf_to_coalescer_bus_width=options.glbmem_wr_bus_width,
        num_global_mem_pipes=options.glb_mem_pipes_per_cu,
        num_shared_mem_pipes=options.shr_mem_pipes_per_cu,
        n_wf=options.wfs_per_simd,
        execPolicy=options.CUExecPolicy,
        xactCasMode=options.xact_cas_mode,
        debugSegFault=options.SegFaultDebug,
        functionalTLB=True,
        localMemBarrier=options.LocalMemBarrier,
        countPages=options.countPages,
        localDataStore=LdsState(
            banks=options.numLdsBanks,
            bankConflictPenalty=options.ldsBankConflictPenalty))

    # One wavefront per (SIMD, WF slot) pair and one register file per SIMD
    cu_wavefronts = []
    cu_vrfs = []
    for simd in range(options.simds_per_cu):
        for slot in range(int(shader.n_wf)):
            cu_wavefronts.append(Wavefront(simdId=simd, wf_slot_id=slot))
        cu_vrfs.append(
            VectorRegisterFile(simd_id=simd,
                               num_regs_per_simd=options.vreg_file_size))
    cu.wavefronts = cu_wavefronts
    cu.vector_register_file = cu_vrfs

    # Prefetch parameters are only set when a prefetch depth was requested
    if options.TLB_prefetch:
        cu.prefetch_depth = options.TLB_prefetch
        cu.prefetch_prev_type = options.pf_type

    # attach the LDS and the CU to the bus (actually a Bridge)
    cu.ldsPort = cu.ldsBus.slave
    cu.ldsBus.master = cu.localDataStore.cuPort

    compute_units.append(cu)

# Attach compute units to GPU
shader.CUs = compute_units

# This is a uniprocessor-only test, so the shader is the second entry in
# the list of "system.cpus"
options.num_cpus = 1
shader_idx = 1
cpu = TimingSimpleCPU(cpu_id=0)

########################## Creating the GPU dispatcher ########################
# Dispatcher dispatches work from host CPU to GPU
host_cpu = cpu
dispatcher = GpuDispatcher()

# Currently does not test for command processors
cpu_list = [cpu, shader, dispatcher]

system = System(cpu=cpu_list,
                mem_ranges=[AddrRange(options.mem_size)],
                mem_mode='timing')

# Dummy voltage domain for all our clock domains
system.voltage_domain = VoltageDomain(voltage=options.sys_voltage)
system.clk_domain = SrcClockDomain(clock = '1GHz',
                                   voltage_domain = system.voltage_domain)

# Create a separate clock domain for components that should run at
# CPUs frequency. The frequency comes from --CPUClock (default "2GHz"),
# which was previously declared but never read.
system.cpu[0].clk_domain = SrcClockDomain(clock = options.CPUClock,
                                          voltage_domain =
                                          system.voltage_domain)

# configure the TLB hierarchy
GPUTLBConfig.config_tlb_hierarchy(options, system, shader_idx)

# create Ruby system
system.piobus = IOXBar(width=32, response_latency=0,
                       frontend_latency=0, forward_latency=0)
Ruby.create_system(options, None, system)

# Create a separate clock for Ruby
system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
                                        voltage_domain = system.voltage_domain)

# create the interrupt controller
cpu.createInterruptController()

#
# Tie the cpu cache ports to the ruby cpu ports and
# physmem, respectively
#
cpu.connectAllPorts(system.ruby._cpu_ports[0])
system.ruby._cpu_ports[0].mem_master_port = system.piobus.slave

# Attach CU ports to Ruby.
# Because of the peculiarities of the CP core, there may be 1 CPU but 2
# sequencers, and thus 2 _cpu_ports created. The GPUs must not be hooked up
# until after the CP. To keep this script generic the starting index is
# computed below; note that this assumes one sequencer per compute unit and
# one sequencer per SQC for the arithmetic to work out.
gpu_port_idx = (len(system.ruby._cpu_ports)
                - options.num_compute_units
                - options.num_sqc)
gpu_port_idx -= options.num_cp * 2

wavefront_size = options.wf_size
for cu_idx in range(n_cu):
    # The pipeline issues wavefront_size uncoalesced requests in one GPU
    # issue cycle, hence wavefront_size memory ports per CU.
    cu = system.cpu[shader_idx].CUs[cu_idx]
    ruby_port = system.ruby._cpu_ports[gpu_port_idx]
    for lane in range(wavefront_size):
        cu.memory_port[lane] = ruby_port.slave[lane]
    gpu_port_idx += 1

for cu_idx in range(n_cu):
    # Move on to the next SQC port every cu_per_sqc compute units
    if cu_idx > 0 and cu_idx % options.cu_per_sqc == 0:
        gpu_port_idx += 1
    system.cpu[shader_idx].CUs[cu_idx].sqc_port = \
        system.ruby._cpu_ports[gpu_port_idx].slave
gpu_port_idx += 1

# Current regression tests do not support the command processor
assert options.num_cp == 0

# connect dispatcher to the system.piobus
dispatcher.pio = system.piobus.master
dispatcher.dma = system.piobus.slave

################# Connect the CPU and GPU via GPU Dispatcher ###################
# The CPU rings the GPU doorbell through this interface to signal a pending
# task, and the GPU uses the same interface to notify the CPU of task
# completion. The communication happens through the emulated driver.

# The implicit setting of the cpu_pointer, shader_pointer and tlb array
# parameters must come after the explicit setting of the System cpu list.
shader.cpu_pointer = host_cpu
dispatcher.cpu = host_cpu
dispatcher.shader_pointer = shader

# -----------------------
# run simulation
# -----------------------

root = Root(full_system=False, system=system)
m5.ticks.setGlobalFrequency('1THz')
root.system.mem_mode = 'timing'