# apu_se.py revision 11534
#
# Copyright (c) 2015 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: Sooraj Puthoor
#

import optparse, os, re
import math
import glob
import inspect
# sys.exit() is called at the very end of the script; import it explicitly
# rather than depending on a wildcard import happening to provide it.
import sys

import m5
from m5.objects import *
from m5.util import addToPath

addToPath('../ruby')
addToPath('../common')
addToPath('../topologies')

import Options
import Ruby
import Simulation
import GPUTLBOptions, GPUTLBConfig

########################## Script Options ########################
def setOption(parser, opt_str, value = 1):
    """Overwrite the parsed value of an option that the parser knows about.

    parser  -- optparse.OptionParser whose 'values' object is updated
    opt_str -- option string as registered, e.g. "--access-backing-store"
    value   -- object stored on the option's destination attribute

    Raises Exception if opt_str is not one of the parser's options.
    """
    # check to make sure the option actually exists
    if not parser.has_option(opt_str):
        raise Exception("cannot find %s in list of possible options" % opt_str)

    opt = parser.get_option(opt_str)
    # set the value; stored directly with setattr so that it is never
    # spliced into source text and executed
    setattr(parser.values, opt.dest, value)

def getOption(parser, opt_str):
    """Return the current parsed value of an option the parser knows about.

    parser  -- optparse.OptionParser whose 'values' object is read
    opt_str -- option string as registered with the parser

    Raises Exception if opt_str is not one of the parser's options.
    """
    # check to make sure the option actually exists
    if not parser.has_option(opt_str):
        raise Exception("cannot find %s in list of possible options" % opt_str)

    opt = parser.get_option(opt_str)
    # get the value
    return getattr(parser.values, opt.dest)

# Adding script options
parser = optparse.OptionParser()
Options.addCommonOptions(parser)
Options.addSEOptions(parser)

parser.add_option("--cpu-only-mode", action="store_true", default=False,
                  help="APU mode. Used to take care of problems in "
                       "Ruby.py while running APU protocols")
parser.add_option("-k", "--kernel-files",
                  help="file(s) containing GPU kernel code (colon separated)")
parser.add_option("-u", "--num-compute-units", type="int", default=1,
                  help="number of GPU compute units")
parser.add_option("--num-cp", type="int", default=0,
                  help="Number of GPU Command Processors (CP)")
parser.add_option("--benchmark-root", help="Root of benchmark directory tree")

# not super important now, but to avoid putting the number 4 everywhere, make
# it an option/knob
parser.add_option("--cu-per-sqc", type="int", default=4,
                  help="number of CUs "
                       "sharing an SQC (icache, and thus icache TLB)")
parser.add_option("--simds-per-cu", type="int", default=4,
                  help="SIMD units per CU")
parser.add_option("--wf-size", type="int", default=64,
                  help="Wavefront size(in workitems)")
parser.add_option("--sp-bypass-path-length", type="int", default=4,
                  help="Number of stages of bypass path in vector ALU "
                       "for Single Precision ops")
parser.add_option("--dp-bypass-path-length", type="int", default=4,
                  help="Number of stages of bypass path in vector ALU "
                       "for Double Precision ops")
# issue period per SIMD unit: number of cycles before issuing another vector
parser.add_option("--issue-period", type="int", default=4,
                  help="Number of cycles per vector instruction issue period")
parser.add_option("--glbmem-wr-bus-width", type="int", default=32,
                  help="VGPR to Coalescer (Global Memory) data bus width "
                       "in bytes")
parser.add_option("--glbmem-rd-bus-width", type="int", default=32,
                  help="Coalescer to VGPR (Global Memory) data bus width "
                       "in bytes")
# Currently we only support 1 local memory pipe
parser.add_option("--shr-mem-pipes-per-cu", type="int", default=1,
                  help="Number of Shared Memory pipelines per CU")
# Currently we only support 1 global memory pipe
parser.add_option("--glb-mem-pipes-per-cu", type="int", default=1,
                  help="Number of Global Memory pipelines per CU")
parser.add_option("--wfs-per-simd", type="int", default=10,
                  help="Number of WF slots per SIMD")

parser.add_option("--vreg-file-size", type="int", default=2048,
                  help="number of physical vector registers per SIMD")
parser.add_option("--bw-scalor", type="int", default=0,
                  help="bandwidth scalor for scalability analysis")
parser.add_option("--CPUClock", type="string", default="2GHz",
                  help="CPU clock")
parser.add_option("--GPUClock", type="string", default="1GHz",
                  help="GPU clock")
parser.add_option("--cpu-voltage", action="store", type="string",
                  default='1.0V',
                  help="CPU voltage domain")
# The help text used to read "CPU voltage domain" (copy/paste slip); this
# value feeds the shader's VoltageDomain, i.e. the GPU.
parser.add_option("--gpu-voltage", action="store", type="string",
                  default='1.0V',
                  help="GPU voltage domain")
parser.add_option("--CUExecPolicy", type="string", default="OLDEST-FIRST",
                  help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
parser.add_option("--xact-cas-mode", action="store_true",
                  help="enable load_compare mode (transactional CAS)")
parser.add_option("--SegFaultDebug", action="store_true",
                  help="checks for GPU seg fault before TLB access")
parser.add_option("--FunctionalTLB", action="store_true",
                  help="Assumes TLB has no latency")
parser.add_option("--LocalMemBarrier", action="store_true",
                  help="Barrier does not wait for writethroughs to complete")
parser.add_option("--countPages", action="store_true",
                  help="Count Page Accesses and output in per-CU output files")
parser.add_option("--TLB-prefetch", type="int",
                  help="prefetch depth for TLBs")
parser.add_option("--pf-type", type="string",
                  help="type of prefetch: "
                       "PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
parser.add_option("--pf-stride", type="int", help="set prefetch stride")
parser.add_option("--numLdsBanks", type="int", default=32,
                  help="number of physical banks per LDS module")
parser.add_option("--ldsBankConflictPenalty", type="int", default=1,
                  help="number of cycles per LDS bank conflict")


Ruby.define_options(parser)

#add TLB options to the parser
GPUTLBOptions.tlb_options(parser)

(options, args) = parser.parse_args()

# The GPU cache coherence protocols only work with the backing store
setOption(parser, "--access-backing-store")

# if benchmark root is specified explicitly, that overrides the search path
if options.benchmark_root:
    benchmark_path = [options.benchmark_root]
else:
    # Set default benchmark search path to current dir
    benchmark_path = ['.']

########################## Sanity Check ########################

# Currently the gpu model requires ruby
if buildEnv['PROTOCOL'] == 'None':
    fatal("GPU model requires ruby")

# Currently the gpu model requires only timing or detailed CPU
if options.cpu_type not in ("timing", "detailed"):
    fatal("GPU model requires timing or detailed CPU")

# This file can support multiple compute units. Checked explicitly rather
# than with assert so the check also runs when asserts are disabled (-O).
if options.num_compute_units < 1:
    fatal("GPU model requires at least one compute unit")

# Currently, the sqc (I-Cache of GPU) is shared by
# multiple compute units(CUs). The protocol works just fine
# even if sqc is not shared. Overriding this option here
# so that the user need not explicitly set this (assuming
# sharing sqc is the common usage)
n_cu = options.num_compute_units
num_sqc = int(math.ceil(float(n_cu) / options.cu_per_sqc))
options.num_sqc = num_sqc # pass this to Ruby

########################## Creating the GPU system ########################
# shader is the GPU
shader = Shader(n_wf = options.wfs_per_simd,
                clk_domain = SrcClockDomain(
                    clock = options.GPUClock,
                    voltage_domain = VoltageDomain(
                        voltage = options.gpu_voltage)))

# GPU_RfO(Read For Ownership) implements SC/TSO memory model.
# Other GPU protocols implement release consistency at GPU side.
# So, all GPU protocols other than GPU_RfO should make their writes
# visible to the global memory and should read from global memory
# during kernel boundary. The pipeline initiates(or do not initiate)
# the acquire/release operation depending on this impl_kern_boundary_sync
# flag. This flag=true means pipeline initiates a acquire/release operation
# at kernel boundary.
shader.impl_kern_boundary_sync = (buildEnv['PROTOCOL'] != 'GPU_RfO')

# Switching off per-lane TLB by default
per_lane = (options.TLB_config == "perLane")

# List of compute units; one GPU can have multiple compute units
compute_units = []
for i in range(n_cu):
    cu = ComputeUnit(cu_id = i, perLaneTLB = per_lane,
                     num_SIMDs = options.simds_per_cu,
                     wfSize = options.wf_size,
                     spbypass_pipe_length = options.sp_bypass_path_length,
                     dpbypass_pipe_length = options.dp_bypass_path_length,
                     issue_period = options.issue_period,
                     coalescer_to_vrf_bus_width = options.glbmem_rd_bus_width,
                     vrf_to_coalescer_bus_width = options.glbmem_wr_bus_width,
                     num_global_mem_pipes = options.glb_mem_pipes_per_cu,
                     num_shared_mem_pipes = options.shr_mem_pipes_per_cu,
                     n_wf = options.wfs_per_simd,
                     execPolicy = options.CUExecPolicy,
                     xactCasMode = options.xact_cas_mode,
                     debugSegFault = options.SegFaultDebug,
                     functionalTLB = options.FunctionalTLB,
                     localMemBarrier = options.LocalMemBarrier,
                     countPages = options.countPages,
                     localDataStore = LdsState(
                         banks = options.numLdsBanks,
                         bankConflictPenalty = options.ldsBankConflictPenalty))
    compute_units.append(cu)

    wavefronts = []
    vrfs = []
    for j in range(options.simds_per_cu):
        # options.wfs_per_simd is the value the shader's n_wf was built
        # from; iterate over the plain int
        for k in range(options.wfs_per_simd):
            wavefronts.append(Wavefront(simdId = j, wf_slot_id = k,
                                        wfSize = options.wf_size))
        vrfs.append(VectorRegisterFile(
            simd_id = j, num_regs_per_simd = options.vreg_file_size))
    cu.wavefronts = wavefronts
    cu.vector_register_file = vrfs
    if options.TLB_prefetch:
        cu.prefetch_depth = options.TLB_prefetch
        cu.prefetch_prev_type = options.pf_type

    # attach the LDS and the CU to the bus (actually a Bridge)
    cu.ldsPort = cu.ldsBus.slave
    cu.ldsBus.master = cu.localDataStore.cuPort

# Attach compute units to GPU
shader.CUs = compute_units

########################## Creating the CPU system ########################

# The shader core will be whatever is after the CPU cores are accounted for
shader_idx = options.num_cpus

# The command processor will be whatever is after the shader is accounted for
cp_idx = shader_idx + 1

# We only support timing mode for shader and memory
shader.timing = True
mem_mode = 'timing'

def _create_cpu(cpu_id):
    """Build one core of the type selected by --cpu-type.

    Used for both the host CPUs and the command processors; every core gets
    its own clock domain (--CPUClock) and voltage domain (--cpu-voltage).
    """
    if options.cpu_type == "detailed":
        cpu_class = DerivO3CPU
    elif options.cpu_type == "timing":
        cpu_class = TimingSimpleCPU
    else:
        fatal("Atomic CPU not supported/tested")
    return cpu_class(cpu_id = cpu_id,
                     clk_domain = SrcClockDomain(
                         clock = options.CPUClock,
                         voltage_domain = VoltageDomain(
                             voltage = options.cpu_voltage)))

# create the cpus
cpu_list = [_create_cpu(i) for i in range(options.num_cpus)]

# create the command processors; their ids continue after the CPU ids
cp_list = [_create_cpu(options.num_cpus + i) for i in range(options.num_cp)]

########################## Creating the GPU dispatcher ########################
# Dispatcher dispatches work from host CPU to GPU
if not cpu_list:
    # the first CPU acts as the host; without one there is nothing to index
    fatal("GPU model requires at least one host CPU")
host_cpu = cpu_list[0]
dispatcher = GpuDispatcher()

########################## Create and assign the workload ########################
def find_path(base_list, rel_path, test):
    """Return the first base/rel_path for which test(path) is true.

    base_list -- candidate base directories; false entries are skipped
    rel_path  -- path joined onto each base
    test      -- predicate on the joined path (e.g. os.path.exists)

    Calls fatal() when no base satisfies the test.
    """
    for base in base_list:
        if not base:
            # base could be None if environment var not set
            continue
        full_path = os.path.join(base, rel_path)
        if test(full_path):
            return full_path
    fatal("%s not found in %s" % (rel_path, base_list))

def find_file(base_list, rel_path):
    """Like find_path(), but only a regular file counts as a match."""
    return find_path(base_list, rel_path, os.path.isfile)

executable = find_path(benchmark_path, options.cmd, os.path.exists)
# it's common for a benchmark to be in a directory with the same
# name as the executable, so we handle that automatically
if os.path.isdir(executable):
    benchmark_path = [executable]
    executable = find_file(benchmark_path, options.cmd)
if options.kernel_files:
    kernel_files = [find_file(benchmark_path, f)
                    for f in options.kernel_files.split(':')]
else:
    # if kernel_files is not set, look for .asm file(s)
    # in the same directory as the executable
    kernel_path = os.path.dirname(executable)
    kernel_files = glob.glob(os.path.join(kernel_path, '*.asm'))
    if kernel_files:
        # single-argument print() emits the same text under Python 2 and 3
        print("Using GPU kernel code file(s) " + ",".join(kernel_files))
    else:
        fatal("Can't locate kernel code (.asm) in " + kernel_path)

# OpenCL driver
driver = ClDriver(filename="hsa", codefile=kernel_files)
for cpu in cpu_list:
    cpu.workload = LiveProcess(executable = executable,
                               cmd = [options.cmd] + options.options.split(),
                               drivers = [driver])
for cp in cp_list:
    cp.workload = host_cpu.workload

########################## Create the overall system ########################
# Full list of processing cores in the system. Note that
# dispatcher is also added to cpu_list although it is
# not a processing element
cpu_list = cpu_list + [shader] + cp_list + [dispatcher]

# creating the overall system
# notice the cpu list is explicitly added as a parameter to System
system = System(cpu = cpu_list,
                mem_ranges = [AddrRange(options.mem_size)],
                cache_line_size = options.cacheline_size,
                mem_mode = mem_mode)
system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
system.clk_domain = SrcClockDomain(clock = options.sys_clock,
                                   voltage_domain = system.voltage_domain)

# configure the TLB hierarchy
GPUTLBConfig.config_tlb_hierarchy(options, system, shader_idx)

# create Ruby system
system.piobus = IOXBar(width=32, response_latency=0,
                       frontend_latency=0, forward_latency=0)
Ruby.create_system(options, None, system)
system.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
                                        voltage_domain = system.voltage_domain)

# attach the CPU ports to Ruby
for i in range(options.num_cpus):
    ruby_port = system.ruby._cpu_ports[i]

    # Create interrupt controller
    system.cpu[i].createInterruptController()

    # Connect cache port's to ruby
    system.cpu[i].icache_port = ruby_port.slave
    system.cpu[i].dcache_port = ruby_port.slave

    ruby_port.mem_master_port = system.piobus.slave
    if buildEnv['TARGET_ISA'] == "x86":
        system.cpu[i].interrupts[0].pio = system.piobus.master
        system.cpu[i].interrupts[0].int_master = system.piobus.slave
        system.cpu[i].interrupts[0].int_slave = system.piobus.master

# attach CU ports to Ruby
# Because of the peculiarities of the CP core, you may have 1 CPU but 2
# sequencers and thus 2 _cpu_ports created. Your GPUs shouldn't be
# hooked up until after the CP. To make this script generic, figure out
# the index as below, but note that this assumes there is one sequencer
# per compute unit and one sequencer per SQC for the math to work out
# correctly.
# Index of the first GPU sequencer port: the CU, SQC and CP (two per CP)
# ports are taken to sit at the tail of system.ruby._cpu_ports.
gpu_port_idx = len(system.ruby._cpu_ports) \
               - options.num_compute_units - options.num_sqc
gpu_port_idx = gpu_port_idx - options.num_cp * 2

wavefront_size = options.wf_size
for i in range(n_cu):
    # The pipeline issues wavefront_size number of uncoalesced requests
    # in one GPU issue cycle. Hence wavefront_size mem ports.
    for j in range(wavefront_size):
        system.cpu[shader_idx].CUs[i].memory_port[j] = \
            system.ruby._cpu_ports[gpu_port_idx].slave[j]
    # one Ruby port per compute unit
    gpu_port_idx += 1

for i in range(n_cu):
    # every cu_per_sqc-th CU starts using the next SQC port
    if i > 0 and not i % options.cu_per_sqc:
        # single-argument print() emits the same text under Python 2 and 3
        # (the doubled space matches the original comma-separated print)
        print("incrementing idx on  %d" % i)
        gpu_port_idx += 1
    system.cpu[shader_idx].CUs[i].sqc_port = \
        system.ruby._cpu_ports[gpu_port_idx].slave
gpu_port_idx = gpu_port_idx + 1

# attach CP ports to Ruby
for i in range(options.num_cp):
    system.cpu[cp_idx].createInterruptController()
    # each CP consumes two consecutive ports: data first, then instruction
    system.cpu[cp_idx].dcache_port = \
        system.ruby._cpu_ports[gpu_port_idx + i * 2].slave
    system.cpu[cp_idx].icache_port = \
        system.ruby._cpu_ports[gpu_port_idx + i * 2 + 1].slave
    system.cpu[cp_idx].interrupts[0].pio = system.piobus.master
    system.cpu[cp_idx].interrupts[0].int_master = system.piobus.slave
    system.cpu[cp_idx].interrupts[0].int_slave = system.piobus.master
    cp_idx = cp_idx + 1

# connect dispatcher to the system.piobus
dispatcher.pio = system.piobus.master
dispatcher.dma = system.piobus.slave

################# Connect the CPU and GPU via GPU Dispatcher ###################
# CPU rings the GPU doorbell to notify a pending task
# using this interface.
# And GPU uses this interface to notify the CPU of task completion
# The communication happens through emulated driver.

# Note this implicit setting of the cpu_pointer, shader_pointer and tlb array
# parameters must be after the explicit setting of the System cpu list
shader.cpu_pointer = host_cpu
dispatcher.cpu = host_cpu
dispatcher.shader_pointer = shader
dispatcher.cl_driver = driver

########################## Start simulation ########################

root = Root(system=system, full_system=False)
m5.ticks.setGlobalFrequency('1THz')
# run until --abs-max-tick when given, otherwise until the simulator's limit
maxtick = options.abs_max_tick if options.abs_max_tick else m5.MaxTick

# Benchmarks support work item annotations
Simulation.setWorkCountOptions(system, options)

# Checkpointing is not supported by APU model
if (options.checkpoint_dir is not None or
        options.checkpoint_restore is not None):
    fatal("Checkpointing not supported by apu model")

checkpoint_dir = None
m5.instantiate(checkpoint_dir)

# Map workload to this address space
host_cpu.workload[0].map(0x10000000, 0x200000000, 4096)

# Run until maxtick or until the simulator reports an exit event, print the
# final tick and the exit cause, and use the event's code as the script's
# exit status.
exit_event = m5.simulate(maxtick)
# single-argument print() emits the same text under Python 2 and 3; the
# doubled space in the second message matches the original comma-separated
# print statement
print("Ticks: %s" % m5.curTick())
print('Exiting because  %s' % exit_event.getCause())
sys.exit(exit_event.getCode())