# Copyright (c) 2015-2016 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3511368Sandreas.hansson@arm.com# 3611368Sandreas.hansson@arm.com# Authors: Andreas Hansson 3711368Sandreas.hansson@arm.com 3812564Sgabeblack@google.comfrom __future__ import print_function 3913774Sandreas.sandberg@arm.comfrom __future__ import absolute_import 4012564Sgabeblack@google.com 4111368Sandreas.hansson@arm.comimport gzip 4211368Sandreas.hansson@arm.comimport optparse 4311368Sandreas.hansson@arm.comimport os 4411368Sandreas.hansson@arm.com 4511368Sandreas.hansson@arm.comimport m5 4611368Sandreas.hansson@arm.comfrom m5.objects import * 4711368Sandreas.hansson@arm.comfrom m5.util import addToPath 4811766Sandreas.sandberg@arm.comfrom m5.stats import periodicStatDump 4911368Sandreas.hansson@arm.com 5011682Sandreas.hansson@arm.comaddToPath('../') 5111682Sandreas.hansson@arm.comfrom common import MemConfig 5211368Sandreas.hansson@arm.com 5311368Sandreas.hansson@arm.comaddToPath('../../util') 5411368Sandreas.hansson@arm.comimport protolib 5511368Sandreas.hansson@arm.com 5611368Sandreas.hansson@arm.com# this script is helpful to observe the memory latency for various 5711368Sandreas.hansson@arm.com# levels in a cache hierarchy, and various cache and memory 5811368Sandreas.hansson@arm.com# configurations, in essence replicating the lmbench lat_mem_rd thrash 5911368Sandreas.hansson@arm.com# behaviour 6011368Sandreas.hansson@arm.com 6111368Sandreas.hansson@arm.com# import the packet proto definitions, and if they are not found, 6211368Sandreas.hansson@arm.com# attempt to generate them automatically 6311368Sandreas.hansson@arm.comtry: 6411368Sandreas.hansson@arm.com import packet_pb2 6511368Sandreas.hansson@arm.comexcept: 6612564Sgabeblack@google.com print("Did not find packet proto definitions, attempting to generate") 6711368Sandreas.hansson@arm.com from subprocess import call 6811368Sandreas.hansson@arm.com error = call(['protoc', '--python_out=configs/dram', 6911368Sandreas.hansson@arm.com '--proto_path=src/proto', 'src/proto/packet.proto']) 
7011368Sandreas.hansson@arm.com if not error: 7112564Sgabeblack@google.com print("Generated packet proto definitions") 7211368Sandreas.hansson@arm.com 7311368Sandreas.hansson@arm.com try: 7411368Sandreas.hansson@arm.com import google.protobuf 7511368Sandreas.hansson@arm.com except: 7612564Sgabeblack@google.com print("Please install the Python protobuf module") 7711368Sandreas.hansson@arm.com exit(-1) 7811368Sandreas.hansson@arm.com 7911368Sandreas.hansson@arm.com import packet_pb2 8011368Sandreas.hansson@arm.com else: 8112564Sgabeblack@google.com print("Failed to import packet proto definitions") 8211368Sandreas.hansson@arm.com exit(-1) 8311368Sandreas.hansson@arm.com 8411368Sandreas.hansson@arm.comparser = optparse.OptionParser() 8511368Sandreas.hansson@arm.com 8611837Swendy.elsasser@arm.comparser.add_option("--mem-type", type="choice", default="DDR3_1600_8x8", 8711368Sandreas.hansson@arm.com choices=MemConfig.mem_names(), 8811368Sandreas.hansson@arm.com help = "type of memory to use") 8911368Sandreas.hansson@arm.comparser.add_option("--mem-size", action="store", type="string", 9011368Sandreas.hansson@arm.com default="16MB", 9111368Sandreas.hansson@arm.com help="Specify the memory size") 9211368Sandreas.hansson@arm.comparser.add_option("--reuse-trace", action="store_true", 9311368Sandreas.hansson@arm.com help="Prevent generation of traces and reuse existing") 9411368Sandreas.hansson@arm.com 9511368Sandreas.hansson@arm.com(options, args) = parser.parse_args() 9611368Sandreas.hansson@arm.com 9711368Sandreas.hansson@arm.comif args: 9812564Sgabeblack@google.com print("Error: script doesn't take any positional arguments") 9911368Sandreas.hansson@arm.com sys.exit(1) 10011368Sandreas.hansson@arm.com 10111368Sandreas.hansson@arm.com# start by creating the system itself, using a multi-layer 2.0 GHz 10211368Sandreas.hansson@arm.com# crossbar, delivering 64 bytes / 3 cycles (one header cycle) which 10311368Sandreas.hansson@arm.com# amounts to 42.7 GByte/s per layer and thus 
per port 10411368Sandreas.hansson@arm.comsystem = System(membus = SystemXBar(width = 32)) 10511368Sandreas.hansson@arm.comsystem.clk_domain = SrcClockDomain(clock = '2.0GHz', 10611368Sandreas.hansson@arm.com voltage_domain = 10711368Sandreas.hansson@arm.com VoltageDomain(voltage = '1V')) 10811368Sandreas.hansson@arm.com 10911368Sandreas.hansson@arm.commem_range = AddrRange(options.mem_size) 11011368Sandreas.hansson@arm.comsystem.mem_ranges = [mem_range] 11111368Sandreas.hansson@arm.com 11211368Sandreas.hansson@arm.com# do not worry about reserving space for the backing store 11311368Sandreas.hansson@arm.comsystem.mmap_using_noreserve = True 11411368Sandreas.hansson@arm.com 11511368Sandreas.hansson@arm.com# currently not exposed as command-line options, set here for now 11611368Sandreas.hansson@arm.comoptions.mem_channels = 1 11711368Sandreas.hansson@arm.comoptions.mem_ranks = 1 11811368Sandreas.hansson@arm.comoptions.external_memory_system = 0 11911368Sandreas.hansson@arm.comoptions.tlm_memory = 0 12011368Sandreas.hansson@arm.comoptions.elastic_trace_en = 0 12111368Sandreas.hansson@arm.com 12211368Sandreas.hansson@arm.comMemConfig.config_mem(options, system) 12311368Sandreas.hansson@arm.com 12411368Sandreas.hansson@arm.com# there is no point slowing things down by saving any data 12511368Sandreas.hansson@arm.comfor ctrl in system.mem_ctrls: 12611368Sandreas.hansson@arm.com ctrl.null = True 12711368Sandreas.hansson@arm.com 12811368Sandreas.hansson@arm.com # the following assumes that we are using the native DRAM 12911368Sandreas.hansson@arm.com # controller, check to be sure 13011368Sandreas.hansson@arm.com if isinstance(ctrl, m5.objects.DRAMCtrl): 13111368Sandreas.hansson@arm.com # make the DRAM refresh interval sufficiently infinite to avoid 13211368Sandreas.hansson@arm.com # latency spikes 13311368Sandreas.hansson@arm.com ctrl.tREFI = '100s' 13411368Sandreas.hansson@arm.com 13511368Sandreas.hansson@arm.com# use the same concept as the utilisation sweep, and print 
the config 13611368Sandreas.hansson@arm.com# so that we can later read it in 13711368Sandreas.hansson@arm.comcfg_file_name = os.path.join(m5.options.outdir, "lat_mem_rd.cfg") 13811368Sandreas.hansson@arm.comcfg_file = open(cfg_file_name, 'w') 13911368Sandreas.hansson@arm.com 14011368Sandreas.hansson@arm.com# set an appropriate burst length in bytes 14111368Sandreas.hansson@arm.comburst_size = 64 14211368Sandreas.hansson@arm.comsystem.cache_line_size = burst_size 14311368Sandreas.hansson@arm.com 14411368Sandreas.hansson@arm.com# lazy version to check if an integer is a power of two 14511368Sandreas.hansson@arm.comdef is_pow2(num): 14611368Sandreas.hansson@arm.com return num != 0 and ((num & (num - 1)) == 0) 14711368Sandreas.hansson@arm.com 14811368Sandreas.hansson@arm.com# assume we start every range at 0 14911368Sandreas.hansson@arm.commax_range = int(mem_range.end) 15011368Sandreas.hansson@arm.com 15111368Sandreas.hansson@arm.com# start at a size of 4 kByte, and go up till we hit the max, increase 15211368Sandreas.hansson@arm.com# the step every time we hit a power of two 15311368Sandreas.hansson@arm.commin_range = 4096 15411368Sandreas.hansson@arm.comranges = [min_range] 15511368Sandreas.hansson@arm.comstep = 1024 15611368Sandreas.hansson@arm.com 15711368Sandreas.hansson@arm.comwhile ranges[-1] < max_range: 15811368Sandreas.hansson@arm.com new_range = ranges[-1] + step 15911368Sandreas.hansson@arm.com if is_pow2(new_range): 16011368Sandreas.hansson@arm.com step *= 2 16111368Sandreas.hansson@arm.com ranges.append(new_range) 16211368Sandreas.hansson@arm.com 16311368Sandreas.hansson@arm.com# how many times to repeat the measurement for each data point 16411368Sandreas.hansson@arm.comiterations = 2 16511368Sandreas.hansson@arm.com 16611368Sandreas.hansson@arm.com# 150 ns in ticks, this is choosen to be high enough that transactions 16711368Sandreas.hansson@arm.com# do not pile up in the system, adjust if needed 16811368Sandreas.hansson@arm.comitt = 150 * 1000 

# for every data point, we create a trace containing a random address
# sequence, so that we can play back the same sequence for warming and
# the actual measurement
def create_trace(filename, max_addr, burst_size, itt):
    """Write a gzipped protobuf trace of shuffled read requests.

    One read of burst_size bytes is emitted for every burst-aligned
    address in [0, max_addr), in random order.

    filename   -- path of the gzip-compressed trace file to create
    max_addr   -- exclusive upper bound of the addresses to touch
    burst_size -- size in bytes of every request, also the address stride
    itt        -- number of ticks between two consecutive requests

    Prints a message and exits with status -1 if the file cannot be
    opened for writing.
    """
    import random
    import sys

    try:
        proto_out = gzip.open(filename, 'wb')
    except IOError:
        print("Failed to open ", filename, " for writing")
        sys.exit(-1)

    # the with-block makes sure the file is closed even if encoding
    # one of the messages fails
    with proto_out:
        # write the magic number in 4-byte Little Endian, similar to what
        # is done in src/proto/protoio.cc; the stream is binary, so this
        # has to be a bytes literal
        proto_out.write(b"gem5")

        # add the packet header
        header = packet_pb2.PacketHeader()
        header.obj_id = "lat_mem_rd for range 0:" + str(max_addr)
        # assume the default tick rate (1 ps)
        header.tick_freq = 1000000000000
        protolib.encodeMessage(proto_out, header)

        # create a list of every single address to touch
        addrs = list(range(0, max_addr, burst_size))
        random.shuffle(addrs)

        # create a packet we can re-use for all the addresses
        packet = packet_pb2.Packet()
        # ReadReq is 1 in src/mem/packet.hh Command enum
        packet.cmd = 1
        packet.size = int(burst_size)

        tick = 0
        for addr in addrs:
            # int() rather than long(), the latter no longer exists in
            # Python 3 and int is promoted automatically in Python 2
            packet.tick = int(tick)
            packet.addr = int(addr)
            protolib.encodeMessage(proto_out, packet)
            tick = tick + itt

# this will take a while, so keep the user informed
print("Generating traces, please wait...")

nxt_state = 0
# the time needed to play back the largest trace; use floor division
# explicitly so that the result is the same integer number of ticks
# irrespective of the Python version
period = int(itt * (max_range // burst_size))

# now we create the states for each range
for range_idx, r in enumerate(ranges):
    filename = os.path.join(m5.options.outdir,
                            'lat_mem_rd%d.trc.gz' % range_idx)

    if not options.reuse_trace:
        # create the actual random trace for this range
        create_trace(filename, r, burst_size, itt)

    # first the warming state, then the measuring states, all of them
    # playing back the very same trace
    for _ in range(1 + iterations):
        cfg_file.write("STATE %d %d TRACE %s 0\n" %
                       (nxt_state, period, filename))
        nxt_state = nxt_state + 1

cfg_file.write("INIT 0\n")

# go through the states one by one
for state in range(1, nxt_state):
    cfg_file.write("TRANSITION %d %d 1\n" % (state - 1, state))

# stay in the last state once we get there
cfg_file.write("TRANSITION %d %d 1\n" % (nxt_state - 1, nxt_state - 1))

cfg_file.close()

# create a traffic generator, and point it to the file we just created
system.tgen = TrafficGen(config_file=cfg_file_name,
                         progress_check='10s')

# add a communication monitor
system.monitor = CommMonitor()
system.monitor.footprint = MemFootprintProbe()

# connect the traffic generator to the system
system.tgen.port = system.monitor.slave

# create the actual cache hierarchy, for now just go with something
# basic to explore some of the options
from common.Caches import *

# a starting point for an L3 cache
class L3Cache(Cache):
    assoc = 16
    tag_latency = 20
    data_latency = 20
    sequential_access = True
    response_latency = 40
    mshrs = 32
    tgts_per_mshr = 12
    write_buffers = 16

# note that everything is in the same clock domain, 2.0 GHz as
# specified above
system.l1cache = L1_DCache(size='64kB')
system.monitor.master = system.l1cache.cpu_side

system.l2cache = L2Cache(size='512kB', writeback_clean=True)
system.l2cache.xbar = L2XBar()
system.l1cache.mem_side = system.l2cache.xbar.slave
system.l2cache.cpu_side = system.l2cache.xbar.master

# make the L3 mostly exclusive, and correspondingly ensure that the L2
# writes back also clean lines to the L3
system.l3cache = L3Cache(size='4MB', clusivity='mostly_excl')
system.l3cache.xbar = L2XBar()
system.l2cache.mem_side = system.l3cache.xbar.slave
system.l3cache.cpu_side = system.l3cache.xbar.master
system.l3cache.mem_side = system.membus.slave

# connect the system port even if it is not used in this example
system.system_port = system.membus.slave

# every period, dump and reset all stats
periodicStatDump(period)

# run Forrest, run!
root = Root(full_system=False, system=system)
root.system.mem_mode = 'timing'

m5.instantiate()

# give every state in the generated config one full period
total_ticks = nxt_state * period
m5.simulate(total_ticks)

# print all we need to make sense of the stats output
print("lat_mem_rd with %d iterations, ranges:" % iterations)
for r in ranges:
    print(r)