1# Copyright (c) 2015-2016 ARM Limited 2# All rights reserved. 3# 4# The license below extends only to copyright in the software and shall 5# not be construed as granting a license to any other intellectual 6# property including but not limited to intellectual property relating 7# to a hardware implementation of the functionality of the software 8# licensed hereunder. You may use the software subject to the license 9# terms below provided that you ensure that this notice is replicated 10# unmodified and in its entirety in all distributions of the software, 11# modified or unmodified, in source code or in binary form. 12# 13# Redistribution and use in source and binary forms, with or without 14# modification, are permitted provided that the following conditions are 15# met: redistributions of source code must retain the above copyright 16# notice, this list of conditions and the following disclaimer; 17# redistributions in binary form must reproduce the above copyright 18# notice, this list of conditions and the following disclaimer in the 19# documentation and/or other materials provided with the distribution; 20# neither the name of the copyright holders nor the names of its 21# contributors may be used to endorse or promote products derived from 22# this software without specific prior written permission. 23# 24# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 27# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 28# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 29# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 30# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 31# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 32# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 33# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35# 36# Authors: Andreas Hansson 37 38from __future__ import print_function 39from __future__ import absolute_import 40 41import gzip 42import optparse 43import os 44 45import m5 46from m5.objects import * 47from m5.util import addToPath 48from m5.stats import periodicStatDump 49 50addToPath('../') 51from common import MemConfig 52 53addToPath('../../util') 54import protolib 55 56# this script is helpful to observe the memory latency for various 57# levels in a cache hierarchy, and various cache and memory 58# configurations, in essence replicating the lmbench lat_mem_rd thrash 59# behaviour 60 61# import the packet proto definitions, and if they are not found, 62# attempt to generate them automatically 63try: 64 import packet_pb2 65except: 66 print("Did not find packet proto definitions, attempting to generate") 67 from subprocess import call 68 error = call(['protoc', '--python_out=configs/dram', 69 '--proto_path=src/proto', 'src/proto/packet.proto']) 70 if not error: 71 print("Generated packet proto definitions") 72 73 try: 74 import google.protobuf 75 except: 76 print("Please install the Python protobuf module") 77 exit(-1) 78 79 import packet_pb2 80 else: 81 print("Failed to import packet proto definitions") 82 exit(-1) 83 84parser = optparse.OptionParser() 85 86parser.add_option("--mem-type", type="choice", default="DDR3_1600_8x8", 87 choices=MemConfig.mem_names(), 88 help = "type of memory to use") 
parser.add_option("--mem-size", action="store", type="string",
                  default="16MB",
                  help="Specify the memory size")
parser.add_option("--reuse-trace", action="store_true",
                  help="Prevent generation of traces and reuse existing")

(options, args) = parser.parse_args()

if args:
    print("Error: script doesn't take any positional arguments")
    # sys is not imported at the top of this script, bring it into
    # scope here so that the exit does not fail with a NameError
    import sys
    sys.exit(1)

# start by creating the system itself, using a multi-layer 2.0 GHz
# crossbar, delivering 64 bytes / 3 cycles (one header cycle) which
# amounts to 42.7 GByte/s per layer and thus per port
system = System(membus = SystemXBar(width = 32))
system.clk_domain = SrcClockDomain(clock = '2.0GHz',
                                   voltage_domain =
                                   VoltageDomain(voltage = '1V'))

mem_range = AddrRange(options.mem_size)
system.mem_ranges = [mem_range]

# do not worry about reserving space for the backing store
system.mmap_using_noreserve = True

# currently not exposed as command-line options, set here for now
options.mem_channels = 1
options.mem_ranks = 1
options.external_memory_system = 0
options.tlm_memory = 0
options.elastic_trace_en = 0

MemConfig.config_mem(options, system)

# there is no point slowing things down by saving any data
for ctrl in system.mem_ctrls:
    ctrl.null = True

    # the following assumes that we are using the native DRAM
    # controller, check to be sure
    if isinstance(ctrl, m5.objects.DRAMCtrl):
        # make the DRAM refresh interval sufficiently infinite to avoid
        # latency spikes
        ctrl.tREFI = '100s'

# use the same concept as the utilisation sweep, and print the config
# so that we can later read it in; the file is closed further down,
# once all states and transitions have been written
cfg_file_name = os.path.join(m5.options.outdir, "lat_mem_rd.cfg")
cfg_file = open(cfg_file_name, 'w')

# set an appropriate burst length in bytes
burst_size = 64
system.cache_line_size = burst_size

# lazy version to check if an integer is a power of two
def is_pow2(num):
    """Return True if num is a non-zero power of two, False otherwise."""
    # a power of two has a single bit set, so clearing the lowest set
    # bit with num & (num - 1) leaves zero
    return num != 0 and ((num & (num - 1)) == 0)

# assume we start every range at 0
max_range = int(mem_range.end)

# start at a size of 4 kByte, and go up till we hit the max, increase
# the step every time we hit a power of two
min_range = 4096
ranges = [min_range]
step = 1024

while ranges[-1] < max_range:
    new_range = ranges[-1] + step
    if is_pow2(new_range):
        step *= 2
    ranges.append(new_range)

# how many times to repeat the measurement for each data point
iterations = 2

# 150 ns in ticks, this is chosen to be high enough that transactions
# do not pile up in the system, adjust if needed
itt = 150 * 1000

# for every data point, we create a trace containing a random address
# sequence, so that we can play back the same sequence for warming and
# the actual measurement
def create_trace(filename, max_addr, burst_size, itt):
    """Write a gzip-compressed packet trace of randomly ordered reads.

    The trace contains one read request of burst_size bytes for every
    burst_size-aligned address in [0, max_addr), in shuffled order, with
    consecutive packets spaced itt ticks apart starting at tick 0.

    filename   -- path of the gzip file to create
    max_addr   -- exclusive upper bound of the addresses to touch
    burst_size -- size of each request, and stride between addresses
    itt        -- inter-transaction time in ticks

    If the file cannot be opened for writing, a message is printed and
    the script exits with -1.
    """
    import random

    try:
        proto_out = gzip.open(filename, 'wb')
    except IOError:
        print("Failed to open ", filename, " for writing")
        exit(-1)

    # make sure the file is closed even if encoding a message fails
    with proto_out:
        # write the magic number in 4-byte Little Endian, similar to what
        # is done in src/proto/protoio.cc; the file is opened in binary
        # mode, so this has to be a bytes literal
        proto_out.write(b"gem5")

        # add the packet header
        header = packet_pb2.PacketHeader()
        header.obj_id = "lat_mem_rd for range 0:" + str(max_addr)
        # assume the default tick rate (1 ps)
        header.tick_freq = 1000000000000
        protolib.encodeMessage(proto_out, header)

        # create a list of every single address to touch
        addrs = list(range(0, max_addr, burst_size))
        random.shuffle(addrs)

        tick = 0

        # create a packet we can re-use for all the addresses
        packet = packet_pb2.Packet()
        # ReadReq is 1 in src/mem/packet.hh Command enum
        packet.cmd = 1
        packet.size = int(burst_size)

        for addr in addrs:
            # int rather than long, as the latter only exists in
            # Python 2, where int is promoted automatically when needed
            packet.tick = int(tick)
            packet.addr = int(addr)
            protolib.encodeMessage(proto_out, packet)
            tick = tick + itt
# this will take a while, so keep the user informed
print("Generating traces, please wait...")

nxt_state = 0

# the duration of every state, long enough to play back one request
# per burst in the largest range; use int and floor division so that
# this is an exact integer on both Python 2 and Python 3 (long only
# exists in Python 2, and / is a true division in Python 3)
period = int(itt * (max_range // burst_size))

# now we create the states for each range, every range gets its own
# numbered trace file
for nxt_range, r in enumerate(ranges):
    filename = os.path.join(m5.options.outdir,
                            'lat_mem_rd%d.trc.gz' % nxt_range)

    if not options.reuse_trace:
        # create the actual random trace for this range
        create_trace(filename, r, burst_size, itt)

    # the warming state
    cfg_file.write("STATE %d %d TRACE %s 0\n" %
                   (nxt_state, period, filename))
    nxt_state = nxt_state + 1

    # the measuring states, replaying the same trace as the warming
    for _ in range(iterations):
        cfg_file.write("STATE %d %d TRACE %s 0\n" %
                       (nxt_state, period, filename))
        nxt_state = nxt_state + 1

cfg_file.write("INIT 0\n")

# go through the states one by one
for state in range(1, nxt_state):
    cfg_file.write("TRANSITION %d %d 1\n" % (state - 1, state))

# the last state transitions to itself
cfg_file.write("TRANSITION %d %d 1\n" % (nxt_state - 1, nxt_state - 1))

cfg_file.close()

# create a traffic generator, and point it to the file we just created
system.tgen = TrafficGen(config_file = cfg_file_name,
                         progress_check = '10s')

# add a communication monitor
system.monitor = CommMonitor()
system.monitor.footprint = MemFootprintProbe()

# connect the traffic generator to the system
system.tgen.port = system.monitor.slave

# create the actual cache hierarchy, for now just go with something
# basic to explore some of the options
from common.Caches import *

# a starting point for an L3 cache
class L3Cache(Cache):
    assoc = 16
    tag_latency = 20
    data_latency = 20
    sequential_access = True
    response_latency = 40
    mshrs = 32
    tgts_per_mshr = 12
    write_buffers = 16

# note that everything is in the same clock domain, 2.0 GHz as
# specified above
system.l1cache = L1_DCache(size = '64kB')
system.monitor.master = system.l1cache.cpu_side

system.l2cache = L2Cache(size = '512kB', writeback_clean = True)
system.l2cache.xbar = L2XBar()
system.l1cache.mem_side = system.l2cache.xbar.slave
system.l2cache.cpu_side = system.l2cache.xbar.master

# make the L3 mostly exclusive, and correspondingly ensure that the L2
# writes back also clean lines to the L3
system.l3cache = L3Cache(size = '4MB', clusivity = 'mostly_excl')
system.l3cache.xbar = L2XBar()
system.l2cache.mem_side = system.l3cache.xbar.slave
system.l3cache.cpu_side = system.l3cache.xbar.master
system.l3cache.mem_side = system.membus.slave

# connect the system port even if it is not used in this example
system.system_port = system.membus.slave

# every period, dump and reset all stats
periodicStatDump(period)

# run Forrest, run!
root = Root(full_system = False, system = system)
root.system.mem_mode = 'timing'

m5.instantiate()
m5.simulate(nxt_state * period)

# print all we need to make sense of the stats output
print("lat_mem_rd with %d iterations, ranges:" % iterations)
for r in ranges:
    print(r)