# Copyright (c) 2015-2016 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder.  You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Andreas Hansson

from __future__ import print_function
from __future__ import absolute_import

import gzip
import optparse
import os
import sys

import m5
from m5.objects import *
from m5.util import addToPath
from m5.stats import periodicStatDump

addToPath('../')
from common import MemConfig

addToPath('../../util')
import protolib

# This script is helpful to observe the memory latency for various
# levels in a cache hierarchy, and various cache and memory
# configurations, in essence replicating the lmbench lat_mem_rd thrash
# behaviour.

# Import the packet proto definitions, and if they are not found,
# attempt to generate them automatically.
try:
    import packet_pb2
except Exception:
    # Deliberately broad, but no longer a bare "except": any failure to
    # load the definitions triggers a regeneration attempt, while
    # KeyboardInterrupt and SystemExit still propagate.
    print("Did not find packet proto definitions, attempting to generate")
    from subprocess import call
    # NOTE: these are relative paths, so protoc resolves them against
    # the current working directory (presumably the gem5 source root --
    # confirm when running from elsewhere).
    error = call(['protoc', '--python_out=configs/dram',
                  '--proto_path=src/proto', 'src/proto/packet.proto'])
    if not error:
        print("Generated packet proto definitions")

        # The generated module depends on the protobuf runtime, so check
        # for it explicitly to give a helpful message.
        try:
            import google.protobuf
        except ImportError:
            print("Please install the Python protobuf module")
            sys.exit(-1)

        import packet_pb2
    else:
        print("Failed to import packet proto definitions")
        sys.exit(-1)

# Command-line options understood by this script.
parser = optparse.OptionParser()

parser.add_option("--mem-type", type="choice", default="DDR3_1600_8x8",
                  choices=MemConfig.mem_names(),
                  help="type of memory to use")
parser.add_option("--mem-size", action="store", type="string",
                  default="16MB",
                  help="Specify the memory size")
parser.add_option("--reuse-trace", action="store_true",
                  help="Prevent generation of traces and reuse existing")

(options, args) = parser.parse_args()

# Anything left over after option parsing is a usage error; sys comes
# from the top-of-file imports.
if args:
    print("Error: script doesn't take any positional arguments")
    sys.exit(1)

# Start by creating the system itself, using a multi-layer 2.0 GHz
# crossbar, delivering 64 bytes / 3 cycles (one header cycle) which
# amounts to 42.7 GByte/s per layer and thus per port.
system = System(membus = SystemXBar(width = 32))
system.clk_domain = SrcClockDomain(clock = '2.0GHz',
                                   voltage_domain =
                                   VoltageDomain(voltage = '1V'))

# A single address range sized by --mem-size.
mem_range = AddrRange(options.mem_size)
system.mem_ranges = [mem_range]

# Do not worry about reserving space for the backing store.
system.mmap_using_noreserve = True

# Currently not exposed as command-line options, set here for now;
# the options object is handed to MemConfig.config_mem() below.
options.mem_channels = 1
options.mem_ranks = 1
options.external_memory_system = 0
options.tlm_memory = 0
options.elastic_trace_en = 0

MemConfig.config_mem(options, system)

# There is no point slowing things down by saving any data.
for ctrl in system.mem_ctrls:
    ctrl.null = True

    # The following assumes that we are using the native DRAM
    # controller, check to be sure.
    if isinstance(ctrl, m5.objects.DRAMCtrl):
        # Make the DRAM refresh interval sufficiently infinite to avoid
        # latency spikes.
        ctrl.tREFI = '100s'

# Use the same concept as the utilisation sweep, and print the config
# so that we can later read it in. The file is filled in and closed
# further down, once all the states are known.
cfg_file_name = os.path.join(m5.options.outdir, "lat_mem_rd.cfg")
cfg_file = open(cfg_file_name, 'w')

# Set an appropriate burst length in bytes; the same value is used as
# the system cache line size.
burst_size = 64
system.cache_line_size = burst_size

# lazy version to check if an integer is a power of two
def is_pow2(num):
    """Return True if the integer num is a power of two, else False.

    A power of two has exactly one bit set, so clearing the lowest set
    bit with num & (num - 1) leaves zero. Zero itself is excluded
    explicitly because 0 & -1 is also zero.
    """
    return num != 0 and ((num & (num - 1)) == 0)

# Assume we start every range at 0.
max_range = int(mem_range.end)

# Start at a size of 4 kByte, and go up till we hit the max, increase
# the step every time we hit a power of two.
min_range = 4096
ranges = [min_range]
step = 1024

while ranges[-1] < max_range:
    new_range = ranges[-1] + step
    if is_pow2(new_range):
        step *= 2
    ranges.append(new_range)

# How many times to repeat the measurement for each data point.
iterations = 2

# 150 ns in ticks, this is chosen to be high enough that transactions
# do not pile up in the system, adjust if needed.
itt = 150 * 1000

# for every data point, we create a trace containing a random address
# sequence, so that we can play back the same sequence for warming and
# the actual measurement
def create_trace(filename, max_addr, burst_size, itt):
    """Write a gzip-compressed protobuf trace of randomly ordered reads.

    The trace holds one read packet for every address in
    range(0, max_addr, burst_size), visited in shuffled order, with
    consecutive packets spaced itt ticks apart starting at tick 0.

    Args:
        filename: path of the gzip trace file to create.
        max_addr: exclusive upper bound of the addresses to touch.
        burst_size: address stride and size of every packet, in bytes.
        itt: number of ticks between two consecutive packets.

    Exits the process with status -1 if the file cannot be opened.
    """
    import random

    try:
        proto_out = gzip.open(filename, 'wb')
    except IOError:
        print("Failed to open ", filename, " for writing")
        sys.exit(-1)

    # the context manager closes the stream even if encoding fails
    with proto_out:
        # write the magic number in 4-byte Little Endian, similar to what
        # is done in src/proto/protoio.cc; the stream is binary, so this
        # has to be a bytes literal
        proto_out.write(b"gem5")

        # add the packet header
        header = packet_pb2.PacketHeader()
        header.obj_id = "lat_mem_rd for range 0:" + str(max_addr)
        # assume the default tick rate (1 ps)
        header.tick_freq = 1000000000000
        protolib.encodeMessage(proto_out, header)

        # create a list of every single address to touch, in random order
        addrs = list(range(0, max_addr, burst_size))
        random.shuffle(addrs)

        # create a packet we can re-use for all the addresses
        packet = packet_pb2.Packet()
        # ReadReq is 1 in src/mem/packet.hh Command enum
        packet.cmd = 1
        packet.size = int(burst_size)

        tick = 0
        for addr in addrs:
            # int() is arbitrary precision, so it replaces the Python 2
            # only long() without changing the values
            packet.tick = int(tick)
            packet.addr = int(addr)
            protolib.encodeMessage(proto_out, packet)
            tick = tick + itt

# this will take a while, so keep the user informed
print("Generating traces, please wait...")

nxt_state = 0
# Duration of one state in ticks: one packet every itt ticks for every
# burst in the largest range. Floor division keeps this an exact integer
# on both Python 2 and Python 3, and int() replaces the Python 2 only
# long().
period = int(itt * (max_range // burst_size))

try:
    # now we create the states for each range
    for range_idx, r in enumerate(ranges):
        filename = os.path.join(m5.options.outdir,
                                'lat_mem_rd%d.trc.gz' % range_idx)

        if not options.reuse_trace:
            # create the actual random trace for this range
            create_trace(filename, r, burst_size, itt)

        # the warming state
        cfg_file.write("STATE %d %d TRACE %s 0\n" %
                       (nxt_state, period, filename))
        nxt_state = nxt_state + 1

        # the measuring states, replaying the same trace
        for _ in range(iterations):
            cfg_file.write("STATE %d %d TRACE %s 0\n" %
                           (nxt_state, period, filename))
            nxt_state = nxt_state + 1

    cfg_file.write("INIT 0\n")

    # go through the states one by one
    for state in range(1, nxt_state):
        cfg_file.write("TRANSITION %d %d 1\n" % (state - 1, state))

    # the last state transitions to itself
    cfg_file.write("TRANSITION %d %d 1\n" % (nxt_state - 1, nxt_state - 1))
finally:
    # make sure the config is flushed and closed even if trace
    # generation fails part-way through
    cfg_file.close()

# Create a traffic generator, and point it to the file we just created.
system.tgen = TrafficGen(config_file = cfg_file_name,
                         progress_check = '10s')

# Add a communication monitor, with a memory footprint probe attached.
system.monitor = CommMonitor()
system.monitor.footprint = MemFootprintProbe()

# Connect the traffic generator to the system, via the monitor.
system.tgen.port = system.monitor.slave

# Create the actual cache hierarchy, for now just go with something
# basic to explore some of the options.
from common.Caches import *

# a starting point for an L3 cache
class L3Cache(Cache):
    """Baseline L3 cache parameters used by this script.

    Size and clusivity are not fixed here; they are supplied where the
    cache is instantiated.
    """
    assoc = 16
    tag_latency = 20
    data_latency = 20
    sequential_access = True
    response_latency = 40
    mshrs = 32
    tgts_per_mshr = 12
    write_buffers = 16

# Note that everything is in the same clock domain, 2.0 GHz as
# specified above.

# L1: fed directly by the communication monitor.
system.l1cache = L1_DCache(size = '64kB')
system.monitor.master = system.l1cache.cpu_side

# L2: sits behind its own crossbar, which the L1 connects to.
system.l2cache = L2Cache(size = '512kB', writeback_clean = True)
system.l2cache.xbar = L2XBar()
system.l1cache.mem_side = system.l2cache.xbar.slave
system.l2cache.cpu_side = system.l2cache.xbar.master

# Make the L3 mostly exclusive, and correspondingly ensure that the L2
# writes back also clean lines to the L3.
system.l3cache = L3Cache(size = '4MB', clusivity = 'mostly_excl')
system.l3cache.xbar = L2XBar()
system.l2cache.mem_side = system.l3cache.xbar.slave
system.l3cache.cpu_side = system.l3cache.xbar.master
system.l3cache.mem_side = system.membus.slave

# Connect the system port even if it is not used in this example.
system.system_port = system.membus.slave

# Every period, dump and reset all stats.
periodicStatDump(period)

# Run Forrest, run!
root = Root(full_system = False, system = system)
root.system.mem_mode = 'timing'

m5.instantiate()
# One period per generated state (warming plus measuring, per range).
m5.simulate(nxt_state * period)

# Print all we need to make sense of the stats output.
print("lat_mem_rd with %d iterations, ranges:" % iterations)
for r in ranges:
    print(r)