1# Copyright (c) 2015-2016 ARM Limited
2# All rights reserved.
3#
4# The license below extends only to copyright in the software and shall
5# not be construed as granting a license to any other intellectual
6# property including but not limited to intellectual property relating
7# to a hardware implementation of the functionality of the software
8# licensed hereunder.  You may use the software subject to the license
9# terms below provided that you ensure that this notice is replicated
10# unmodified and in its entirety in all distributions of the software,
11# modified or unmodified, in source code or in binary form.
12#
13# Redistribution and use in source and binary forms, with or without
14# modification, are permitted provided that the following conditions are
15# met: redistributions of source code must retain the above copyright
16# notice, this list of conditions and the following disclaimer;
17# redistributions in binary form must reproduce the above copyright
18# notice, this list of conditions and the following disclaimer in the
19# documentation and/or other materials provided with the distribution;
20# neither the name of the copyright holders nor the names of its
21# contributors may be used to endorse or promote products derived from
22# this software without specific prior written permission.
23#
24# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35#
36# Authors: Andreas Hansson
37
38from __future__ import print_function
39from __future__ import absolute_import
40
import gzip
import optparse
import os
import sys
44
45import m5
46from m5.objects import *
47from m5.util import addToPath
48from m5.stats import periodicStatDump
49
50addToPath('../')
51from common import MemConfig
52
53addToPath('../../util')
54import protolib
55
56# this script is helpful to observe the memory latency for various
57# levels in a cache hierarchy, and various cache and memory
58# configurations, in essence replicating the lmbench lat_mem_rd thrash
59# behaviour
60
# import the packet proto definitions, and if they cannot be loaded,
# attempt to generate them automatically with protoc
try:
    import packet_pb2
except Exception:
    # any failure to load the generated module triggers a regeneration
    # attempt; catching Exception rather than using a bare except keeps
    # KeyboardInterrupt and SystemExit from being swallowed here
    print("Did not find packet proto definitions, attempting to generate")
    from subprocess import call

    # note that all paths handed to protoc are relative to the current
    # working directory
    try:
        error = call(['protoc', '--python_out=configs/dram',
                      '--proto_path=src/proto', 'src/proto/packet.proto'])
    except OSError:
        # protoc could not be executed at all (e.g. it is not
        # installed), treat this like any other generation failure
        error = 1

    if error:
        print("Failed to import packet proto definitions")
        sys.exit(-1)

    print("Generated packet proto definitions")

    # the generated module depends on the protobuf runtime, so give a
    # helpful hint if that is what is missing
    try:
        import google.protobuf
    except ImportError:
        print("Please install the Python protobuf module")
        sys.exit(-1)

    import packet_pb2
83
# command-line interface: only the memory type and size can be chosen,
# plus a switch to skip the (slow) trace generation
parser = optparse.OptionParser()

parser.add_option("--mem-type", type="choice", default="DDR3_1600_8x8",
                  choices=MemConfig.mem_names(),
                  help="type of memory to use")
parser.add_option("--mem-size", action="store", type="string",
                  default="16MB",
                  help="Specify the memory size")
parser.add_option("--reuse-trace", action="store_true",
                  help="Prevent generation of traces and reuse existing")

(options, args) = parser.parse_args()

# anything left over after option parsing is a usage error; this relies
# on sys being imported at the top of the file
if args:
    print("Error: script doesn't take any positional arguments")
    sys.exit(1)
100
# the system is built around a multi-layer crossbar clocked at 2.0 GHz,
# delivering 64 bytes / 3 cycles (one header cycle), which amounts to
# 42.7 GByte/s per layer and thus per port
system = System(membus=SystemXBar(width=32))
system.clk_domain = SrcClockDomain(
    clock='2.0GHz',
    voltage_domain=VoltageDomain(voltage='1V'))

# the only memory range is derived from the requested memory size
mem_range = AddrRange(options.mem_size)
system.mem_ranges = [mem_range]

# do not worry about reserving space for the backing store
system.mmap_using_noreserve = True

# these settings are read from the options object but are currently not
# exposed as command-line options, so they are fixed here for now
for opt_name, opt_value in (('mem_channels', 1),
                            ('mem_ranks', 1),
                            ('external_memory_system', 0),
                            ('tlm_memory', 0),
                            ('elastic_trace_en', 0)):
    setattr(options, opt_name, opt_value)

MemConfig.config_mem(options, system)

for ctrl in system.mem_ctrls:
    # there is no point slowing things down by saving any data
    ctrl.null = True

    # the refresh tweak below assumes the native DRAM controller, so
    # leave every other kind of controller untouched
    if not isinstance(ctrl, m5.objects.DRAMCtrl):
        continue

    # make the DRAM refresh interval sufficiently infinite to avoid
    # latency spikes
    ctrl.tREFI = '100s'
134
# burst length in bytes, also used as the cache line size of the system
burst_size = 64
system.cache_line_size = burst_size

# following the concept of the utilisation sweep, the traffic generator
# configuration is written to a file in the output directory so that it
# can be read back in later
cfg_file_name = os.path.join(m5.options.outdir, "lat_mem_rd.cfg")
cfg_file = open(cfg_file_name, 'w')
143
144# lazy version to check if an integer is a power of two
def is_pow2(num):
    """Return True if the integer num is a power of two, else False.

    Zero is not treated as a power of two, and neither is any negative
    value.
    """
    if num == 0:
        return False

    # a power of two has exactly one bit set, so clearing the lowest set
    # bit (num & (num - 1)) must leave nothing behind
    return (num & (num - 1)) == 0
147
# every range is assumed to start at 0, so the end of the memory range
# is the largest footprint to measure
max_range = int(mem_range.end)

# the footprints start at 4 kByte and grow until the max is reached;
# the increment starts at 1 kByte and doubles every time the footprint
# lands on a power of two
min_range = 4096
ranges = [min_range]
step = 1024

while ranges[-1] < max_range:
    candidate = ranges[-1] + step
    ranges.append(candidate)
    if is_pow2(candidate):
        step += step

# how many times to repeat the measurement for each data point
iterations = 2

# inter-transaction time of 150 ns expressed in ticks; this is chosen
# to be high enough that transactions do not pile up in the system,
# adjust if needed
itt = 150 * 1000
169
170# for every data point, we create a trace containing a random address
171# sequence, so that we can play back the same sequence for warming and
172# the actual measurement
def create_trace(filename, max_addr, burst_size, itt):
    """Write a gzip-compressed protobuf packet trace of shuffled reads.

    The trace contains one read request for every multiple of
    burst_size in [0, max_addr), visited in random order.  Requests are
    spaced itt ticks apart, starting at tick 0.

    filename   -- path of the trace file to create
    max_addr   -- exclusive upper bound of the addresses to touch
    burst_size -- size of every request in bytes, and the address stride
    itt        -- inter-transaction time in ticks

    Terminates the script with exit code -1 if the file cannot be
    opened for writing.
    """
    import random

    try:
        proto_out = gzip.open(filename, 'wb')
    except IOError:
        print("Failed to open ", filename, " for writing")
        sys.exit(-1)

    # the context manager makes sure the trace is flushed and closed
    # even if encoding one of the messages fails
    with proto_out:
        # write the magic number in 4-byte Little Endian, similar to
        # what is done in src/proto/protoio.cc; the stream is binary,
        # so this has to be a bytes literal to work with Python 3
        proto_out.write(b"gem5")

        # add the packet header
        header = packet_pb2.PacketHeader()
        header.obj_id = "lat_mem_rd for range 0:" + str(max_addr)
        # assume the default tick rate (1 ps)
        header.tick_freq = 1000000000000
        protolib.encodeMessage(proto_out, header)

        # create a list of every single address to touch, in random
        # order
        addrs = list(range(0, max_addr, burst_size))
        random.shuffle(addrs)

        # create a packet we can re-use for all the addresses
        packet = packet_pb2.Packet()
        # ReadReq is 1 in src/mem/packet.hh Command enum
        packet.cmd = 1
        packet.size = int(burst_size)

        # int() replaces the Python 2 only long(); Python 2 promotes
        # large values to long on its own
        for index, addr in enumerate(addrs):
            packet.tick = int(index * itt)
            packet.addr = int(addr)
            protolib.encodeMessage(proto_out, packet)
212
# this will take a while, so keep the user informed
print("Generating traces, please wait...")

nxt_range = 0
nxt_state = 0

# duration of every state in ticks: one inter-transaction time for each
# burst in the largest range.  Floor division keeps this an exact
# integer on both Python 2 and Python 3, and int() replaces the
# Python 2 only long()
period = int(itt * (max_range // burst_size))

# make sure the config file is closed even if generating a trace fails
try:
    # now we create the states for each range
    for r in ranges:
        filename = os.path.join(m5.options.outdir,
                                'lat_mem_rd%d.trc.gz' % nxt_range)

        if not options.reuse_trace:
            # create the actual random trace for this range
            create_trace(filename, r, burst_size, itt)

        # one warming state followed by the measuring states, all of
        # them playing back the very same trace
        for _ in range(1 + iterations):
            cfg_file.write("STATE %d %d TRACE %s 0\n" %
                           (nxt_state, period, filename))
            nxt_state = nxt_state + 1

        nxt_range = nxt_range + 1

    cfg_file.write("INIT 0\n")

    # go through the states one by one
    for state in range(1, nxt_state):
        cfg_file.write("TRANSITION %d %d 1\n" % (state - 1, state))

    # the last state transitions to itself
    cfg_file.write("TRANSITION %d %d 1\n" % (nxt_state - 1, nxt_state - 1))
finally:
    cfg_file.close()
251
# the traffic generator is driven by the config file written to
# cfg_file_name
system.tgen = TrafficGen(config_file=cfg_file_name, progress_check='10s')

# a communication monitor with a memory footprint probe attached
system.monitor = CommMonitor()
system.monitor.footprint = MemFootprintProbe()

# the traffic generator feeds the system through the monitor
system.tgen.port = system.monitor.slave
262
263# create the actual cache hierarchy, for now just go with something
264# basic to explore some of the options
265from common.Caches import *
266
267# a starting point for an L3 cache
class L3Cache(Cache):
    # organisation
    assoc = 16
    sequential_access = True

    # latencies (presumably in cycles of the cache's clock domain,
    # to be confirmed against the Cache parameter definitions)
    tag_latency = 20
    data_latency = 20
    response_latency = 40

    # outstanding-transaction resources
    mshrs = 32
    tgts_per_mshr = 12
    write_buffers = 16
277
# all caches live in the one clock domain of the system, i.e. they run
# at the 2.0 GHz specified for it

# L1: sits directly behind the monitor
system.l1cache = L1_DCache(size='64kB')
system.monitor.master = system.l1cache.cpu_side

# L2: reached through a crossbar of its own
system.l2cache = L2Cache(size='512kB', writeback_clean=True)
system.l2cache.xbar = L2XBar()
system.l1cache.mem_side = system.l2cache.xbar.slave
system.l2cache.cpu_side = system.l2cache.xbar.master

# L3: mostly exclusive, which is also why the L2 above is set up to
# write back clean lines
system.l3cache = L3Cache(size='4MB', clusivity='mostly_excl')
system.l3cache.xbar = L2XBar()
system.l2cache.mem_side = system.l3cache.xbar.slave
system.l3cache.cpu_side = system.l3cache.xbar.master
system.l3cache.mem_side = system.membus.slave

# the system port is connected even though this example does not use it
system.system_port = system.membus.slave
298
# dump and reset all stats once per period, i.e. once per state
periodicStatDump(period)

# run Forrest, run!
root = Root(full_system=False, system=system)
root.system.mem_mode = 'timing'

m5.instantiate()

# every state lasts one period, so this covers all of the states
total_ticks = nxt_state * period
m5.simulate(total_ticks)

# print all we need to make sense of the stats output
print("lat_mem_rd with %d iterations, ranges:" % iterations)
for range_size in ranges:
    print(range_size)
313