lat_mem_rd.py (11368:2fd64ea0a7cb) lat_mem_rd.py (11656:02a0c6b9c057)
1# Copyright (c) 2015 ARM Limited
1# Copyright (c) 2015-2016 ARM Limited
2# All rights reserved.
3#
4# The license below extends only to copyright in the software and shall
5# not be construed as granting a license to any other intellectual
6# property including but not limited to intellectual property relating
7# to a hardware implementation of the functionality of the software
8# licensed hereunder. You may use the software subject to the license
9# terms below provided that you ensure that this notice is replicated
10# unmodified and in its entirety in all distributions of the software,
11# modified or unmodified, in source code or in binary form.
12#
13# Redistribution and use in source and binary forms, with or without
14# modification, are permitted provided that the following conditions are
15# met: redistributions of source code must retain the above copyright
16# notice, this list of conditions and the following disclaimer;
17# redistributions in binary form must reproduce the above copyright
18# notice, this list of conditions and the following disclaimer in the
19# documentation and/or other materials provided with the distribution;
20# neither the name of the copyright holders nor the names of its
21# contributors may be used to endorse or promote products derived from
22# this software without specific prior written permission.
23#
24# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35#
36# Authors: Andreas Hansson
37
38import gzip
39import optparse
40import os
41
42import m5
43from m5.objects import *
44from m5.util import addToPath
45from m5.internal.stats import periodicStatDump
46
47addToPath('../common')
48import MemConfig
49
50addToPath('../../util')
51import protolib
52
53# this script is helpful to observe the memory latency for various
54# levels in a cache hierarchy, and various cache and memory
55# configurations, in essence replicating the lmbench lat_mem_rd thrash
56# behaviour
57
# Import the packet proto definitions. If that fails for any reason,
# try to generate them with protoc and import them again.
try:
    import packet_pb2
except Exception:
    # 'except Exception' rather than a bare 'except' so that
    # KeyboardInterrupt and SystemExit are not swallowed here.
    print("Did not find packet proto definitions, attempting to generate")
    from subprocess import call
    try:
        error = call(['protoc', '--python_out=configs/dram',
                      '--proto_path=src/proto', 'src/proto/packet.proto'])
    except OSError:
        # protoc could not be launched at all (e.g. it is not installed);
        # treat this like a failed generation instead of a raw traceback
        error = 1

    if error:
        print("Failed to import packet proto definitions")
        raise SystemExit(-1)

    print("Generated packet proto definitions")

    # the generated module needs the protobuf runtime, so check for it
    # explicitly to give a more helpful message
    try:
        import google.protobuf
    except ImportError:
        print("Please install the Python protobuf module")
        raise SystemExit(-1)

    import packet_pb2
80
# command-line options understood by this script
parser = optparse.OptionParser()

parser.add_option("--mem-type", type="choice", default="DDR3_1600_x64",
                  choices=MemConfig.mem_names(),
                  help="type of memory to use")
parser.add_option("--mem-size", action="store", type="string",
                  default="16MB",
                  help="Specify the memory size")
parser.add_option("--reuse-trace", action="store_true",
                  help="Prevent generation of traces and reuse existing")

(options, args) = parser.parse_args()

if args:
    print("Error: script doesn't take any positional arguments")
    # The visible imports of this script do not include 'sys', so a
    # sys.exit(1) call here would fail with NameError. Raising SystemExit
    # directly gives the same exit status without needing the import.
    raise SystemExit(1)
97
# start by creating the system itself, using a multi-layer 2.0 GHz
# crossbar, delivering 64 bytes / 3 cycles (one header cycle) which
# amounts to 42.7 GByte/s per layer and thus per port
system = System(membus=SystemXBar(width=32))
system.clk_domain = SrcClockDomain(
    clock='2.0GHz',
    voltage_domain=VoltageDomain(voltage='1V'))

# a single address range sized by --mem-size
mem_range = AddrRange(options.mem_size)
system.mem_ranges = [mem_range]

# do not worry about reserving space for the backing store
system.mmap_using_noreserve = True

# currently not exposed as command-line options, set here for now
options.mem_channels = 1
options.mem_ranks = 1
options.external_memory_system = 0
options.tlm_memory = 0
options.elastic_trace_en = 0

MemConfig.config_mem(options, system)

# there is no point slowing things down by saving any data
for ctrl in system.mem_ctrls:
    ctrl.null = True

    # the following assumes that we are using the native DRAM
    # controller, check to be sure
    if isinstance(ctrl, m5.objects.DRAMCtrl):
        # make the DRAM refresh interval sufficiently infinite to avoid
        # latency spikes
        ctrl.tREFI = '100s'

# use the same concept as the utilisation sweep, and print the config
# so that we can later read it in; the file stays open while the state
# machine description is written and is closed once that is complete
cfg_file_name = os.path.join(m5.options.outdir, "lat_mem_rd.cfg")
cfg_file = open(cfg_file_name, 'w')

# set an appropriate burst length in bytes, and use the same value as
# the system cache line size
burst_size = 64
system.cache_line_size = burst_size
140
def is_pow2(num):
    """Return True if the integer ``num`` is a positive power of two.

    A power of two has exactly one bit set, so clearing its lowest set
    bit with ``num & (num - 1)`` leaves zero. Zero and negative values
    are never powers of two.
    """
    return num > 0 and (num & (num - 1)) == 0
144
# assume we start every range at 0
max_range = int(mem_range.end)

# start at a size of 4 kByte, and go up till we hit the max, increase
# the step every time we hit a power of two (4096, 5120, 6144, 7168,
# 8192, 10240, ...), i.e. four data points per doubling
min_range = 4096
ranges = [min_range]
step = 1024

while ranges[-1] < max_range:
    new_range = ranges[-1] + step
    if is_pow2(new_range):
        step *= 2
    ranges.append(new_range)

# how many times to repeat the measurement for each data point
iterations = 2

# 150 ns in ticks, this is chosen to be high enough that transactions
# do not pile up in the system, adjust if needed
itt = 150 * 1000
166
# for every data point, we create a trace containing a random address
# sequence, so that we can play back the same sequence for warming and
# the actual measurement
def create_trace(filename, max_addr, burst_size, itt):
    """Write a gzip-compressed protobuf trace of shuffled read requests.

    One read packet of ``burst_size`` bytes is emitted for every address
    in ``range(0, max_addr, burst_size)``, in random order. The first
    packet is stamped with tick 0 and each following packet is ``itt``
    ticks later.

    filename   -- path of the trace file to (over)write
    max_addr   -- exclusive upper bound of the addresses touched
    burst_size -- request size in bytes, also the address stride
    itt        -- inter-transaction time in ticks

    Terminates the script with exit status -1 if the file cannot be
    opened for writing.
    """
    import random

    try:
        proto_out = gzip.open(filename, 'wb')
    except IOError:
        print("Failed to open  %s  for writing" % filename)
        raise SystemExit(-1)

    # the with-statement closes the file even if encoding a message fails
    with proto_out:
        # write the magic number in 4-byte Little Endian, similar to what
        # is done in src/proto/protoio.cc; the stream is binary, so use a
        # bytes literal
        proto_out.write(b"gem5")

        # add the packet header
        header = packet_pb2.PacketHeader()
        header.obj_id = "lat_mem_rd for range 0:" + str(max_addr)
        # assume the default tick rate (1 ps)
        header.tick_freq = 1000000000000
        protolib.encodeMessage(proto_out, header)

        # create a list of every single address to touch; materialise it
        # explicitly since shuffle needs a mutable sequence
        addrs = list(range(0, max_addr, burst_size))
        random.shuffle(addrs)

        # create a packet we can re-use for all the addresses
        packet = packet_pb2.Packet()
        # ReadReq is 1 in src/mem/packet.hh Command enum
        packet.cmd = 1
        packet.size = int(burst_size)

        tick = 0
        for addr in addrs:
            packet.tick = int(tick)
            packet.addr = int(addr)
            protolib.encodeMessage(proto_out, packet)
            tick = tick + itt
209
# this will take a while, so keep the user informed
print("Generating traces, please wait...")

nxt_range = 0
nxt_state = 0

# every state lasts long enough to play back the largest trace once:
# one transaction every itt ticks for each burst in the full range
period = int(itt * (max_range // burst_size))

# now we create the states for each range
for r in ranges:
    filename = os.path.join(m5.options.outdir,
                            'lat_mem_rd%d.trc.gz' % nxt_range)

    if not options.reuse_trace:
        # create the actual random trace for this range
        create_trace(filename, r, burst_size, itt)

    # one warming state followed by the measuring states, all of them
    # replaying the same trace
    for _ in range(1 + iterations):
        cfg_file.write("STATE %d %d TRACE %s 0\n" %
                       (nxt_state, period, filename))
        nxt_state = nxt_state + 1

    nxt_range = nxt_range + 1

cfg_file.write("INIT 0\n")

# go through the states one by one
for state in range(1, nxt_state):
    cfg_file.write("TRANSITION %d %d 1\n" % (state - 1, state))

# the last state transitions to itself
cfg_file.write("TRANSITION %d %d 1\n" % (nxt_state - 1, nxt_state - 1))

cfg_file.close()
248
249# create a traffic generator, and point it to the file we just created
2# All rights reserved.
3#
4# The license below extends only to copyright in the software and shall
5# not be construed as granting a license to any other intellectual
6# property including but not limited to intellectual property relating
7# to a hardware implementation of the functionality of the software
8# licensed hereunder. You may use the software subject to the license
9# terms below provided that you ensure that this notice is replicated
10# unmodified and in its entirety in all distributions of the software,
11# modified or unmodified, in source code or in binary form.
12#
13# Redistribution and use in source and binary forms, with or without
14# modification, are permitted provided that the following conditions are
15# met: redistributions of source code must retain the above copyright
16# notice, this list of conditions and the following disclaimer;
17# redistributions in binary form must reproduce the above copyright
18# notice, this list of conditions and the following disclaimer in the
19# documentation and/or other materials provided with the distribution;
20# neither the name of the copyright holders nor the names of its
21# contributors may be used to endorse or promote products derived from
22# this software without specific prior written permission.
23#
24# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35#
36# Authors: Andreas Hansson
37
38import gzip
39import optparse
40import os
41
42import m5
43from m5.objects import *
44from m5.util import addToPath
45from m5.internal.stats import periodicStatDump
46
47addToPath('../common')
48import MemConfig
49
50addToPath('../../util')
51import protolib
52
53# this script is helpful to observe the memory latency for various
54# levels in a cache hierarchy, and various cache and memory
55# configurations, in essence replicating the lmbench lat_mem_rd thrash
56# behaviour
57
# Import the packet proto definitions. If that fails for any reason,
# try to generate them with protoc and import them again.
try:
    import packet_pb2
except Exception:
    # 'except Exception' rather than a bare 'except' so that
    # KeyboardInterrupt and SystemExit are not swallowed here.
    print("Did not find packet proto definitions, attempting to generate")
    from subprocess import call
    try:
        error = call(['protoc', '--python_out=configs/dram',
                      '--proto_path=src/proto', 'src/proto/packet.proto'])
    except OSError:
        # protoc could not be launched at all (e.g. it is not installed);
        # treat this like a failed generation instead of a raw traceback
        error = 1

    if error:
        print("Failed to import packet proto definitions")
        raise SystemExit(-1)

    print("Generated packet proto definitions")

    # the generated module needs the protobuf runtime, so check for it
    # explicitly to give a more helpful message
    try:
        import google.protobuf
    except ImportError:
        print("Please install the Python protobuf module")
        raise SystemExit(-1)

    import packet_pb2
80
# command-line options understood by this script
parser = optparse.OptionParser()

parser.add_option("--mem-type", type="choice", default="DDR3_1600_x64",
                  choices=MemConfig.mem_names(),
                  help="type of memory to use")
parser.add_option("--mem-size", action="store", type="string",
                  default="16MB",
                  help="Specify the memory size")
parser.add_option("--reuse-trace", action="store_true",
                  help="Prevent generation of traces and reuse existing")

(options, args) = parser.parse_args()

if args:
    print("Error: script doesn't take any positional arguments")
    # The visible imports of this script do not include 'sys', so a
    # sys.exit(1) call here would fail with NameError. Raising SystemExit
    # directly gives the same exit status without needing the import.
    raise SystemExit(1)
97
# start by creating the system itself, using a multi-layer 2.0 GHz
# crossbar, delivering 64 bytes / 3 cycles (one header cycle) which
# amounts to 42.7 GByte/s per layer and thus per port
system = System(membus=SystemXBar(width=32))
system.clk_domain = SrcClockDomain(
    clock='2.0GHz',
    voltage_domain=VoltageDomain(voltage='1V'))

# a single address range sized by --mem-size
mem_range = AddrRange(options.mem_size)
system.mem_ranges = [mem_range]

# do not worry about reserving space for the backing store
system.mmap_using_noreserve = True

# currently not exposed as command-line options, set here for now
options.mem_channels = 1
options.mem_ranks = 1
options.external_memory_system = 0
options.tlm_memory = 0
options.elastic_trace_en = 0

MemConfig.config_mem(options, system)

# there is no point slowing things down by saving any data
for ctrl in system.mem_ctrls:
    ctrl.null = True

    # the following assumes that we are using the native DRAM
    # controller, check to be sure
    if isinstance(ctrl, m5.objects.DRAMCtrl):
        # make the DRAM refresh interval sufficiently infinite to avoid
        # latency spikes
        ctrl.tREFI = '100s'

# use the same concept as the utilisation sweep, and print the config
# so that we can later read it in; the file stays open while the state
# machine description is written and is closed once that is complete
cfg_file_name = os.path.join(m5.options.outdir, "lat_mem_rd.cfg")
cfg_file = open(cfg_file_name, 'w')

# set an appropriate burst length in bytes, and use the same value as
# the system cache line size
burst_size = 64
system.cache_line_size = burst_size
140
def is_pow2(num):
    """Return True if the integer ``num`` is a positive power of two.

    A power of two has exactly one bit set, so clearing its lowest set
    bit with ``num & (num - 1)`` leaves zero. Zero and negative values
    are never powers of two.
    """
    return num > 0 and (num & (num - 1)) == 0
144
# assume we start every range at 0
max_range = int(mem_range.end)

# start at a size of 4 kByte, and go up till we hit the max, increase
# the step every time we hit a power of two (4096, 5120, 6144, 7168,
# 8192, 10240, ...), i.e. four data points per doubling
min_range = 4096
ranges = [min_range]
step = 1024

while ranges[-1] < max_range:
    new_range = ranges[-1] + step
    if is_pow2(new_range):
        step *= 2
    ranges.append(new_range)

# how many times to repeat the measurement for each data point
iterations = 2

# 150 ns in ticks, this is chosen to be high enough that transactions
# do not pile up in the system, adjust if needed
itt = 150 * 1000
166
# for every data point, we create a trace containing a random address
# sequence, so that we can play back the same sequence for warming and
# the actual measurement
def create_trace(filename, max_addr, burst_size, itt):
    """Write a gzip-compressed protobuf trace of shuffled read requests.

    One read packet of ``burst_size`` bytes is emitted for every address
    in ``range(0, max_addr, burst_size)``, in random order. The first
    packet is stamped with tick 0 and each following packet is ``itt``
    ticks later.

    filename   -- path of the trace file to (over)write
    max_addr   -- exclusive upper bound of the addresses touched
    burst_size -- request size in bytes, also the address stride
    itt        -- inter-transaction time in ticks

    Terminates the script with exit status -1 if the file cannot be
    opened for writing.
    """
    import random

    try:
        proto_out = gzip.open(filename, 'wb')
    except IOError:
        print("Failed to open  %s  for writing" % filename)
        raise SystemExit(-1)

    # the with-statement closes the file even if encoding a message fails
    with proto_out:
        # write the magic number in 4-byte Little Endian, similar to what
        # is done in src/proto/protoio.cc; the stream is binary, so use a
        # bytes literal
        proto_out.write(b"gem5")

        # add the packet header
        header = packet_pb2.PacketHeader()
        header.obj_id = "lat_mem_rd for range 0:" + str(max_addr)
        # assume the default tick rate (1 ps)
        header.tick_freq = 1000000000000
        protolib.encodeMessage(proto_out, header)

        # create a list of every single address to touch; materialise it
        # explicitly since shuffle needs a mutable sequence
        addrs = list(range(0, max_addr, burst_size))
        random.shuffle(addrs)

        # create a packet we can re-use for all the addresses
        packet = packet_pb2.Packet()
        # ReadReq is 1 in src/mem/packet.hh Command enum
        packet.cmd = 1
        packet.size = int(burst_size)

        tick = 0
        for addr in addrs:
            packet.tick = int(tick)
            packet.addr = int(addr)
            protolib.encodeMessage(proto_out, packet)
            tick = tick + itt
209
# this will take a while, so keep the user informed
print("Generating traces, please wait...")

nxt_range = 0
nxt_state = 0

# every state lasts long enough to play back the largest trace once:
# one transaction every itt ticks for each burst in the full range
period = int(itt * (max_range // burst_size))

# now we create the states for each range
for r in ranges:
    filename = os.path.join(m5.options.outdir,
                            'lat_mem_rd%d.trc.gz' % nxt_range)

    if not options.reuse_trace:
        # create the actual random trace for this range
        create_trace(filename, r, burst_size, itt)

    # one warming state followed by the measuring states, all of them
    # replaying the same trace
    for _ in range(1 + iterations):
        cfg_file.write("STATE %d %d TRACE %s 0\n" %
                       (nxt_state, period, filename))
        nxt_state = nxt_state + 1

    nxt_range = nxt_range + 1

cfg_file.write("INIT 0\n")

# go through the states one by one
for state in range(1, nxt_state):
    cfg_file.write("TRANSITION %d %d 1\n" % (state - 1, state))

# the last state transitions to itself
cfg_file.write("TRANSITION %d %d 1\n" % (nxt_state - 1, nxt_state - 1))

cfg_file.close()
248
249# create a traffic generator, and point it to the file we just created
# traffic generator driven by the state-machine description that was
# written to cfg_file_name
system.tgen = TrafficGen(config_file=cfg_file_name)
# traffic generator driven by the state-machine description that was
# written to cfg_file_name
# NOTE(review): progress_check presumably bounds how long the generator
# may go without making progress -- confirm against the TrafficGen params
system.tgen = TrafficGen(config_file=cfg_file_name,
                         progress_check='10s')
251
# add a communication monitor
system.monitor = CommMonitor()

# connect the traffic generator to the system; its port goes to the
# monitor's slave side, so the generated requests pass through the
# monitor first
system.tgen.port = system.monitor.slave
257
258# create the actual cache hierarchy, for now just go with something
259# basic to explore some of the options
260from Caches import *
261
# a starting point for an L3 cache: the Cache base class with the
# parameter values below overridden, size is supplied at instantiation
class L3Cache(Cache):
    assoc = 16
    # NOTE(review): latencies are presumably in cycles of the cache's
    # clock domain -- confirm against the Cache parameter definitions
    hit_latency = 40
    response_latency = 40
    mshrs = 32
    tgts_per_mshr = 12
    write_buffers = 16
270
# note that everything is in the same clock domain, 2.0 GHz as
# specified above

# L1 sits directly behind the monitor
system.l1cache = L1_DCache(size='64kB')
system.monitor.master = system.l1cache.cpu_side

# L2 is reached from the L1 through its own crossbar
system.l2cache = L2Cache(size='512kB', writeback_clean=True)
system.l2cache.xbar = L2XBar()
system.l1cache.mem_side = system.l2cache.xbar.slave
system.l2cache.cpu_side = system.l2cache.xbar.master

# make the L3 mostly exclusive, and correspondingly ensure that the L2
# writes back also clean lines to the L3
system.l3cache = L3Cache(size='4MB', clusivity='mostly_excl')
system.l3cache.xbar = L2XBar()
system.l2cache.mem_side = system.l3cache.xbar.slave
system.l3cache.cpu_side = system.l3cache.xbar.master
system.l3cache.mem_side = system.membus.slave

# connect the system port even if it is not used in this example
system.system_port = system.membus.slave
291
# every period, dump and reset all stats
periodicStatDump(period)

# run Forrest, run!
root = Root(full_system=False, system=system)
root.system.mem_mode = 'timing'

m5.instantiate()
# one period for every state that was written to the config file
m5.simulate(nxt_state * period)

# print all we need to make sense of the stats output
print("lat_mem_rd with %d iterations, ranges:" % iterations)
for r in ranges:
    print(r)
252
# add a communication monitor
system.monitor = CommMonitor()

# connect the traffic generator to the system; its port goes to the
# monitor's slave side, so the generated requests pass through the
# monitor first
system.tgen.port = system.monitor.slave
258
259# create the actual cache hierarchy, for now just go with something
260# basic to explore some of the options
261from Caches import *
262
# a starting point for an L3 cache: the Cache base class with the
# parameter values below overridden, size is supplied at instantiation
class L3Cache(Cache):
    assoc = 16
    # NOTE(review): latencies are presumably in cycles of the cache's
    # clock domain -- confirm against the Cache parameter definitions
    hit_latency = 40
    response_latency = 40
    mshrs = 32
    tgts_per_mshr = 12
    write_buffers = 16
271
# note that everything is in the same clock domain, 2.0 GHz as
# specified above

# L1 sits directly behind the monitor
system.l1cache = L1_DCache(size='64kB')
system.monitor.master = system.l1cache.cpu_side

# L2 is reached from the L1 through its own crossbar
system.l2cache = L2Cache(size='512kB', writeback_clean=True)
system.l2cache.xbar = L2XBar()
system.l1cache.mem_side = system.l2cache.xbar.slave
system.l2cache.cpu_side = system.l2cache.xbar.master

# make the L3 mostly exclusive, and correspondingly ensure that the L2
# writes back also clean lines to the L3
system.l3cache = L3Cache(size='4MB', clusivity='mostly_excl')
system.l3cache.xbar = L2XBar()
system.l2cache.mem_side = system.l3cache.xbar.slave
system.l3cache.cpu_side = system.l3cache.xbar.master
system.l3cache.mem_side = system.membus.slave

# connect the system port even if it is not used in this example
system.system_port = system.membus.slave
292
# every period, dump and reset all stats
periodicStatDump(period)

# run Forrest, run!
root = Root(full_system=False, system=system)
root.system.mem_mode = 'timing'

m5.instantiate()
# one period for every state that was written to the config file
m5.simulate(nxt_state * period)

# print all we need to make sense of the stats output
print("lat_mem_rd with %d iterations, ranges:" % iterations)
for r in ranges:
    print(r)