MOESI_AMD_Base.py revision 12598
1#
2#  Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
3#  All rights reserved.
4#
5#  For use for simulation and test purposes only
6#
7#  Redistribution and use in source and binary forms, with or without
8#  modification, are permitted provided that the following conditions are met:
9#
10#  1. Redistributions of source code must retain the above copyright notice,
11#  this list of conditions and the following disclaimer.
12#
13#  2. Redistributions in binary form must reproduce the above copyright notice,
14#  this list of conditions and the following disclaimer in the documentation
15#  and/or other materials provided with the distribution.
16#
17#  3. Neither the name of the copyright holder nor the names of its contributors
18#  may be used to endorse or promote products derived from this software
19#  without specific prior written permission.
20#
21#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31#  POSSIBILITY OF SUCH DAMAGE.
32#
33#  Author: Lisa Hsu
34#
35
36import math
37import m5
38from m5.objects import *
39from m5.defines import buildEnv
40from Ruby import create_topology
41from Ruby import send_evicts
42
43from topologies.Cluster import Cluster
44from topologies.Crossbar import Crossbar
45
class CntrlBase:
    """Mixin that hands out running identifiers to controller classes.

    Sequencer and controller identifiers come from counters stored on
    CntrlBase itself, so a single sequence is shared by every subclass.
    Version numbers come from a counter stored on the calling class.
    """

    _seqs = 0
    _cntrls = 0
    _version = 0

    @classmethod
    def seqCount(cls):
        """Return the next unused sequencer id, starting from 0."""
        # Addressed through CntrlBase, not cls: the count must be global.
        allocated = CntrlBase._seqs
        CntrlBase._seqs = allocated + 1
        return allocated

    @classmethod
    def cntrlCount(cls):
        """Return the next unused controller id, starting from 0."""
        # Addressed through CntrlBase, not cls: the count must be global.
        allocated = CntrlBase._cntrls
        CntrlBase._cntrls = allocated + 1
        return allocated

    @classmethod
    def versionCount(cls):
        """Return the next unused version number for the calling class."""
        # Stored on cls so that each controller type keeps its own count.
        allocated = cls._version
        cls._version = allocated + 1
        return allocated
66
# L1 data cache whose geometry is taken from the l1d_* options.
class L1DCache(RubyCache):
    resourceStalls = False

    def create(self, options):
        """Set replacement policy, associativity and size from options."""
        self.replacement_policy = PseudoLRUReplacementPolicy()
        self.assoc = options.l1d_assoc
        self.size = MemorySize(options.l1d_size)
73
# L1 instruction cache whose geometry is taken from the l1i_* options.
class L1ICache(RubyCache):
    resourceStalls = False

    def create(self, options):
        """Set replacement policy, associativity and size from options."""
        self.replacement_policy = PseudoLRUReplacementPolicy()
        self.assoc = options.l1i_assoc
        self.size = MemorySize(options.l1i_size)
80
# L2 cache whose geometry is taken from the l2_* options.
class L2Cache(RubyCache):
    resourceStalls = False

    def create(self, options):
        """Set replacement policy, associativity and size from options."""
        self.replacement_policy = PseudoLRUReplacementPolicy()
        self.assoc = options.l2_assoc
        self.size = MemorySize(options.l2_size)
87
# Core-pair controller: one instruction cache and one L2 serve two
# sequencers (coreid 0 and 1), each of which has its own L1 data cache.
class CPCntrl(CorePair_Controller, CntrlBase):

    def create(self, options, ruby_system, system):
        """Build the caches and both sequencers of this core pair.

        options supplies the cache geometry, cpu_to_dir_latency and the
        optional recycle_latency; ruby_system is recorded on the controller
        and on both sequencers.  system is not used here.
        """
        self.version = self.versionCount()

        # Caches are created and configured one after another.
        self.L1Icache = L1ICache()
        self.L1Icache.create(options)
        self.L1D0cache = L1DCache()
        self.L1D0cache.create(options)
        self.L1D1cache = L1DCache()
        self.L1D1cache.create(options)
        self.L2cache = L2Cache()
        self.L2cache.create(options)

        # The two sequencers differ only in attribute name, data cache and
        # core id; the first one therefore also receives the lower
        # sequencer version number.
        per_core = (("sequencer", self.L1D0cache),
                    ("sequencer1", self.L1D1cache))
        for core_id, (seq_name, data_cache) in enumerate(per_core):
            setattr(self, seq_name, RubySequencer())
            seq = getattr(self, seq_name)
            seq.icache_hit_latency = 2
            seq.dcache_hit_latency = 2
            seq.version = self.seqCount()
            seq.icache = self.L1Icache
            seq.dcache = data_cache
            seq.ruby_system = ruby_system
            seq.coreid = core_id
            seq.is_cpu_sequencer = True

        self.ruby_system = ruby_system
        self.send_evictions = send_evicts(options)
        self.issue_latency = options.cpu_to_dir_latency

        # Only override recycle_latency when a non-zero value was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
129
# L3 cache array.  DirCntrl.create and L3Cntrl.create each build one
# instance, so the size and bank counts below are per-instance shares.
class L3Cache(RubyCache):
    assoc = 8
    dataArrayBanks = 256
    tagArrayBanks = 256

    def create(self, options, ruby_system, system):
        """Configure this L3 instance from the parsed options.

        Reads l3_size, num_dirs, l3_data_latency, l3_tag_latency and
        no_resource_stalls from options.  ruby_system and system are
        accepted but not used here.
        """
        self.size = MemorySize(options.l3_size)
        # Each instance gets an equal share of the total L3 capacity.
        self.size.value /= options.num_dirs
        # NOTE(review): the bank counts are divided by num_dirs twice, so
        # they scale with 1/num_dirs**2 while the size scales with
        # 1/num_dirs.  This looks like a duplicated pair of lines - confirm
        # whether the second division is intended before changing it.
        self.dataArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs
        self.dataArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs
        self.dataAccessLatency = options.l3_data_latency
        self.tagAccessLatency = options.l3_tag_latency
        # --no-resource-stalls is declared in define_options with
        # action="store_false" and default=True, so this is True unless the
        # flag is given on the command line.
        self.resourceStalls = options.no_resource_stalls
        self.replacement_policy = PseudoLRUReplacementPolicy()
146
# Controller that owns a single L3Cache instance.
class L3Cntrl(L3Cache_Controller, CntrlBase):
    def create(self, options, ruby_system, system):
        """Create the L3 array and derive the controller's latency from it.

        l3_response_latency is the larger of the array's data and tag access
        latencies.  recycle_latency is only overridden when the option is
        non-zero.
        """
        self.version = self.versionCount()

        self.L3cache = L3Cache()
        self.L3cache.create(options, ruby_system, system)

        l3 = self.L3cache
        self.l3_response_latency = max(l3.dataAccessLatency,
                                       l3.tagAccessLatency)
        self.ruby_system = ruby_system

        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six given wire buffers on the matching attributes."""
        hookups = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr_name, wire_buffer in hookups:
            setattr(self, attr_name, wire_buffer)
167        self.respToL3 = resp_to_l3
168
# Directory controller with its own directory memory and L3Cache instance.
class DirCntrl(Directory_Controller, CntrlBase):
    def create(self, options, dir_ranges, ruby_system, system):
        """Configure the directory, its L3 array and its latencies.

        dir_ranges becomes the controller's addr_ranges.  l3_hit_latency is
        the larger of the L3 array's data and tag access latencies.
        recycle_latency is only overridden when the option is non-zero.
        """
        self.version = self.versionCount()

        self.response_latency = 30

        self.addr_ranges = dir_ranges
        self.directory = RubyDirectoryMemory()

        self.L3CacheMemory = L3Cache()
        self.L3CacheMemory.create(options, ruby_system, system)

        l3 = self.L3CacheMemory
        self.l3_hit_latency = max(l3.dataAccessLatency, l3.tagAccessLatency)

        self.number_of_TBEs = options.num_tbes
        self.ruby_system = ruby_system

        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

        self.CPUonly = True

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six given wire buffers on the matching attributes."""
        hookups = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr_name, wire_buffer in hookups:
            setattr(self, attr_name, wire_buffer)
200        self.respToL3 = resp_to_l3
201
def define_options(parser):
    """Register this configuration's command-line options on parser.

    parser must provide an optparse-style add_option() method.  Options are
    registered in the order listed below.
    """
    option_table = (
        ("--num-subcaches", dict(type="int", default=4)),
        ("--l3-data-latency", dict(type="int", default=20)),
        ("--l3-tag-latency", dict(type="int", default=15)),
        ("--cpu-to-dir-latency", dict(type="int", default=15)),
        # The stored value starts as True and is cleared by the flag.
        ("--no-resource-stalls", dict(action="store_false", default=True)),
        ("--num-tbes", dict(type="int", default=256)),
        # load to use
        ("--l2-latency", dict(type="int", default=50)),
    )
    for flag, settings in option_table:
        parser.add_option(flag, **settings)
211
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    """Instantiate the MOESI_AMD_Base controllers and their topology.

    One DirCntrl is created per directory (options.num_dirs) and one CPCntrl
    per pair of CPUs (options.num_cpus).  Each controller is attached to
    ``system`` as ``dir_cntrl<i>`` / ``cp_cntrl<i>`` and wired to
    ``ruby_system.network`` through MessageBuffers.

    Args:
        options: parsed command-line options.
        full_system: not used by this function.
        system: object the controllers are attached to; its ``mem_ranges``
            are interleaved across the directories.
        dma_devices: must be empty; this configuration assumes no DMA.
        bootmem: not used by this function.
        ruby_system: Ruby system whose network the controllers connect to.

    Returns:
        The tuple ``(cpu_sequencers, dir_cntrl_nodes, mainCluster)``.
    """
    if buildEnv['PROTOCOL'] != 'MOESI_AMD_Base':
        panic("This script requires the MOESI_AMD_Base protocol.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    dir_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #

    # This is the base crossbar that connects the L3s, Dirs, and cpu
    # Cluster
    mainCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s

    # The number of interleaving bits is needed by every directory address
    # range below, so it is computed no matter how the NUMA bit is chosen.
    # (It used to be assigned only when numa_high_bit was unset, which made
    # an explicit numa_high_bit fail with a NameError.)
    dir_bits = int(math.log(options.num_dirs, 2))

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    for i in range(options.num_dirs):
        # Directory i owns the i-th interleaved slice of every memory range.
        dir_ranges = [
            m5.objects.AddrRange(r.start, size = r.size(),
                                 intlvHighBit = numa_bit,
                                 intlvBits = dir_bits,
                                 intlvMatch = i)
            for r in system.mem_ranges
        ]

        dir_cntrl = DirCntrl(TCC_select_num_bits = 0)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered = True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.responseFromMemory = MessageBuffer()

        # Attach under a numbered attribute name without building code text.
        setattr(system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    # Technically this config can support an odd number of cpus, but the top
    # level config files, such as the ruby_random_tester, will get confused if
    # the number of cpus does not equal the number of sequencers.  Thus make
    # sure that an even number of cpus is specified.
    assert (options.num_cpus % 2) == 0, \
        "MOESI_AMD_Base requires an even number of cpus"

    # For an odd number of CPUs, still create the right number of controllers
    cpuCluster = Cluster(extBW = 512, intBW = 512)  # 1 TB/s
    # Floor division keeps the controller count an integer on any Python.
    for i in range((options.num_cpus + 1) // 2):

        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        setattr(system, "cp_cntrl%d" % i, cp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered = True)

        cpuCluster.add(cp_cntrl)

    # Assuming no DMA devices
    assert len(dma_devices) == 0, \
        "MOESI_AMD_Base does not support DMA devices"

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)

    ruby_system.network.number_of_virtual_networks = 10

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
337