MOESI_AMD_Base.py revision 12065
1#
2#  Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
3#  All rights reserved.
4#
5#  For use for simulation and test purposes only
6#
7#  Redistribution and use in source and binary forms, with or without
8#  modification, are permitted provided that the following conditions are met:
9#
10#  1. Redistributions of source code must retain the above copyright notice,
11#  this list of conditions and the following disclaimer.
12#
13#  2. Redistributions in binary form must reproduce the above copyright notice,
14#  this list of conditions and the following disclaimer in the documentation
15#  and/or other materials provided with the distribution.
16#
17#  3. Neither the name of the copyright holder nor the names of its contributors
18#  may be used to endorse or promote products derived from this software
19#  without specific prior written permission.
20#
21#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31#  POSSIBILITY OF SUCH DAMAGE.
32#
33#  Author: Lisa Hsu
34#
35
36import math
37import m5
38from m5.objects import *
39from m5.defines import buildEnv
40from Ruby import create_topology
41from Ruby import send_evicts
42
43from topologies.Cluster import Cluster
44from topologies.Crossbar import Crossbar
45
class CntrlBase:
    """Mixin that hands out running indices for sequencers and controllers.

    ``seqCount`` and ``cntrlCount`` draw from counters stored on
    ``CntrlBase`` itself, so the numbering is shared by every subclass.
    ``versionCount`` stores its counter on the calling class, so each
    controller type is numbered independently.
    """

    _seqs = 0
    _cntrls = 0
    _version = 0

    @classmethod
    def seqCount(cls):
        """Return the next sequencer index (global across all subclasses)."""
        # Deliberately update CntrlBase rather than cls: the count is global.
        index = CntrlBase._seqs
        CntrlBase._seqs = index + 1
        return index

    @classmethod
    def cntrlCount(cls):
        """Return the next controller index (global across all subclasses)."""
        # Deliberately update CntrlBase rather than cls: the count is global.
        index = CntrlBase._cntrls
        CntrlBase._cntrls = index + 1
        return index

    @classmethod
    def versionCount(cls):
        """Return the next version index for this particular class."""
        # Stored on cls, so every controller type keeps its own count.
        index = cls._version
        cls._version = index + 1
        return index
66
class L1DCache(RubyCache):
    """L1 data cache sized from the ``l1d_*`` command-line options."""

    resourceStalls = False

    def create(self, options):
        """Set replacement policy, associativity and size from *options*.

        Reads ``options.l1d_assoc`` and ``options.l1d_size``.
        """
        self.replacement_policy = PseudoLRUReplacementPolicy()
        self.assoc = options.l1d_assoc
        self.size = MemorySize(options.l1d_size)
73
class L1ICache(RubyCache):
    """L1 instruction cache sized from the ``l1i_*`` command-line options."""

    resourceStalls = False

    def create(self, options):
        """Set replacement policy, associativity and size from *options*.

        Reads ``options.l1i_assoc`` and ``options.l1i_size``.
        """
        self.replacement_policy = PseudoLRUReplacementPolicy()
        self.assoc = options.l1i_assoc
        self.size = MemorySize(options.l1i_size)
80
class L2Cache(RubyCache):
    """L2 cache sized from the ``l2_*`` command-line options."""

    resourceStalls = False

    def create(self, options):
        """Set replacement policy, associativity and size from *options*.

        Reads ``options.l2_assoc`` and ``options.l2_size``.
        """
        self.replacement_policy = PseudoLRUReplacementPolicy()
        self.assoc = options.l2_assoc
        self.size = MemorySize(options.l2_size)
87
class CPCntrl(CorePair_Controller, CntrlBase):
    """Core-pair controller: two sequencers sharing one L1I and one L2.

    Each sequencer gets its own L1 data cache (``L1D0cache`` /
    ``L1D1cache``); both use the same ``L1Icache``.
    """

    def create(self, options, ruby_system, system):
        """Instantiate the caches and both sequencers for this core pair.

        options     -- parsed command-line options (cache sizes,
                       ``cpu_to_dir_latency``, ``recycle_latency``)
        ruby_system -- stored on the controller and on both sequencers
        system      -- accepted for signature parity; not used here
        """
        self.version = self.versionCount()

        # The caches are attached to the controller first, then handed to
        # the sequencers below.
        self.L1Icache = L1ICache()
        self.L1Icache.create(options)
        self.L1D0cache = L1DCache()
        self.L1D0cache.create(options)
        self.L1D1cache = L1DCache()
        self.L1D1cache.create(options)
        self.L2cache = L2Cache()
        self.L2cache.create(options)

        self.sequencer = RubySequencer()
        self.sequencer1 = RubySequencer()

        # (sequencer, private data cache); the position is the core id.
        per_core = ((self.sequencer, self.L1D0cache),
                    (self.sequencer1, self.L1D1cache))
        for core_id, (seq, dcache) in enumerate(per_core):
            seq.version = self.seqCount()
            seq.icache = self.L1Icache
            seq.dcache = dcache
            seq.icache_hit_latency = 2
            seq.dcache_hit_latency = 2
            seq.ruby_system = ruby_system
            seq.coreid = core_id
            seq.is_cpu_sequencer = True

        self.issue_latency = options.cpu_to_dir_latency
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
129
class L3Cache(RubyCache):
    """One directory's slice of the L3 cache.

    The class-level values describe the whole L3; ``create`` divides the
    size and the bank counts by ``options.num_dirs`` to obtain this slice.
    """

    assoc = 8
    dataArrayBanks = 256
    tagArrayBanks = 256

    def create(self, options, ruby_system, system):
        """Configure this slice from the command-line options.

        options     -- parsed options; reads ``l3_size``, ``num_dirs``,
                       ``l3_data_latency``, ``l3_tag_latency`` and
                       ``no_resource_stalls``
        ruby_system -- accepted for signature parity; not used here
        system      -- accepted for signature parity; not used here
        """
        self.size = MemorySize(options.l3_size)
        self.size.value /= options.num_dirs

        # Divide the bank counts exactly once.  The previous code repeated
        # both divisions, leaving 256 / num_dirs**2 banks per slice.
        # NOTE(review): plain `/` is kept as in the original, Python 2-era
        # code -- confirm the division semantics if this is ported.
        self.dataArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs

        self.dataAccessLatency = options.l3_data_latency
        self.tagAccessLatency = options.l3_tag_latency
        # --no-resource-stalls is a store_false option defaulting to True,
        # so this is True unless the flag is passed.
        self.resourceStalls = options.no_resource_stalls
        self.replacement_policy = PseudoLRUReplacementPolicy()
146
class L3Cntrl(L3Cache_Controller, CntrlBase):
    """Stand-alone L3 cache controller."""

    def create(self, options, ruby_system, system):
        """Build the L3 cache and derive the controller's response latency.

        options     -- parsed command-line options
        ruby_system -- stored on the controller
        system      -- forwarded to ``L3Cache.create``
        """
        self.version = self.versionCount()

        l3 = L3Cache()
        l3.create(options, ruby_system, system)
        self.L3cache = l3

        # Respond only after the slower of the data and tag arrays.
        self.l3_response_latency = max(l3.dataAccessLatency,
                                       l3.tagAccessLatency)
        self.ruby_system = ruby_system

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Attach the six wire buffers to their controller attributes."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr_name, wire_buffer in wiring:
            setattr(self, attr_name, wire_buffer)
168
class DirCntrl(Directory_Controller, CntrlBase):
    """Directory controller with its own L3 cache slice."""

    def create(self, options, dir_ranges, ruby_system, system):
        """Configure the directory, its address ranges and its L3 slice.

        options     -- parsed command-line options (``num_tbes``,
                       ``recycle_latency`` and the L3 options)
        dir_ranges  -- address ranges assigned to this directory
        ruby_system -- stored on the controller
        system      -- forwarded to ``L3Cache.create``
        """
        self.version = self.versionCount()

        self.response_latency = 30

        self.addr_ranges = dir_ranges
        self.directory = RubyDirectoryMemory()

        l3_slice = L3Cache()
        l3_slice.create(options, ruby_system, system)
        self.L3CacheMemory = l3_slice

        # An L3 hit costs the slower of the data and tag array accesses.
        self.l3_hit_latency = max(l3_slice.dataAccessLatency,
                                  l3_slice.tagAccessLatency)

        self.number_of_TBEs = options.num_tbes

        self.ruby_system = ruby_system

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

        self.CPUonly = True

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Attach the six wire buffers to their controller attributes."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr_name, wire_buffer in wiring:
            setattr(self, attr_name, wire_buffer)
201
def define_options(parser):
    """Register the protocol-specific command-line options on *parser*.

    *parser* must provide an ``add_option`` method accepting optparse-style
    keyword arguments.
    """
    # (flag, keyword arguments), registered in this order.
    option_specs = (
        ("--num-subcaches", dict(type="int", default=4)),
        ("--l3-data-latency", dict(type="int", default=20)),
        ("--l3-tag-latency", dict(type="int", default=15)),
        ("--cpu-to-dir-latency", dict(type="int", default=15)),
        # store_false: the stored value is True unless the flag is given.
        ("--no-resource-stalls", dict(action="store_false", default=True)),
        ("--num-tbes", dict(type="int", default=256)),
        ("--l2-latency", dict(type="int", default=50)),  # load to use
    )
    for flag, keywords in option_specs:
        parser.add_option(flag, **keywords)
211
def create_system(options, full_system, system, dma_devices, ruby_system):
    """Create the controllers and cluster topology for MOESI_AMD_Base.

    options     -- parsed command-line options; reads ``num_dirs``,
                   ``num_cpus``, ``numa_high_bit``, ``cacheline_size`` and
                   everything the controller ``create`` methods read
    full_system -- accepted for signature parity; not used here
    system      -- controllers are attached to it as ``dir_cntrl<N>`` and
                   ``cp_cntrl<N>``; its ``mem_ranges`` are interleaved
                   across the directories
    dma_devices -- must be empty; DMA is not supported by this config
    ruby_system -- its ``network`` is wired to every message buffer

    Returns the tuple ``(cpu_sequencers, dir_cntrl_nodes, mainCluster)``.

    Raises AssertionError if ``options.num_cpus`` is odd or if any DMA
    device is supplied.
    """
    if buildEnv['PROTOCOL'] != 'MOESI_AMD_Base':
        panic("This script requires the MOESI_AMD_Base protocol.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    dir_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #

    # This is the base crossbar that connects the L3s, Dirs, and cpu
    # Cluster
    mainCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s

    # The number of interleaving bits is needed whether or not the NUMA bit
    # was given explicitly.  It used to be computed only in the `else`
    # branch below, so passing --numa-high-bit raised a NameError when the
    # address ranges were built.
    dir_bits = int(math.log(options.num_dirs, 2))

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    for i in range(options.num_dirs):
        # Directory i owns the slice of every memory range whose
        # interleaving bits match i.
        dir_ranges = [m5.objects.AddrRange(r.start, size = r.size(),
                                           intlvHighBit = numa_bit,
                                           intlvBits = dir_bits,
                                           intlvMatch = i)
                      for r in system.mem_ranges]

        dir_cntrl = DirCntrl(TCC_select_num_bits = 0)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered = True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        # These queues are not connected to the network.
        dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.responseFromMemory = MessageBuffer()

        # Attach under a numbered attribute (replaces the former exec()).
        setattr(system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    # Technically this config can support an odd number of cpus, but the top
    # level config files, such as the ruby_random_tester, will get confused if
    # the number of cpus does not equal the number of sequencers.  Thus make
    # sure that an even number of cpus is specified.
    assert((options.num_cpus % 2) == 0)

    # For an odd number of CPUs, still create the right number of controllers
    cpuCluster = Cluster(extBW = 512, intBW = 512)  # 1 TB/s
    for i in range((options.num_cpus + 1) // 2):

        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        # Attach under a numbered attribute (replaces the former exec()).
        setattr(system, "cp_cntrl%d" % i, cp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        # These queues are not connected to the network.
        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered = True)

        cpuCluster.add(cp_cntrl)

    # Assuming no DMA devices
    assert(len(dma_devices) == 0)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)

    ruby_system.network.number_of_virtual_networks = 10

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
336