GPU_VIPER.py revision 13980:62a28c423e91
1# Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
2# All rights reserved.
3#
4# For use for simulation and test purposes only
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are met:
8#
9# 1. Redistributions of source code must retain the above copyright notice,
10# this list of conditions and the following disclaimer.
11#
12# 2. Redistributions in binary form must reproduce the above copyright notice,
13# this list of conditions and the following disclaimer in the documentation
14# and/or other materials provided with the distribution.
15#
16# 3. Neither the name of the copyright holder nor the names of its
17# contributors may be used to endorse or promote products derived from this
18# software without specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30# POSSIBILITY OF SUCH DAMAGE.
31#
32# Authors: Lisa Hsu
33
34import math
35import m5
36from m5.objects import *
37from m5.defines import buildEnv
38from m5.util import addToPath
39from Ruby import create_topology
40from Ruby import send_evicts
41
42addToPath('../')
43
44from topologies.Cluster import Cluster
45from topologies.Crossbar import Crossbar
46
class CntrlBase:
    """Mixin that hands out zero-based ID numbers to Ruby controllers.

    Sequencer and controller numbers are stored on CntrlBase itself and are
    therefore shared by every subclass; version numbers are stored on the
    class the method is invoked on, so each controller type counts
    separately.
    """

    _seqs = 0
    _cntrls = 0
    _version = 0

    @classmethod
    def seqCount(cls):
        """Return the next sequencer number from the single global counter."""
        # Deliberately name CntrlBase instead of cls: the count is global.
        issued = CntrlBase._seqs
        CntrlBase._seqs = issued + 1
        return issued

    @classmethod
    def cntrlCount(cls):
        """Return the next controller number from the single global counter."""
        # Deliberately name CntrlBase instead of cls: the count is global.
        issued = CntrlBase._cntrls
        CntrlBase._cntrls = issued + 1
        return issued

    @classmethod
    def versionCount(cls):
        """Return the next version number for this particular class."""
        # Writing through cls gives every subclass its own counter.
        issued = cls._version
        cls._version = issued + 1
        return issued
67
class L1Cache(RubyCache):
    """RubyCache preset used by CPCntrl for its L1 instruction/data caches."""

    tagArrayBanks = 2
    dataArrayBanks = 2
    tagAccessLatency = 1
    dataAccessLatency = 1
    resourceStalls = False

    def create(self, size, assoc, options):
        """Set capacity, associativity and replacement policy.

        size    -- capacity string handed to MemorySize()
        assoc   -- associativity
        options -- accepted for signature symmetry; not read here
        """
        self.size = MemorySize(size)
        self.assoc = assoc
        self.replacement_policy = PseudoLRUReplacementPolicy()
78
class L2Cache(RubyCache):
    """RubyCache preset used by CPCntrl for its L2 cache."""

    tagArrayBanks = 16
    dataArrayBanks = 16
    assoc = 16
    resourceStalls = False

    def create(self, size, assoc, options):
        """Set capacity, associativity and replacement policy.

        size    -- capacity string handed to MemorySize()
        assoc   -- associativity (replaces the class default of 16)
        options -- accepted for signature symmetry; not read here
        """
        self.size = MemorySize(size)
        self.assoc = assoc
        self.replacement_policy = PseudoLRUReplacementPolicy()
88
class CPCntrl(CorePair_Controller, CntrlBase):
    """CorePair controller: one L1I, two L1Ds, one L2 and two sequencers."""

    def create(self, options, ruby_system, system):
        """Populate the caches and the two CPU sequencers.

        options     -- parsed options; reads l1i_*/l1d_*/l2_* sizes and
                       associativities, cpu_to_dir_latency, recycle_latency
        ruby_system -- the RubySystem this controller belongs to
        system      -- not used by this method
        """
        self.version = self.versionCount()

        # One instruction cache for the pair, one data cache per sequencer.
        self.L1Icache = L1Cache()
        self.L1Icache.create(options.l1i_size, options.l1i_assoc, options)
        self.L1D0cache = L1Cache()
        self.L1D0cache.create(options.l1d_size, options.l1d_assoc, options)
        self.L1D1cache = L1Cache()
        self.L1D1cache.create(options.l1d_size, options.l1d_assoc, options)
        self.L2cache = L2Cache()
        self.L2cache.create(options.l2_size, options.l2_assoc, options)

        self.sequencer = RubySequencer()
        self._setup_sequencer(self.sequencer, self.L1D0cache, 0, ruby_system)

        self.sequencer1 = RubySequencer()
        self._setup_sequencer(self.sequencer1, self.L1D1cache, 1, ruby_system)

        self.issue_latency = options.cpu_to_dir_latency
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Only override the controller default when a value was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def _setup_sequencer(self, seq, dcache, coreid, ruby_system):
        """Configure an already-attached CPU sequencer of this core pair."""
        seq.version = self.seqCount()
        seq.icache = self.L1Icache
        seq.dcache = dcache
        seq.ruby_system = ruby_system
        seq.coreid = coreid
        seq.is_cpu_sequencer = True
126
class TCPCache(RubyCache):
    """RubyCache preset for the GPU TCP (per-controller L1) cache."""

    size = "16kB"
    assoc = 16
    dataArrayBanks = 16 #number of data banks
    tagArrayBanks = 16  #number of tag banks
    dataAccessLatency = 4
    tagAccessLatency = 1

    def create(self, options):
        """Apply the command-line TCP settings to this cache.

        options -- parsed options; reads tcp_size, tcp_assoc and
                   no_resource_stalls
        """
        self.size = MemorySize(options.tcp_size)
        self.assoc = options.tcp_assoc
        # Follow --no-resource-stalls, the same flag TCPCntrl assigns to this
        # cache right before calling create().  This used to read the TCC
        # flag (no_tcc_resource_stalls), which silently overwrote the
        # controller's assignment.
        self.resourceStalls = options.no_resource_stalls
        self.replacement_policy = PseudoLRUReplacementPolicy()
139
class TCPCntrl(TCP_Controller, CntrlBase):
    """TCP controller owning one TCPCache, a VIPERCoalescer and a sequencer."""

    def create(self, options, ruby_system, system):
        """Set up the controller with use_seq_not_coal = False.

        `system` is not used.
        """
        self._configure(options, ruby_system, False)

    def createCP(self, options, ruby_system, system):
        """Set up the controller with use_seq_not_coal = True.

        `system` is not used.
        """
        self._configure(options, ruby_system, True)

    def _configure(self, options, ruby_system, use_seq_not_coal):
        """Shared body of create() and createCP().

        options          -- parsed options; reads TCP_latency,
                            no_resource_stalls, recycle_latency and whatever
                            TCPCache.create() reads
        ruby_system      -- the RubySystem this controller belongs to
        use_seq_not_coal -- value stored in the parameter of the same name
        """
        self.version = self.versionCount()

        tcp_latency = options.TCP_latency
        self.L1cache = TCPCache(tagAccessLatency = tcp_latency,
                                dataAccessLatency = tcp_latency)
        self.L1cache.resourceStalls = options.no_resource_stalls
        self.L1cache.create(options)
        self.issue_latency = 1

        # Both front ends use the single L1 for instructions and data alike.
        self.coalescer = VIPERCoalescer()
        coalescer = self.coalescer
        coalescer.version = self.seqCount()
        coalescer.icache = self.L1cache
        coalescer.dcache = self.L1cache
        coalescer.ruby_system = ruby_system
        coalescer.support_inst_reqs = False
        coalescer.is_cpu_sequencer = False

        self.sequencer = RubySequencer()
        sequencer = self.sequencer
        sequencer.version = self.seqCount()
        sequencer.icache = self.L1cache
        sequencer.dcache = self.L1cache
        sequencer.ruby_system = ruby_system
        sequencer.is_cpu_sequencer = True

        self.use_seq_not_coal = use_seq_not_coal

        self.ruby_system = ruby_system

        # Only override the controller default when a value was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
203
class SQCCache(RubyCache):
    """RubyCache preset for the GPU SQC cache."""

    tagArrayBanks = 8
    dataArrayBanks = 8
    tagAccessLatency = 1
    dataAccessLatency = 1

    def create(self, options):
        """Apply sqc_size / sqc_assoc from `options` and pick the policy."""
        self.size = MemorySize(options.sqc_size)
        self.assoc = options.sqc_assoc
        self.replacement_policy = PseudoLRUReplacementPolicy()
214
class SQCCntrl(SQC_Controller, CntrlBase):
    """SQC controller owning one SQCCache and one sequencer."""

    def create(self, options, ruby_system, system):
        """Populate the cache and sequencer.

        options     -- parsed options; reads sqc_size, sqc_assoc,
                       no_resource_stalls and recycle_latency
        ruby_system -- the RubySystem this controller belongs to
        system      -- not used by this method
        """
        self.version = self.versionCount()

        self.L1cache = SQCCache()
        self.L1cache.create(options)
        self.L1cache.resourceStalls = options.no_resource_stalls

        # The sequencer is configured with data requests disabled and is not
        # flagged as a CPU sequencer.
        self.sequencer = RubySequencer()
        sequencer = self.sequencer
        sequencer.version = self.seqCount()
        sequencer.icache = self.L1cache
        sequencer.dcache = self.L1cache
        sequencer.ruby_system = ruby_system
        sequencer.support_data_reqs = False
        sequencer.is_cpu_sequencer = False

        self.ruby_system = ruby_system

        # Only override the controller default when a value was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
237
class TCC(RubyCache):
    """RubyCache preset for one bank of the GPU TCC (L2) cache."""

    size = MemorySize("256kB")
    assoc = 16
    tagAccessLatency = 2
    dataAccessLatency = 8
    resourceStalls = True

    def create(self, options):
        """Size and bank this TCC slice from the parsed options.

        With a positive options.bw_scalor the aggregate capacity is 128kB
        per compute unit and 64 data/tag banks are used; otherwise the
        capacity is options.tcc_size and 256 banks are split over the TCCs.
        The aggregate capacity is then divided by options.num_tccs and
        raised, if needed, so that size.value / assoc is at least 128.
        """
        self.assoc = options.tcc_assoc

        bandwidth_scaled = (hasattr(options, 'bw_scalor') and
                            options.bw_scalor > 0)
        if bandwidth_scaled:
            aggregate = str(options.num_compute_units * 128) + 'kB'
            self.size = MemorySize(aggregate)
            self.dataArrayBanks = 64
            self.tagArrayBanks = 64
        else:
            self.size = MemorySize(options.tcc_size)
            self.dataArrayBanks = 256 / options.num_tccs #number of data banks
            self.tagArrayBanks = 256 / options.num_tccs #number of tag banks

        # The size configured above is the aggregate; keep this bank's share.
        self.size.value = self.size.value / options.num_tccs
        per_way = self.size.value / long(self.assoc)
        if per_way < 128:
            self.size.value = long(128 * self.assoc)

        # Indexing starts above the line-offset bits and the bits implied by
        # the number of TCC banks.
        line_bits = math.log(options.cacheline_size, 2)
        bank_bits = math.log(options.num_tccs, 2)
        self.start_index_bit = line_bits + bank_bits
        self.replacement_policy = PseudoLRUReplacementPolicy()
263
264
class TCCCntrl(TCC_Controller, CntrlBase):
    """TCC controller owning one TCC cache slice."""

    def create(self, options, ruby_system, system):
        """Populate the L2 cache slice.

        options     -- parsed options; reads no_tcc_resource_stalls,
                       recycle_latency and whatever TCC.create() reads
        ruby_system -- the RubySystem this controller belongs to
        system      -- not used by this method
        """
        self.version = self.versionCount()

        self.L2cache = TCC()
        l2 = self.L2cache
        l2.create(options)
        l2.resourceStalls = options.no_tcc_resource_stalls

        self.ruby_system = ruby_system

        # Only override the controller default when a value was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
276
class L3Cache(RubyCache):
    """RubyCache preset for one directory's slice of the L3 cache.

    Instantiated by both L3Cntrl.create() and DirCntrl.create().
    """

    dataArrayBanks = 16
    tagArrayBanks = 16

    def create(self, options, ruby_system, system):
        """Size, bank and time this L3 slice from the parsed options.

        options     -- parsed options; reads l3_size, l3_assoc, num_dirs,
                       l3_data_latency and l3_tag_latency
        ruby_system -- not used by this method
        system      -- not used by this method
        """
        # options.l3_size is the aggregate capacity; keep this slice's share.
        self.size = MemorySize(options.l3_size)
        self.size.value /= options.num_dirs
        self.assoc = options.l3_assoc
        # Split the banks across the directories exactly once, like the
        # capacity above.  The two divisions used to be duplicated, leaving
        # 16 / num_dirs**2 banks per slice (zero once num_dirs exceeds 4).
        self.dataArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs
        self.dataAccessLatency = options.l3_data_latency
        self.tagAccessLatency = options.l3_tag_latency
        self.resourceStalls = False
        self.replacement_policy = PseudoLRUReplacementPolicy()
293
class L3Cntrl(L3Cache_Controller, CntrlBase):
    """L3 controller owning one L3Cache slice."""

    def create(self, options, ruby_system, system):
        """Populate the L3 slice and derive the response latency from it.

        All three arguments are forwarded to L3Cache.create();
        options.recycle_latency is also read here.
        """
        self.version = self.versionCount()
        self.L3cache = L3Cache()
        self.L3cache.create(options, ruby_system, system)

        # Respond after the slower of the data and tag array accesses.
        l3 = self.L3cache
        self.l3_response_latency = max(l3.dataAccessLatency,
                                       l3.tagAccessLatency)
        self.ruby_system = ruby_system

        # Only override the controller default when a value was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six supplied buffers on the matching parameters."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for param_name, wire_buffer in wiring:
            setattr(self, param_name, wire_buffer)
314
class DirMem(RubyDirectoryMemory, CntrlBase):
    """Directory memory covering one directory's share of physical memory."""

    def create(self, options, ruby_system, system):
        """Set the size to mem_size divided by num_dirs.

        options     -- parsed options; reads mem_size and num_dirs
        ruby_system -- not used by this method
        system      -- not used by this method
        """
        self.version = self.versionCount()

        total_size = AddrRange(options.mem_size).size()
        # Start from an empty MemorySize and overwrite its raw value with
        # this directory's share.
        share = MemorySize('0B')
        share.value = total_size / options.num_dirs
        self.size = share
324
class DirCntrl(Directory_Controller, CntrlBase):
    """Directory controller owning a DirMem and an L3Cache slice."""

    def create(self, options, ruby_system, system):
        """Populate the directory memory, the L3 slice and the latencies.

        All three arguments are forwarded to DirMem.create() and
        L3Cache.create(); options.num_tbes and options.recycle_latency are
        also read here.
        """
        self.version = self.versionCount()

        self.response_latency = 30

        self.directory = DirMem()
        self.directory.create(options, ruby_system, system)

        self.L3CacheMemory = L3Cache()
        self.L3CacheMemory.create(options, ruby_system, system)

        # An L3 hit costs the slower of the data and tag array accesses.
        l3 = self.L3CacheMemory
        self.l3_hit_latency = max(l3.dataAccessLatency, l3.tagAccessLatency)

        self.number_of_TBEs = options.num_tbes

        self.ruby_system = ruby_system

        # Only override the controller default when a value was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six supplied buffers on the matching parameters."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for param_name, wire_buffer in wiring:
            setattr(self, param_name, wire_buffer)
355
def define_options(parser):
    """Register the GPU_VIPER command-line options on `parser`.

    parser -- an optparse-style parser exposing add_option()

    Note that --no-resource-stalls and --no-tcc-resource-stalls are
    store_false flags whose destinations default to True; elsewhere in this
    file those values are assigned straight to the caches' resourceStalls
    parameter, so passing the flag turns resource stalls off.
    """
    parser.add_option("--num-subcaches", type = "int", default = 4)
    parser.add_option("--l3-data-latency", type = "int", default = 20)
    parser.add_option("--l3-tag-latency", type = "int", default = 15)
    parser.add_option("--cpu-to-dir-latency", type = "int", default = 120)
    parser.add_option("--gpu-to-dir-latency", type = "int", default = 120)
    parser.add_option("--no-resource-stalls", action = "store_false",
                      default = True)
    parser.add_option("--no-tcc-resource-stalls", action = "store_false",
                      default = True)
    parser.add_option("--use-L3-on-WT", action = "store_true", default = False)
    parser.add_option("--num-tbes", type = "int", default = 256)
    parser.add_option("--l2-latency", type = "int", default = 50)  # load to use
    parser.add_option("--num-tccs", type = "int", default = 1,
                      help = "number of TCC banks in the GPU")
    parser.add_option("--sqc-size", type = 'string', default = '32kB',
                      help = "SQC cache size")
    parser.add_option("--sqc-assoc", type = 'int', default = 8,
                      help = "SQC cache assoc")
    parser.add_option("--WB_L1", action = "store_true", default = False,
                      help = "writeback L1")
    parser.add_option("--WB_L2", action = "store_true", default = False,
                      help = "writeback L2")
    parser.add_option("--TCP_latency", type = "int", default = 4,
                      help = "TCP latency")
    parser.add_option("--TCC_latency", type = "int", default = 16,
                      help = "TCC latency")
    # The value is the total across all TCC banks; TCC.create() divides it
    # by --num-tccs.
    parser.add_option("--tcc-size", type = 'string', default = '256kB',
                      help = "aggregate tcc size")
    parser.add_option("--tcc-assoc", type = 'int', default = 16,
                      help = "tcc assoc")
    parser.add_option("--tcp-size", type = 'string', default = '16kB',
                      help = "tcp size")
    parser.add_option("--tcp-assoc", type = 'int', default = 16,
                      help = "tcp assoc")
    parser.add_option("--noL1", action = "store_true", default = False,
                      help = "bypassL1")
393
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    """Create every GPU_VIPER controller and the cluster topology around them.

    Controllers are created in this order: directories, CPU core pairs,
    per-compute-unit TCPs, SQCs, command-processor TCP/SQC pairs, and TCCs.
    Each one is attached to `ruby_system` under a numbered attribute
    (dir_cntrl0, cp_cntrl0, tcp_cntrl0, ...), wired to ruby_system.network
    through MessageBuffers, and added to a Cluster.

    options     -- parsed command-line options
    full_system -- when false, CPUs and caches are also registered with
                   FileSystemConfig
    system      -- forwarded to the controllers' create() methods
    dma_devices -- must be empty; DMA is not handled here
    bootmem     -- not used by this function
    ruby_system -- RubySystem receiving the controllers

    Returns the tuple (cpu_sequencers, dir_cntrl_nodes, mainCluster), where
    cpu_sequencers holds, in creation order, both sequencers of every core
    pair, the coalescer of every compute-unit TCP, the sequencer of every
    SQC and, per command processor, the TCP sequencer then the SQC sequencer.
    """
    if buildEnv['PROTOCOL'] != 'GPU_VIPER':
        panic("This script requires the GPU_VIPER protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    dir_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #

    TCC_bits = int(math.log(options.num_tccs, 2))

    # Each CorePair controller serves two CPUs; round up so that an odd
    # number of CPUs still gets the right number of controllers.
    num_core_pairs = (options.num_cpus + 1) // 2

    # bw_scalor is optional on `options`; when present and positive, the
    # cluster bandwidths are derived from it instead of the fixed default.
    scale_bw = hasattr(options, 'bw_scalor') and options.bw_scalor > 0

    # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
    # Clusters
    crossbar_bw = None
    if scale_bw:
        #Assuming a 2GHz clock
        crossbar_bw = 16 * options.num_compute_units * options.bw_scalor
        mainCluster = Cluster(intBW = crossbar_bw)
    else:
        mainCluster = Cluster(intBW = 8) # 16 GB/s

    #
    # Directory controllers
    #
    for i in range(options.num_dirs):

        dir_cntrl = DirCntrl(noTCCdir = True, TCC_select_num_bits = TCC_bits)
        dir_cntrl.create(options, ruby_system, system)
        dir_cntrl.number_of_TBEs = options.num_tbes
        dir_cntrl.useL3OnWT = options.use_L3_on_WT
        # the number_of_TBEs is inclusive of TBEs below

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered = True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        # These three buffers are not connected to the network.
        dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.responseFromMemory = MessageBuffer()

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    #
    # CPU core-pair controllers
    #
    if scale_bw:
        cpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
    else:
        cpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s
    for i in range(num_core_pairs):

        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        setattr(ruby_system, "cp_cntrl%d" % i, cp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered = True)

        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        # This file has no module-level import of FileSystemConfig, so the
        # name is bound here before first use.
        # NOTE(review): assumes the module is importable as
        # common.FileSystemConfig once addToPath('../') has run -- confirm.
        from common import FileSystemConfig

        for i in xrange(num_core_pairs):
            first_core = i * 2
            second_core = i * 2 + 1

            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = \
                                            xrange(options.num_cpus),
                                          core_id = first_core,
                                          thread_siblings = [])

            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = \
                                            xrange(options.num_cpus),
                                          core_id = second_core,
                                          thread_siblings = [])

            # L1I is registered for both cores of the pair
            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Instruction',
                                            size = options.l1i_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1i_assoc,
                                            cpus = [first_core, second_core])

            # One L1D per core
            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [first_core])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [second_core])

            # L2 is registered for both cores of the pair
            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Unified',
                                            size = options.l2_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l2_assoc,
                                            cpus = [first_core, second_core])

        # One L3 entry per directory, each listing every CPU
        for _ in range(options.num_dirs):
            FileSystemConfig.register_cache(level = 2,
                                            idu_type = 'Unified',
                                            size = options.l3_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l3_assoc,
                                            cpus = list(
                                                xrange(options.num_cpus)))

    #
    # GPU controllers: TCPs, SQCs, command-processor TCP/SQC pairs and TCCs
    # all go into one cluster
    #
    if scale_bw:
        gpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
    else:
        gpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s
    for i in range(options.num_compute_units):

        tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
                             issue_latency = 1,
                             number_of_TBEs = 2560)
        # TBEs set to max outstanding requests
        tcp_cntrl.create(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = options.noL1
        tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
        tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency

        setattr(ruby_system, "tcp_cntrl%d" % i, tcp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.coalescer)

        # Connect the TCP controller to the ruby network
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True)
        tcp_cntrl.requestFromTCP.master = ruby_system.network.slave

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True)
        tcp_cntrl.responseFromTCP.master = ruby_system.network.slave

        tcp_cntrl.unblockFromCore = MessageBuffer()
        tcp_cntrl.unblockFromCore.master = ruby_system.network.slave

        tcp_cntrl.probeToTCP = MessageBuffer(ordered = True)
        tcp_cntrl.probeToTCP.slave = ruby_system.network.master

        tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
        tcp_cntrl.responseToTCP.slave = ruby_system.network.master

        tcp_cntrl.mandatoryQueue = MessageBuffer()

        gpuCluster.add(tcp_cntrl)

    for i in range(options.num_sqc):

        sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        setattr(ruby_system, "sqc_cntrl%d" % i, sqc_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # Connect the SQC controller to the ruby network
        sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True)
        sqc_cntrl.requestFromSQC.master = ruby_system.network.slave

        sqc_cntrl.probeToSQC = MessageBuffer(ordered = True)
        sqc_cntrl.probeToSQC.slave = ruby_system.network.master

        sqc_cntrl.responseToSQC = MessageBuffer(ordered = True)
        sqc_cntrl.responseToSQC.slave = ruby_system.network.master

        sqc_cntrl.mandatoryQueue = MessageBuffer()

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    for i in range(options.num_cp):

        # Continue the numbering after the compute-unit TCPs and the SQCs
        tcp_ID = options.num_compute_units + i
        sqc_ID = options.num_sqc + i

        tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
                             issue_latency = 1,
                             number_of_TBEs = 2560)
        # TBEs set to max outstanding requests
        tcp_cntrl.createCP(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = options.noL1
        tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
        tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency

        setattr(ruby_system, "tcp_cntrl%d" % tcp_ID, tcp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        # The sequencer, not the coalescer, is exported for these TCPs
        cpu_sequencers.append(tcp_cntrl.sequencer)

        # Connect the CP (TCP) controllers to the ruby network
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True)
        tcp_cntrl.requestFromTCP.master = ruby_system.network.slave

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True)
        tcp_cntrl.responseFromTCP.master = ruby_system.network.slave

        tcp_cntrl.unblockFromCore = MessageBuffer(ordered = True)
        tcp_cntrl.unblockFromCore.master = ruby_system.network.slave

        tcp_cntrl.probeToTCP = MessageBuffer(ordered = True)
        tcp_cntrl.probeToTCP.slave = ruby_system.network.master

        tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
        tcp_cntrl.responseToTCP.slave = ruby_system.network.master

        tcp_cntrl.mandatoryQueue = MessageBuffer()

        gpuCluster.add(tcp_cntrl)

        # NOTE(review): unlike the SQC loop above, no message buffers are
        # attached to this SQC controller -- confirm that is intended.
        sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        setattr(ruby_system, "sqc_cntrl%d" % sqc_ID, sqc_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    for i in range(options.num_tccs):

        tcc_cntrl = TCCCntrl(l2_response_latency = options.TCC_latency)
        tcc_cntrl.create(options, ruby_system, system)
        tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency
        tcc_cntrl.l2_response_latency = options.TCC_latency
        tcc_cntrl.WB = options.WB_L2
        tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units
        # the number_of_TBEs is inclusive of TBEs below

        # Connect the TCC controllers to the ruby network
        tcc_cntrl.requestFromTCP = MessageBuffer(ordered = True)
        tcc_cntrl.requestFromTCP.slave = ruby_system.network.master

        tcc_cntrl.responseToCore = MessageBuffer(ordered = True)
        tcc_cntrl.responseToCore.master = ruby_system.network.slave

        tcc_cntrl.probeFromNB = MessageBuffer()
        tcc_cntrl.probeFromNB.slave = ruby_system.network.master

        tcc_cntrl.responseFromNB = MessageBuffer()
        tcc_cntrl.responseFromNB.slave = ruby_system.network.master

        tcc_cntrl.requestToNB = MessageBuffer(ordered = True)
        tcc_cntrl.requestToNB.master = ruby_system.network.slave

        tcc_cntrl.responseToNB = MessageBuffer()
        tcc_cntrl.responseToNB.master = ruby_system.network.slave

        tcc_cntrl.unblockToNB = MessageBuffer()
        tcc_cntrl.unblockToNB.master = ruby_system.network.slave

        tcc_cntrl.triggerQueue = MessageBuffer(ordered = True)

        setattr(ruby_system, "tcc_cntrl%d" % i, tcc_cntrl)

        # TCC cntrls added to the GPU cluster
        gpuCluster.add(tcc_cntrl)

    # Assuming no DMA devices
    assert(len(dma_devices) == 0)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

    ruby_system.network.number_of_virtual_networks = 10

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
729