GPU_VIPER_Region.py revision 12647:6d7e2f321496
1# Copyright (c) 2015 Advanced Micro Devices, Inc.
2# All rights reserved.
3#
4# For use for simulation and test purposes only
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are met:
8#
9# 1. Redistributions of source code must retain the above copyright notice,
10# this list of conditions and the following disclaimer.
11#
12# 2. Redistributions in binary form must reproduce the above copyright notice,
13# this list of conditions and the following disclaimer in the documentation
14# and/or other materials provided with the distribution.
15#
16# 3. Neither the name of the copyright holder nor the names of its
17# contributors may be used to endorse or promote products derived from this
18# software without specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30# POSSIBILITY OF SUCH DAMAGE.
31#
32# Authors: Sooraj Puthoor
33
34import math
35import m5
36from m5.objects import *
37from m5.defines import buildEnv
38from Ruby import send_evicts
39
40from topologies.Cluster import Cluster
41
class CntrlBase:
    """Mixin that hands out running ID numbers to Ruby controllers.

    Sequencer and controller IDs come from counters stored on CntrlBase
    itself, so one sequence is shared by every subclass.  Version numbers
    are stored on the class the call is made through, so each controller
    type keeps its own running count.
    """

    _seqs = 0      # next sequencer ID, shared by all subclasses
    _cntrls = 0    # next controller ID, shared by all subclasses
    _version = 0   # next version number, tracked on the calling class

    @classmethod
    def seqCount(cls):
        """Return the next unused sequencer ID (0, 1, 2, ...)."""
        # Always CntrlBase, never cls: the sequencer count is global.
        allocated = CntrlBase._seqs
        CntrlBase._seqs = allocated + 1
        return allocated

    @classmethod
    def cntrlCount(cls):
        """Return the next unused controller ID (0, 1, 2, ...)."""
        # Always CntrlBase, never cls: the controller count is global.
        allocated = CntrlBase._cntrls
        CntrlBase._cntrls = allocated + 1
        return allocated

    @classmethod
    def versionCount(cls):
        """Return the next unused version number for this class."""
        # Stored on cls, so every controller type counts separately.
        allocated = cls._version
        cls._version = allocated + 1
        return allocated
62
63#
64# Note: the L1 Cache latency is only used by the sequencer on fast path hits
65#
class L1Cache(RubyCache):
    """Cache array that CPCntrl uses for its L1 instruction/data caches."""
    tagAccessLatency = 1
    dataAccessLatency = 1
    tagArrayBanks = 2
    dataArrayBanks = 2
    resourceStalls = False

    def create(self, size, assoc, options):
        """Set capacity and associativity and attach a pseudo-LRU policy.

        size is handed to MemorySize(); options is accepted for signature
        parity with the other caches but is not read here.
        """
        self.replacement_policy = PseudoLRUReplacementPolicy()
        self.assoc = assoc
        self.size = MemorySize(size)
76
class L2Cache(RubyCache):
    """Cache array that CPCntrl uses for its L2 cache."""
    tagArrayBanks = 16
    dataArrayBanks = 16
    assoc = 16             # default; create() replaces it
    resourceStalls = False

    def create(self, size, assoc, options):
        """Set capacity and associativity and attach a pseudo-LRU policy.

        size is handed to MemorySize(); options is accepted for signature
        parity with the other caches but is not read here.
        """
        self.replacement_policy = PseudoLRUReplacementPolicy()
        self.assoc = assoc
        self.size = MemorySize(size)
86
class CPCntrl(CorePair_Controller, CntrlBase):
    """Core-pair controller: two sequencers sharing one L1I and one L2."""

    def create(self, options, ruby_system, system):
        """Build the caches and the two sequencers of this core pair.

        Reads the l1i/l1d/l2 size and associativity options and
        recycle_latency.  system is accepted but not used.
        """
        self.version = self.versionCount()

        # The caches are attached to this controller before any sequencer
        # refers to them.
        self.L1Icache = L1Cache()
        self.L1Icache.create(options.l1i_size, options.l1i_assoc, options)
        self.L1D0cache = L1Cache()
        self.L1D0cache.create(options.l1d_size, options.l1d_assoc, options)
        self.L1D1cache = L1Cache()
        self.L1D1cache.create(options.l1d_size, options.l1d_assoc, options)
        self.L2cache = L2Cache()
        self.L2cache.create(options.l2_size, options.l2_assoc, options)

        # One sequencer per core: both share the instruction cache, each
        # has its own data cache.  Core 0 takes its sequencer ID first.
        per_core = (("sequencer", 0, self.L1D0cache),
                    ("sequencer1", 1, self.L1D1cache))
        for attr_name, core_id, data_cache in per_core:
            setattr(self, attr_name, RubySequencer())
            seq = getattr(self, attr_name)
            seq.version = self.seqCount()
            seq.icache = self.L1Icache
            seq.dcache = data_cache
            seq.ruby_system = ruby_system
            seq.coreid = core_id
            seq.is_cpu_sequencer = True

        self.issue_latency = 1
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Keep the controller's own default unless the option is set.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle
124
class TCPCache(RubyCache):
    """Cache array used by TCPCntrl as its L1 cache."""
    size = "16kB"
    assoc = 16
    dataArrayBanks = 16
    tagArrayBanks = 16
    dataAccessLatency = 4
    tagAccessLatency = 1

    def create(self, options):
        """Apply the command-line options to this cache.

        Sets the size from options.tcp_size, the resource-stall flag from
        options.no_tcc_resource_stalls, and attaches a pseudo-LRU policy
        sized to the current associativity.

        Bank counts and access latencies are deliberately NOT re-assigned
        here.  They keep the class defaults above unless the caller passed
        overrides to the constructor; TCPCntrl passes
        dataAccessLatency = options.TCP_latency, which this method used to
        overwrite with the hard-coded default of 4.
        """
        self.size = MemorySize(options.tcp_size)
        self.resourceStalls = options.no_tcc_resource_stalls
        self.replacement_policy = PseudoLRUReplacementPolicy(assoc = self.assoc)
140
class TCPCntrl(TCP_Controller, CntrlBase):
    """TCP controller: one TCPCache fronted by a coalescer and a sequencer."""

    def create(self, options, ruby_system, system):
        """Build the L1 cache, the coalescer and the sequencer.

        Reads options.TCP_latency, the options consumed by
        TCPCache.create(), and options.recycle_latency.  system is
        accepted but not used.
        """
        self.version = self.versionCount()
        self.L1cache = TCPCache(dataAccessLatency = options.TCP_latency)
        self.L1cache.create(options)
        self.issue_latency = 1

        # The coalescer takes its ID before the sequencer does.  Both use
        # the single L1 cache as instruction and data cache.
        self.coalescer = VIPERCoalescer()
        coalescer = self.coalescer
        coalescer.version = self.seqCount()
        coalescer.icache = self.L1cache
        coalescer.dcache = self.L1cache
        coalescer.ruby_system = ruby_system
        coalescer.support_inst_reqs = False
        coalescer.is_cpu_sequencer = False

        self.sequencer = RubySequencer()
        sequencer = self.sequencer
        sequencer.version = self.seqCount()
        sequencer.icache = self.L1cache
        sequencer.dcache = self.L1cache
        sequencer.ruby_system = ruby_system
        sequencer.is_cpu_sequencer = True

        self.use_seq_not_coal = False

        self.ruby_system = ruby_system

        # Keep the controller's own default unless the option is set.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle
169
class SQCCache(RubyCache):
    """Cache array used by SQCCntrl as its L1 cache."""
    tagAccessLatency = 1
    dataAccessLatency = 1
    tagArrayBanks = 8
    dataArrayBanks = 8

    def create(self, options):
        """Take size and associativity from the sqc_size/sqc_assoc options
        and attach a pseudo-LRU policy sized to that associativity."""
        self.size = MemorySize(options.sqc_size)
        self.assoc = options.sqc_assoc
        policy = PseudoLRUReplacementPolicy(assoc = self.assoc)
        self.replacement_policy = policy
180
class SQCCntrl(SQC_Controller, CntrlBase):
    """SQC controller: one SQCCache and a sequencer without data requests."""

    def create(self, options, ruby_system, system):
        """Build the L1 cache and its sequencer.

        Reads the options consumed by SQCCache.create() and
        options.recycle_latency.  system is accepted but not used.
        """
        self.version = self.versionCount()

        self.L1cache = SQCCache()
        self.L1cache.create(options)
        self.L1cache.resourceStalls = False

        # The single L1 cache serves as both icache and dcache.
        self.sequencer = RubySequencer()
        sequencer = self.sequencer
        sequencer.version = self.seqCount()
        sequencer.icache = self.L1cache
        sequencer.dcache = self.L1cache
        sequencer.ruby_system = ruby_system
        sequencer.support_data_reqs = False
        sequencer.is_cpu_sequencer = False

        self.ruby_system = ruby_system

        # Keep the controller's own default unless the option is set.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle
198
class TCC(RubyCache):
    """Cache array used by TCCCntrl as its L2 cache (one per TCC bank)."""
    size = MemorySize("256kB")
    assoc = 16
    dataAccessLatency = 8
    tagAccessLatency = 2
    resourceStalls = False

    def create(self, options):
        """Size this bank from the tcc_* options.

        With a positive options.bw_scalor the aggregate size is 128kB per
        compute unit and 64 banks are used; otherwise the aggregate size
        is options.tcc_size and 256 banks are split across the TCCs.  The
        aggregate size is then divided by options.num_tccs.
        """
        self.assoc = options.tcc_assoc

        scaled = hasattr(options, 'bw_scalor') and options.bw_scalor > 0
        if not scaled:
            self.size = MemorySize(options.tcc_size)
            self.dataArrayBanks = 256 / options.num_tccs #number of data banks
            self.tagArrayBanks = 256 / options.num_tccs #number of tag banks
        else:
            aggregate = str(options.num_compute_units * 128) + 'kB'
            self.size = MemorySize(aggregate)
            self.dataArrayBanks = 64
            self.tagArrayBanks = 64

        # Per-bank share of the aggregate, with a floor of 128 per way.
        self.size.value = self.size.value / options.num_tccs
        if self.size.value / long(self.assoc) < 128:
            self.size.value = long(128 * self.assoc)

        # Set indexing starts above the line-offset and bank-select bits.
        line_bits = math.log(options.cacheline_size, 2)
        bank_bits = math.log(options.num_tccs, 2)
        self.start_index_bit = line_bits + bank_bits

        self.replacement_policy = PseudoLRUReplacementPolicy(assoc = self.assoc)
224
class TCCCntrl(TCC_Controller, CntrlBase):
    """TCC controller wrapping a single TCC cache array."""

    def create(self, options, ruby_system, system):
        """Build the L2 cache array from options; system is not used."""
        self.version = self.versionCount()

        self.L2cache = TCC()
        self.L2cache.create(options)

        self.ruby_system = ruby_system

        # Keep the controller's own default unless the option is set.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle
233
class L3Cache(RubyCache):
    """L3 cache array; L3Cntrl and DirCntrl each instantiate one."""
    dataArrayBanks = 16
    tagArrayBanks = 16

    def create(self, options, ruby_system, system):
        """Configure this L3 slice from the l3_* options.

        The aggregate size and the bank counts are each divided by
        options.num_dirs to get this slice's share.  ruby_system and
        system are accepted but not used.
        """
        self.size = MemorySize(options.l3_size)
        self.size.value /= options.num_dirs
        self.assoc = options.l3_assoc
        # Divide the banks by num_dirs once, like the size above.  The
        # original repeated these two lines, which divided the bank counts
        # by num_dirs squared; the results are identical when num_dirs is 1.
        self.dataArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs
        self.dataAccessLatency = options.l3_data_latency
        self.tagAccessLatency = options.l3_tag_latency
        self.resourceStalls = False
        self.replacement_policy = PseudoLRUReplacementPolicy(assoc = self.assoc)
250
class L3Cntrl(L3Cache_Controller, CntrlBase):
    """L3 controller wrapping a single L3Cache array."""

    def create(self, options, ruby_system, system):
        """Build the L3 array and derive the response latency from it."""
        self.version = self.versionCount()

        self.L3cache = L3Cache()
        self.L3cache.create(options, ruby_system, system)

        # Respond after the slower of the data and tag array accesses.
        l3_array = self.L3cache
        self.l3_response_latency = max(l3_array.dataAccessLatency,
                                       l3_array.tagAccessLatency)

        self.ruby_system = ruby_system

        # Keep the controller's own default unless the option is set.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six given wire buffers on the matching attributes."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr_name, wire_buffer in wiring:
            setattr(self, attr_name, wire_buffer)
270
271# Directory memory: Directory memory of infinite size which is
272# used by directory controller to store the "states" of the
273# state machine. The state machine is implemented per cache block
class DirMem(RubyDirectoryMemory, CntrlBase):
    """Directory memory sized to one directory's share of physical memory."""

    def create(self, options, ruby_system, system):
        """Set size to (size of options.mem_size range) / options.num_dirs.

        ruby_system and system are accepted but not used.
        """
        self.version = self.versionCount()

        module_bytes = AddrRange(options.mem_size).size() / options.num_dirs

        # Start from an empty MemorySize and fill in the raw value.
        per_dir = MemorySize('0B')
        per_dir.value = module_bytes
        self.size = per_dir
282
283# Directory controller: Contains directory memory, L3 cache and associated state
284# machine which is used to accurately redirect a data request to L3 cache or to
285# memory. The permissions requests do not come to this directory for region
286# based protocols as they are handled exclusively by the region directory.
287# However, region directory controller uses this directory controller for
288# sending probe requests and receiving probe responses.
class DirCntrl(Directory_Controller, CntrlBase):
    """Directory controller owning a DirMem and an L3Cache array."""

    def create(self, options, ruby_system, system):
        """Build the directory memory and L3 array and set the latencies."""
        self.version = self.versionCount()
        self.response_latency = 25
        self.response_latency_regionDir = 1

        self.directory = DirMem()
        self.directory.create(options, ruby_system, system)

        self.L3CacheMemory = L3Cache()
        self.L3CacheMemory.create(options, ruby_system, system)

        # An L3 hit costs the slower of the data and tag array accesses.
        l3_array = self.L3CacheMemory
        self.l3_hit_latency = max(l3_array.dataAccessLatency,
                                  l3_array.tagAccessLatency)

        self.ruby_system = ruby_system

        # Keep the controller's own default unless the option is set.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six given wire buffers on the matching attributes."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr_name, wire_buffer in wiring:
            setattr(self, attr_name, wire_buffer)
314
315# Region directory : Stores region permissions
class RegionDir(RubyCache):
    """Cache array holding the region directory; one block per region."""

    def create(self, options, ruby_system, system):
        """Size the array from the region options.

        A block covers 64 * options.blocks_per_region bytes, and the array
        holds options.region_dir_entries blocks per compute unit.
        ruby_system and system are accepted but not used.
        """
        region_blocks = options.blocks_per_region

        # block_size must be set first: size is computed from the value
        # read back from self.block_size.
        self.block_size = "%dB" % (64 * region_blocks)
        self.size = options.region_dir_entries * \
            self.block_size * options.num_compute_units

        self.assoc = 8
        self.tagArrayBanks = 8
        self.tagAccessLatency = options.dir_tag_latency
        self.dataAccessLatency = 1
        self.resourceStalls = options.no_resource_stalls

        # Index above the 6 block-offset bits plus the block-in-region bits.
        self.start_index_bit = 6 + int(math.log(region_blocks, 2))

        self.replacement_policy = PseudoLRUReplacementPolicy(assoc = self.assoc)
329# Region directory controller : Contains region directory and associated state
330# machine for dealing with region coherence requests.
class RegionCntrl(RegionDir_Controller, CntrlBase):
    """Region directory controller wrapping a RegionDir array."""

    def create(self, options, ruby_system, system):
        """Build the region directory and apply the migration options.

        Asserts that at most one of always_migrate, symmetric_migrate and
        asymmetric_migrate is enabled.
        """
        self.version = self.versionCount()

        self.cacheMemory = RegionDir()
        self.cacheMemory.create(options, ruby_system, system)
        self.blocksPerRegion = options.blocks_per_region

        # Forward to the directory after the slower array access.
        region_array = self.cacheMemory
        self.toDirLatency = max(region_array.dataAccessLatency,
                                region_array.tagAccessLatency)

        self.ruby_system = ruby_system

        self.always_migrate = options.always_migrate
        self.sym_migrate = options.symmetric_migrate
        self.asym_migrate = options.asymmetric_migrate

        # The three migration policies are mutually exclusive.
        if self.always_migrate:
            assert not (self.asym_migrate or self.sym_migrate)
        if self.sym_migrate:
            assert not (self.always_migrate or self.asym_migrate)
        if self.asym_migrate:
            assert not (self.always_migrate or self.sym_migrate)

        # Keep the controller's own default unless the option is set.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle
352
353# Region Buffer: A region directory cache which avoids some potential
354# long latency lookup of region directory for getting region permissions
class RegionBuffer(RubyCache):
    """Cache array used by RBCntrl; RBCntrl.create() overrides some of
    these defaults (bank counts and resourceStalls)."""
    resourceStalls = True
    tagAccessLatency = 1
    dataAccessLatency = 1
    tagArrayBanks = 256
    dataArrayBanks = 256
    assoc = 4
362
class RBCntrl(RegionBuffer_Controller, CntrlBase):
    """Region buffer controller wrapping a RegionBuffer array."""

    def create(self, options, ruby_system, system):
        """Build and size the region buffer and set the controller latencies.

        A block covers 64 * options.blocks_per_region bytes, and the buffer
        holds options.region_buffer_entries blocks per compute unit.
        system is accepted but not used.
        """
        self.version = self.versionCount()

        self.cacheMemory = RegionBuffer()
        buffer_array = self.cacheMemory
        buffer_array.resourceStalls = options.no_tcc_resource_stalls
        buffer_array.dataArrayBanks = 64
        buffer_array.tagArrayBanks = 64

        self.blocksPerRegion = options.blocks_per_region

        # block_size must be set first: size is computed from the value
        # read back from the array's block_size.
        buffer_array.block_size = "%dB" % (64 * self.blocksPerRegion)
        # Index above the 6 block-offset bits plus the block-in-region bits.
        buffer_array.start_index_bit = \
            6 + int(math.log(self.blocksPerRegion, 2))
        buffer_array.size = options.region_buffer_entries * \
            buffer_array.block_size * options.num_compute_units

        self.toDirLatency = options.gpu_to_dir_latency
        self.toRegionDirLatency = options.cpu_to_dir_latency
        self.noTCCdir = True
        self.TCC_select_num_bits = int(math.log(options.num_tccs, 2))
        self.ruby_system = ruby_system

        # Keep the controller's own default unless the option is set.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle

        buffer_array.replacement_policy = \
            PseudoLRUReplacementPolicy(assoc = buffer_array.assoc)
387
def define_options(parser):
    """Register the GPU_VIPER_Region command-line options on parser.

    parser must provide an optparse-style add_option() method.  Option
    values end up on the parsed options object under the option name with
    the leading dashes removed and remaining dashes turned into
    underscores (e.g. --num-tccs -> options.num_tccs).
    """
    parser.add_option("--num-subcaches", type="int", default=4)
    parser.add_option("--l3-data-latency", type="int", default=20)
    parser.add_option("--l3-tag-latency", type="int", default=15)
    parser.add_option("--cpu-to-dir-latency", type="int", default=120)
    parser.add_option("--gpu-to-dir-latency", type="int", default=60)
    # Both *-resource-stalls flags are store_false with default True: the
    # stored value is True unless the flag is given on the command line.
    parser.add_option("--no-resource-stalls", action="store_false",
                      default=True)
    parser.add_option("--no-tcc-resource-stalls", action="store_false",
                      default=True)
    parser.add_option("--num-tbes", type="int", default=32)
    parser.add_option("--l2-latency", type="int", default=50) # load to use
    parser.add_option("--num-tccs", type="int", default=1,
                      help="number of TCC banks in the GPU")

    parser.add_option("--sqc-size", type='string', default='32kB',
                      help="SQC cache size")
    parser.add_option("--sqc-assoc", type='int', default=8,
                      help="SQC cache assoc")

    # The --WB_L1 help text used to be a copy of the --WB_L2 one.
    parser.add_option("--WB_L1", action="store_true",
        default=False, help="L1 Writeback Cache")
    parser.add_option("--WB_L2", action="store_true",
        default=False, help="L2 Writeback Cache")
    parser.add_option("--TCP_latency",
        type="int", default=4, help="TCP latency")
    parser.add_option("--TCC_latency",
        type="int", default=16, help="TCC latency")
    parser.add_option("--tcc-size", type='string', default='2MB',
                      help="aggregate tcc size")
    parser.add_option("--tcc-assoc", type='int', default=16,
                      help="tcc assoc")
    parser.add_option("--tcp-size", type='string', default='16kB',
                      help="tcp size")

    parser.add_option("--dir-tag-latency", type="int", default=4)
    parser.add_option("--dir-tag-banks", type="int", default=4)
    parser.add_option("--blocks-per-region", type="int", default=16)
    parser.add_option("--dir-entries", type="int", default=8192)

    # Region buffer is a cache of region directory. Hence region
    # directory is inclusive with respect to region buffer.
    # However, region directory is non-inclusive with respect to
    # the caches in the system
    parser.add_option("--region-dir-entries", type="int", default=1024)
    parser.add_option("--region-buffer-entries", type="int", default=512)

    # Migration policy flags; RegionCntrl.create() asserts that at most
    # one of them is enabled.
    parser.add_option("--always-migrate",
        action="store_true", default=False)
    parser.add_option("--symmetric-migrate",
        action="store_true", default=False)
    parser.add_option("--asymmetric-migrate",
        action="store_true", default=False)
    parser.add_option("--use-L3-on-WT", action="store_true", default=False)
442
def _connect_region_buffer(rb_cntrl, network):
    """Create rb_cntrl's message buffers and attach them to network."""
    rb_cntrl.requestFromCore = MessageBuffer(ordered = True)
    rb_cntrl.requestFromCore.slave = network.master

    rb_cntrl.responseFromCore = MessageBuffer()
    rb_cntrl.responseFromCore.slave = network.master

    rb_cntrl.requestToNetwork = MessageBuffer()
    rb_cntrl.requestToNetwork.master = network.slave

    rb_cntrl.notifyFromRegionDir = MessageBuffer()
    rb_cntrl.notifyFromRegionDir.slave = network.master

    rb_cntrl.probeFromRegionDir = MessageBuffer()
    rb_cntrl.probeFromRegionDir.slave = network.master

    rb_cntrl.unblockFromDir = MessageBuffer()
    rb_cntrl.unblockFromDir.slave = network.master

    rb_cntrl.responseToRegDir = MessageBuffer()
    rb_cntrl.responseToRegDir.master = network.slave

    rb_cntrl.triggerQueue = MessageBuffer(ordered = True)


def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    """Build every GPU_VIPER_Region controller and wire it to the network.

    Creates, in this order: one core-pair controller plus region buffer
    per CPU pair, one TCP controller per compute unit, options.num_sqc SQC
    controllers, one TCC controller plus region buffer per TCC bank, the
    single directory controller, and the region directory controller.
    The controllers are attached to `system` under numbered names and
    grouped into a CPU cluster and a GPU cluster inside a main cluster.

    full_system and bootmem are accepted but not used.  dma_devices must
    be empty, and options.num_l3caches and options.num_dirs must both be
    1; both conditions are asserted.

    Returns the tuple (cpu_sequencers, dir_cntrl_nodes, mainCluster).
    """
    if buildEnv['PROTOCOL'] != 'GPU_VIPER_Region':
        panic("This script requires the GPU_VIPER_Region protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    dir_cntrl_nodes = []

    TCC_bits = int(math.log(options.num_tccs, 2))

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #

    crossbar_bw = 16 * options.num_compute_units #Assuming a 2GHz clock
    cpuCluster = Cluster(extBW = (crossbar_bw), intBW=crossbar_bw)
    # For an odd number of CPUs, still create the right number of controllers
    for i in xrange((options.num_cpus + 1) // 2):

        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        rb_cntrl = RBCntrl()
        rb_cntrl.create(options, ruby_system, system)
        rb_cntrl.number_of_TBEs = 256
        rb_cntrl.isOnCPU = True

        cp_cntrl.regionBufferNum = rb_cntrl.version

        setattr(system, "cp_cntrl%d" % i, cp_cntrl)
        setattr(system, "rb_cntrl%d" % i, rb_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered = True)

        # Connect the RB controllers to the ruby network
        _connect_region_buffer(rb_cntrl, ruby_system.network)

        cpuCluster.add(cp_cntrl)
        cpuCluster.add(rb_cntrl)

    gpuCluster = Cluster(extBW = (crossbar_bw), intBW = crossbar_bw)
    for i in xrange(options.num_compute_units):

        tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
                             issue_latency = 1,
                             number_of_TBEs = 2560)
        # TBEs set to max outstanding requests
        tcp_cntrl.create(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = False

        setattr(system, "tcp_cntrl%d" % i, tcp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.coalescer)

        # Connect the CP (TCP) controllers to the ruby network
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True)
        tcp_cntrl.requestFromTCP.master = ruby_system.network.slave

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True)
        tcp_cntrl.responseFromTCP.master = ruby_system.network.slave

        tcp_cntrl.unblockFromCore = MessageBuffer()
        tcp_cntrl.unblockFromCore.master = ruby_system.network.slave

        tcp_cntrl.probeToTCP = MessageBuffer(ordered = True)
        tcp_cntrl.probeToTCP.slave = ruby_system.network.master

        tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
        tcp_cntrl.responseToTCP.slave = ruby_system.network.master

        tcp_cntrl.mandatoryQueue = MessageBuffer()

        gpuCluster.add(tcp_cntrl)

    for i in xrange(options.num_sqc):

        sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        setattr(system, "sqc_cntrl%d" % i, sqc_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # Connect the SQC controller to the ruby network
        sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True)
        sqc_cntrl.requestFromSQC.master = ruby_system.network.slave

        sqc_cntrl.probeToSQC = MessageBuffer(ordered = True)
        sqc_cntrl.probeToSQC.slave = ruby_system.network.master

        sqc_cntrl.responseToSQC = MessageBuffer(ordered = True)
        sqc_cntrl.responseToSQC.slave = ruby_system.network.master

        sqc_cntrl.mandatoryQueue = MessageBuffer()

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    for i in xrange(options.num_tccs):

        tcc_cntrl = TCCCntrl()
        tcc_cntrl.create(options, ruby_system, system)
        tcc_cntrl.l2_request_latency = 1
        tcc_cntrl.l2_response_latency = options.TCC_latency
        tcc_cntrl.WB = options.WB_L2
        tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units

        # Connect the TCC controllers to the ruby network
        tcc_cntrl.requestFromTCP = MessageBuffer(ordered = True)
        tcc_cntrl.requestFromTCP.slave = ruby_system.network.master

        tcc_cntrl.responseToCore = MessageBuffer(ordered = True)
        tcc_cntrl.responseToCore.master = ruby_system.network.slave

        tcc_cntrl.probeFromNB = MessageBuffer()
        tcc_cntrl.probeFromNB.slave = ruby_system.network.master

        tcc_cntrl.responseFromNB = MessageBuffer()
        tcc_cntrl.responseFromNB.slave = ruby_system.network.master

        tcc_cntrl.requestToNB = MessageBuffer(ordered = True)
        tcc_cntrl.requestToNB.master = ruby_system.network.slave

        tcc_cntrl.responseToNB = MessageBuffer()
        tcc_cntrl.responseToNB.master = ruby_system.network.slave

        tcc_cntrl.unblockToNB = MessageBuffer()
        tcc_cntrl.unblockToNB.master = ruby_system.network.slave

        tcc_cntrl.triggerQueue = MessageBuffer(ordered = True)

        rb_cntrl = RBCntrl()
        rb_cntrl.create(options, ruby_system, system)
        rb_cntrl.number_of_TBEs = 2560 * options.num_compute_units
        rb_cntrl.isOnCPU = False

        # Connect the RB controllers to the ruby network
        _connect_region_buffer(rb_cntrl, ruby_system.network)

        tcc_cntrl.regionBufferNum = rb_cntrl.version

        setattr(system, "tcc_cntrl%d" % i, tcc_cntrl)
        setattr(system, "tcc_rb_cntrl%d" % i, rb_cntrl)

        # TCC cntrls added to the GPU cluster
        gpuCluster.add(tcc_cntrl)
        gpuCluster.add(rb_cntrl)

    # Because of wire buffers, num_l3caches must equal num_dirs
    # Region coherence only works with 1 dir
    assert(options.num_l3caches == options.num_dirs == 1)

    # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
    # Clusters
    mainCluster = Cluster(intBW = crossbar_bw)

    dir_cntrl = DirCntrl()
    dir_cntrl.create(options, ruby_system, system)
    dir_cntrl.number_of_TBEs = 2560 * options.num_compute_units
    dir_cntrl.useL3OnWT = options.use_L3_on_WT

    # Connect the Directory controller to the ruby network
    dir_cntrl.requestFromCores = MessageBuffer()
    dir_cntrl.requestFromCores.slave = ruby_system.network.master

    dir_cntrl.responseFromCores = MessageBuffer()
    dir_cntrl.responseFromCores.slave = ruby_system.network.master

    dir_cntrl.unblockFromCores = MessageBuffer()
    dir_cntrl.unblockFromCores.slave = ruby_system.network.master

    dir_cntrl.probeToCore = MessageBuffer()
    dir_cntrl.probeToCore.master = ruby_system.network.slave

    dir_cntrl.responseToCore = MessageBuffer()
    dir_cntrl.responseToCore.master = ruby_system.network.slave

    dir_cntrl.reqFromRegBuf = MessageBuffer()
    dir_cntrl.reqFromRegBuf.slave = ruby_system.network.master

    dir_cntrl.reqToRegDir = MessageBuffer(ordered = True)
    dir_cntrl.reqToRegDir.master = ruby_system.network.slave

    dir_cntrl.reqFromRegDir = MessageBuffer(ordered = True)
    dir_cntrl.reqFromRegDir.slave = ruby_system.network.master

    dir_cntrl.unblockToRegDir = MessageBuffer()
    dir_cntrl.unblockToRegDir.master = ruby_system.network.slave

    dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
    dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
    dir_cntrl.responseFromMemory = MessageBuffer()

    # NOTE(review): `i` is the index left over from the TCC loop above, so
    # the suffix is num_tccs - 1 rather than a directory index.  Kept as-is
    # because the attribute name is externally visible; confirm before
    # changing it to a fixed 0.
    setattr(system, "dir_cntrl%d" % i, dir_cntrl)
    dir_cntrl_nodes.append(dir_cntrl)

    mainCluster.add(dir_cntrl)

    reg_cntrl = RegionCntrl(noTCCdir=True,TCC_select_num_bits = TCC_bits)
    reg_cntrl.create(options, ruby_system, system)
    reg_cntrl.number_of_TBEs = options.num_tbes
    # The region directory is told about the first CPU region buffer and the
    # first TCC region buffer; both must therefore have been created above.
    reg_cntrl.cpuRegionBufferNum = system.rb_cntrl0.version
    reg_cntrl.gpuRegionBufferNum = system.tcc_rb_cntrl0.version

    # Connect the Region Dir controllers to the ruby network
    reg_cntrl.requestToDir = MessageBuffer(ordered = True)
    reg_cntrl.requestToDir.master = ruby_system.network.slave

    reg_cntrl.notifyToRBuffer = MessageBuffer()
    reg_cntrl.notifyToRBuffer.master = ruby_system.network.slave

    reg_cntrl.probeToRBuffer = MessageBuffer()
    reg_cntrl.probeToRBuffer.master = ruby_system.network.slave

    reg_cntrl.responseFromRBuffer = MessageBuffer()
    reg_cntrl.responseFromRBuffer.slave = ruby_system.network.master

    reg_cntrl.requestFromRegBuf = MessageBuffer()
    reg_cntrl.requestFromRegBuf.slave = ruby_system.network.master

    reg_cntrl.triggerQueue = MessageBuffer(ordered = True)

    # NOTE(review): same leftover TCC loop index as for dir_cntrl above.
    setattr(system, "reg_cntrl%d" % i, reg_cntrl)

    mainCluster.add(reg_cntrl)

    # Assuming no DMA devices
    assert(len(dma_devices) == 0)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

    ruby_system.network.number_of_virtual_networks = 10

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
758