# GPU_RfO.py revision 13400
# Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Lisa Hsu

import math
import m5
from m5.objects import *
from m5.defines import buildEnv
from m5.util import addToPath
from Ruby import create_topology
from Ruby import send_evicts

addToPath('../')

from topologies.Cluster import Cluster
from topologies.Crossbar import Crossbar

class CntrlBase:
    """Mixin that hands out monotonically increasing id numbers.

    seqCount() and cntrlCount() share one counter across every subclass;
    versionCount() advances the counter of the class it is called on.
    """

    _seqs = 0
    @classmethod
    def seqCount(cls):
        """Return the next global sequencer number (starting at 0)."""
        # Use SeqCount not class since we need global count
        CntrlBase._seqs += 1
        return CntrlBase._seqs - 1

    _cntrls = 0
    @classmethod
    def cntrlCount(cls):
        """Return the next global controller number (starting at 0)."""
        # Use CntlCount not class since we need global count
        CntrlBase._cntrls += 1
        return CntrlBase._cntrls - 1

    _version = 0
    @classmethod
    def versionCount(cls):
        """Return the next version number for this controller type."""
        cls._version += 1 # Use count for this particular type
        return cls._version - 1

# Cache backing the TCC directory (see TCCDirCntrl.create).
class TccDirCache(RubyCache):
    size = "512kB"
    assoc = 16
    resourceStalls = False
    def create(self, options):
        """Size the directory from the TCC/TCP options and set its index bit.

        The size is tcc_size plus
        (num_compute_units * tcp_size * tcc_dir_factor) / num_tccs.
        """
        self.size = MemorySize(options.tcc_size)
        self.size.value += (options.num_compute_units *
                            (MemorySize(options.tcp_size).value) *
                            options.tcc_dir_factor) / long(options.num_tccs)
        # Index starts above the cache-line offset bits and the bits used
        # to select among the TCCs.
        self.start_index_bit = math.log(options.cacheline_size, 2) + \
                               math.log(options.num_tccs, 2)
        self.replacement_policy = PseudoLRUReplacementPolicy()

# CPU L1 data cache, configured from --l1d_size / --l1d_assoc.
class L1DCache(RubyCache):
    resourceStalls = False
    def create(self, options):
        """Apply the L1D size and associativity options."""
        self.size = MemorySize(options.l1d_size)
        self.assoc = options.l1d_assoc
        self.replacement_policy = PseudoLRUReplacementPolicy()

# CPU L1 instruction cache, configured from --l1i_size / --l1i_assoc.
class L1ICache(RubyCache):
    resourceStalls = False
    def create(self, options):
        """Apply the L1I size and associativity options."""
        self.size = MemorySize(options.l1i_size)
        self.assoc = options.l1i_assoc
        self.replacement_policy = PseudoLRUReplacementPolicy()

# CPU L2 cache, configured from --l2_size / --l2_assoc.
class L2Cache(RubyCache):
    resourceStalls = False
    def create(self, options):
        """Apply the L2 size and associativity options."""
        self.size = MemorySize(options.l2_size)
        self.assoc = options.l2_assoc
        self.replacement_policy = PseudoLRUReplacementPolicy()


# Core-pair controller: two sequencers (coreid 0 and 1) that share one L1I
# and one L2 and each have their own L1D.
class CPCntrl(CorePair_Controller, CntrlBase):

    def create(self, options, ruby_system, system):
        """Build the caches and both sequencers for this core pair.

        `system` is accepted for signature parity with the other
        controllers and is not used here.
        """
        self.version = self.versionCount()

        self.L1Icache = L1ICache()
        self.L1Icache.create(options)
        self.L1D0cache = L1DCache()
        self.L1D0cache.create(options)
        self.L1D1cache = L1DCache()
        self.L1D1cache.create(options)
        self.L2cache = L2Cache()
        self.L2cache.create(options)

        self.sequencer = RubySequencer()
        self.sequencer.icache_hit_latency = 2
        self.sequencer.dcache_hit_latency = 2
        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1Icache
        self.sequencer.dcache = self.L1D0cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.coreid = 0
        self.sequencer.is_cpu_sequencer = True

        self.sequencer1 = RubySequencer()
        self.sequencer1.version = self.seqCount()
        self.sequencer1.icache = self.L1Icache
        self.sequencer1.dcache = self.L1D1cache
        self.sequencer1.icache_hit_latency = 2
        self.sequencer1.dcache_hit_latency = 2
        self.sequencer1.ruby_system = ruby_system
        self.sequencer1.coreid = 1
        self.sequencer1.is_cpu_sequencer = True

        self.issue_latency = options.cpu_to_dir_latency
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Only override the controller's recycle latency when one was given.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

# GPU TCP cache, sized from --tcp-size.
class TCPCache(RubyCache):
    assoc = 8
    dataArrayBanks = 16
    tagArrayBanks = 4
    dataAccessLatency = 4
    tagAccessLatency = 1
    def create(self, options):
        """Apply the TCP size option."""
        self.size = MemorySize(options.tcp_size)
        self.replacement_policy = PseudoLRUReplacementPolicy()

# TCP controller. create() is used for compute units and createCP() for
# the controllers built in create_system's num_cp loop.
class TCPCntrl(TCP_Controller, CntrlBase):

    def create(self, options, ruby_system, system):
        """Build the L1 cache, coalescer and sequencer (coalescer in use).

        `system` is not used here.
        """
        self.version = self.versionCount()

        self.L1cache = TCPCache(tagAccessLatency = options.TCP_latency)
        self.L1cache.resourceStalls = options.no_resource_stalls
        self.L1cache.create(options)

        self.coalescer = RubyGPUCoalescer()
        self.coalescer.version = self.seqCount()
        self.coalescer.icache = self.L1cache
        self.coalescer.dcache = self.L1cache
        self.coalescer.ruby_system = ruby_system
        self.coalescer.support_inst_reqs = False
        self.coalescer.is_cpu_sequencer = False
        self.coalescer.max_outstanding_requests = options.simds_per_cu * \
                                                  options.wfs_per_simd * \
                                                  options.wf_size

        self.sequencer = RubySequencer()
        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1cache
        self.sequencer.dcache = self.L1cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.is_cpu_sequencer = True

        self.use_seq_not_coal = False

        self.ruby_system = ruby_system

        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def createCP(self, options, ruby_system, system):
        """Same as create() but with use_seq_not_coal set to True.

        Unlike create(), max_outstanding_requests of the coalescer is left
        at its default. `system` is not used here.
        """
        self.version = self.versionCount()

        self.L1cache = TCPCache(tagAccessLatency = options.TCP_latency)
        self.L1cache.resourceStalls = options.no_resource_stalls
        self.L1cache.create(options)

        self.coalescer = RubyGPUCoalescer()
        self.coalescer.version = self.seqCount()
        self.coalescer.icache = self.L1cache
        self.coalescer.dcache = self.L1cache
        self.coalescer.ruby_system = ruby_system
        self.coalescer.support_inst_reqs = False
        self.coalescer.is_cpu_sequencer = False

        self.sequencer = RubySequencer()
        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1cache
        self.sequencer.dcache = self.L1cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.is_cpu_sequencer = True

        self.use_seq_not_coal = True

        self.ruby_system = ruby_system

        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

# GPU SQC cache; its size is fixed here rather than taken from the options.
class SQCCache(RubyCache):
    size = "32kB"
    assoc = 8
    dataArrayBanks = 16
    tagArrayBanks = 4
    dataAccessLatency = 4
    tagAccessLatency = 1
    def create(self, options):
        """Set the replacement policy; `options` is not consulted."""
        self.replacement_policy = PseudoLRUReplacementPolicy()

# SQC controller. create() is used for the num_sqc loop and createCP() for
# the num_cp loop in create_system.
class SQCCntrl(SQC_Controller, CntrlBase):

    def create(self, options, ruby_system, system):
        """Build the L1 cache and a sequencer with data requests disabled.

        `system` is not used here.
        """
        self.version = self.versionCount()

        self.L1cache = SQCCache()
        self.L1cache.create(options)
        self.L1cache.resourceStalls = options.no_resource_stalls

        self.sequencer = RubySequencer()

        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1cache
        self.sequencer.dcache = self.L1cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.support_data_reqs = False
        self.sequencer.is_cpu_sequencer = False

        self.ruby_system = ruby_system

        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def createCP(self, options, ruby_system, system):
        """Same as create() except is_cpu_sequencer is left at its default.

        `system` is not used here.
        """
        self.version = self.versionCount()

        self.L1cache = SQCCache()
        self.L1cache.create(options)
        self.L1cache.resourceStalls = options.no_resource_stalls

        self.sequencer = RubySequencer()

        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1cache
        self.sequencer.dcache = self.L1cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.support_data_reqs = False
        # NOTE(review): create() sets is_cpu_sequencer = False here; this
        # variant does not. Confirm the difference is intentional.

        self.ruby_system = ruby_system

        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency


# One TCC bank; --tcc-size is the aggregate size split over num_tccs banks.
class TCC(RubyCache):
    assoc = 16
    dataAccessLatency = 8
    tagAccessLatency = 2
    resourceStalls = True
    def create(self, options):
        """Derive the per-bank size, bank counts and index bit."""
        self.size = MemorySize(options.tcc_size)
        self.size = self.size / options.num_tccs
        self.dataArrayBanks = 256 / options.num_tccs #number of data banks
        self.tagArrayBanks = 256 / options.num_tccs #number of tag banks
        # Enforce a floor of 128 bytes per way.
        if ((self.size.value / long(self.assoc)) < 128):
            self.size.value = long(128 * self.assoc)
        self.start_index_bit = math.log(options.cacheline_size, 2) + \
                               math.log(options.num_tccs, 2)
        self.replacement_policy = PseudoLRUReplacementPolicy()

# TCC controller, paired with a TCCDirCntrl through wire buffers.
class TCCCntrl(TCC_Controller, CntrlBase):
    def create(self, options, ruby_system, system):
        """Build the TCC cache and set latency / TBE count.

        `system` is not used here.
        """
        self.version = self.versionCount()
        self.L2cache = TCC()
        self.L2cache.create(options)
        self.l2_response_latency = options.TCC_latency

        # NOTE(review): this overwrites the number_of_TBEs that
        # create_system passes to the constructor. Confirm which value is
        # intended.
        self.number_of_TBEs = 2048

        self.ruby_system = ruby_system

        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_tccdir, resp_to_tccdir,
                           tcc_unblock_to_tccdir, req_to_tcc,
                           probe_to_tcc, resp_to_tcc):
        """Attach the six wire buffers shared with the TCC directory."""
        self.w_reqToTCCDir = req_to_tccdir
        self.w_respToTCCDir = resp_to_tccdir
        self.w_TCCUnblockToTCCDir = tcc_unblock_to_tccdir
        self.w_reqToTCC = req_to_tcc
        self.w_probeToTCC = probe_to_tcc
        self.w_respToTCC = resp_to_tcc

# TCC directory controller, paired with a TCCCntrl through wire buffers.
class TCCDirCntrl(TCCdir_Controller, CntrlBase):
    def create(self, options, ruby_system, system):
        """Build the directory cache and set the TBE count.

        `system` is not used here.
        """
        self.version = self.versionCount()

        self.directory = TccDirCache()
        self.directory.create(options)

        # NOTE(review): this overwrites the number_of_TBEs that
        # create_system passes to the constructor. Confirm which value is
        # intended.
        self.number_of_TBEs = 1024

        self.ruby_system = ruby_system

        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_tccdir, resp_to_tccdir,
                           tcc_unblock_to_tccdir, req_to_tcc,
                           probe_to_tcc, resp_to_tcc):
        """Attach the six wire buffers shared with the TCC."""
        self.w_reqToTCCDir = req_to_tccdir
        self.w_respToTCCDir = resp_to_tccdir
        self.w_TCCUnblockToTCCDir = tcc_unblock_to_tccdir
        self.w_reqToTCC = req_to_tcc
        self.w_probeToTCC = probe_to_tcc
        self.w_respToTCC = resp_to_tcc

# L3 cache slice; --l3_size is split across num_dirs directories.
class L3Cache(RubyCache):
    assoc = 8
    dataArrayBanks = 256
    tagArrayBanks = 256

    def create(self, options, ruby_system, system):
        """Derive the per-directory size, bank counts and latencies.

        `ruby_system` and `system` are not used here.
        """
        self.size = MemorySize(options.l3_size)
        self.size.value /= options.num_dirs
        self.dataArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs
        # NOTE(review): the bank counts are divided by num_dirs a second
        # time below, giving 256 / num_dirs**2. This looks like an
        # accidental duplication, but it is kept because removing it would
        # change the simulated configuration. Confirm before changing.
        self.dataArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs
        self.dataAccessLatency = options.l3_data_latency
        self.tagAccessLatency = options.l3_tag_latency
        self.resourceStalls = options.no_resource_stalls
        self.replacement_policy = PseudoLRUReplacementPolicy()

# Stand-alone L3 controller (not instantiated by create_system below).
class L3Cntrl(L3Cache_Controller, CntrlBase):
    def create(self, options, ruby_system, system):
        """Build the L3 cache and derive the response latency from it."""
        self.version = self.versionCount()
        self.L3cache = L3Cache()
        self.L3cache.create(options, ruby_system, system)

        self.l3_response_latency = max(self.L3cache.dataAccessLatency,
                                       self.L3cache.tagAccessLatency)
        self.ruby_system = ruby_system

        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Attach the six wire buffers between the L3 and the directory."""
        self.reqToDir = req_to_dir
        self.respToDir = resp_to_dir
        self.l3UnblockToDir = l3_unblock_to_dir
        self.reqToL3 = req_to_l3
        self.probeToL3 = probe_to_l3
        self.respToL3 = resp_to_l3

# Directory controller with an embedded L3 cache.
class DirCntrl(Directory_Controller, CntrlBase):
    def create(self, options, dir_ranges, ruby_system, system):
        """Build the directory memory and its L3 for `dir_ranges`."""
        self.version = self.versionCount()

        self.response_latency = 30

        self.addr_ranges = dir_ranges
        self.directory = RubyDirectoryMemory()

        self.L3CacheMemory = L3Cache()
        self.L3CacheMemory.create(options, ruby_system, system)

        self.l3_hit_latency = max(self.L3CacheMemory.dataAccessLatency,
                                  self.L3CacheMemory.tagAccessLatency)

        self.number_of_TBEs = options.num_tbes

        self.ruby_system = ruby_system

        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Attach the six wire buffers between the L3 and the directory."""
        self.reqToDir = req_to_dir
        self.respToDir = resp_to_dir
        self.l3UnblockToDir = l3_unblock_to_dir
        self.reqToL3 = req_to_l3
        self.probeToL3 = probe_to_l3
        self.respToL3 = resp_to_l3



def define_options(parser):
    """Register the command-line options specific to this protocol."""
    parser.add_option("--num-subcaches", type="int", default=4)
    parser.add_option("--l3-data-latency", type="int", default=20)
    parser.add_option("--l3-tag-latency", type="int", default=15)
    parser.add_option("--cpu-to-dir-latency", type="int", default=15)
    parser.add_option("--gpu-to-dir-latency", type="int", default=160)
    # store_false with default=True: options.no_resource_stalls is True
    # unless the flag is given, and it is assigned directly to the caches'
    # resourceStalls parameter.
    parser.add_option("--no-resource-stalls", action="store_false",
                      default=True)
    parser.add_option("--num-tbes", type="int", default=256)
    parser.add_option("--l2-latency", type="int", default=50) # load to use
    parser.add_option("--num-tccs", type="int", default=1,
                      help="number of TCC directories and banks in the GPU")
    parser.add_option("--TCP_latency", type="int", default=4,
                      help="TCP latency")
    parser.add_option("--TCC_latency", type="int", default=16,
                      help="TCC latency")
    parser.add_option("--tcc-size", type='string', default='256kB',
                      help="agregate tcc size")
    parser.add_option("--tcp-size", type='string', default='16kB',
                      help="tcp size")
    parser.add_option("--tcc-dir-factor", type='int', default=4,
                      help="TCCdir size = factor *(TCPs + TCC)")

def _connect_tcp_to_network(tcp_cntrl, ruby_system):
    """Create a TCP controller's message buffers and attach them to the
    ruby network (the mandatory queue is left unattached)."""
    tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True)
    tcp_cntrl.requestFromTCP.master = ruby_system.network.slave

    tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True)
    tcp_cntrl.responseFromTCP.master = ruby_system.network.slave

    tcp_cntrl.unblockFromCore = MessageBuffer(ordered = True)
    tcp_cntrl.unblockFromCore.master = ruby_system.network.slave

    tcp_cntrl.probeToTCP = MessageBuffer(ordered = True)
    tcp_cntrl.probeToTCP.slave = ruby_system.network.master

    tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
    tcp_cntrl.responseToTCP.slave = ruby_system.network.master

    tcp_cntrl.mandatoryQueue = MessageBuffer()

def _connect_sqc_to_network(sqc_cntrl, ruby_system):
    """Create an SQC controller's message buffers and attach them to the
    ruby network (the mandatory queue is left unattached)."""
    sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True)
    sqc_cntrl.requestFromSQC.master = ruby_system.network.slave

    sqc_cntrl.responseFromSQC = MessageBuffer(ordered = True)
    sqc_cntrl.responseFromSQC.master = ruby_system.network.slave

    sqc_cntrl.unblockFromCore = MessageBuffer(ordered = True)
    sqc_cntrl.unblockFromCore.master = ruby_system.network.slave

    sqc_cntrl.probeToSQC = MessageBuffer(ordered = True)
    sqc_cntrl.probeToSQC.slave = ruby_system.network.master

    sqc_cntrl.responseToSQC = MessageBuffer(ordered = True)
    sqc_cntrl.responseToSQC.slave = ruby_system.network.master

    sqc_cntrl.mandatoryQueue = MessageBuffer()

def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    """Instantiate and wire every controller of the GPU_RfO protocol.

    Controllers are attached to `system` as dir_cntrl<N>, cp_cntrl<N>,
    tcp_cntrl<N>, sqc_cntrl<N>, tcc_cntrl<N> and tccdir_cntrl<N>.
    `full_system` and `bootmem` are not used, and `dma_devices` must be
    empty.

    Returns a tuple (cpu_sequencers, dir_cntrl_nodes, mainCluster).
    """
    # panic is not imported by name at the top of this file; import it here
    # so the error path cannot fail with a NameError.
    from m5.util import panic

    if buildEnv['PROTOCOL'] != 'GPU_RfO':
        panic("This script requires the GPU_RfO protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    cp_cntrl_nodes = []
    tcp_cntrl_nodes = []
    sqc_cntrl_nodes = []
    tcc_cntrl_nodes = []
    tccdir_cntrl_nodes = []
    dir_cntrl_nodes = []
    l3_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #

    TCC_bits = int(math.log(options.num_tccs, 2))

    # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
    # Clusters
    mainCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s

    # dir_bits feeds intlvBits below whether or not --numa-high-bit was
    # given, so it must be computed unconditionally (it used to be defined
    # only in the else branch, raising NameError when numa_high_bit was set).
    dir_bits = int(math.log(options.num_dirs, 2))

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    for i in xrange(options.num_dirs):
        dir_ranges = []
        for r in system.mem_ranges:
            addr_range = m5.objects.AddrRange(r.start, size = r.size(),
                                              intlvHighBit = numa_bit,
                                              intlvBits = dir_bits,
                                              intlvMatch = i)
            dir_ranges.append(addr_range)

        dir_cntrl = DirCntrl(TCC_select_num_bits = TCC_bits)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)
        dir_cntrl.number_of_TBEs = 2560 * options.num_compute_units
        #Enough TBEs for all TCP TBEs

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered = True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.responseFromMemory = MessageBuffer()

        setattr(system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    # For an odd number of CPUs, still create the right number of controllers
    cpuCluster = Cluster(extBW = 512, intBW = 512)  # 1 TB/s
    for i in xrange((options.num_cpus + 1) / 2):

        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        setattr(system, "cp_cntrl%d" % i, cp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered = True)

        cpuCluster.add(cp_cntrl)

    gpuCluster = Cluster(extBW = 512, intBW = 512)  # 1 TB/s

    for i in xrange(options.num_compute_units):

        tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
                             number_of_TBEs = 2560) # max outstanding requests
        tcp_cntrl.create(options, ruby_system, system)

        setattr(system, "tcp_cntrl%d" % i, tcp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.coalescer)
        tcp_cntrl_nodes.append(tcp_cntrl)

        # Connect the TCP controller to the ruby network
        _connect_tcp_to_network(tcp_cntrl, ruby_system)

        gpuCluster.add(tcp_cntrl)

    for i in xrange(options.num_sqc):

        sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        setattr(system, "sqc_cntrl%d" % i, sqc_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # Connect the SQC controller to the ruby network
        _connect_sqc_to_network(sqc_cntrl, ruby_system)

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    for i in xrange(options.num_cp):

        tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
                             number_of_TBEs = 2560) # max outstanding requests
        tcp_cntrl.createCP(options, ruby_system, system)

        # Numbered after the compute units' TCP controllers.
        setattr(system, "tcp_cntrl%d" % (options.num_compute_units + i),
                tcp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.sequencer)
        tcp_cntrl_nodes.append(tcp_cntrl)

        # Connect the TCP controller to the ruby network
        _connect_tcp_to_network(tcp_cntrl, ruby_system)

        gpuCluster.add(tcp_cntrl)

        sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
        sqc_cntrl.createCP(options, ruby_system, system)

        setattr(system, "sqc_cntrl%d" % (options.num_compute_units + i),
                sqc_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # Connect the SQC controller to the ruby network
        _connect_sqc_to_network(sqc_cntrl, ruby_system)

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    for i in xrange(options.num_tccs):

        tcc_cntrl = TCCCntrl(TCC_select_num_bits = TCC_bits,
                             number_of_TBEs = options.num_compute_units * 2560)
        #Enough TBEs for all TCP TBEs
        tcc_cntrl.create(options, ruby_system, system)
        tcc_cntrl_nodes.append(tcc_cntrl)

        tccdir_cntrl = TCCDirCntrl(TCC_select_num_bits = TCC_bits,
                              number_of_TBEs = options.num_compute_units * 2560)
        #Enough TBEs for all TCP TBEs
        tccdir_cntrl.create(options, ruby_system, system)
        tccdir_cntrl_nodes.append(tccdir_cntrl)

        setattr(system, "tcc_cntrl%d" % i, tcc_cntrl)
        setattr(system, "tccdir_cntrl%d" % i, tccdir_cntrl)

        # connect all of the wire buffers between L3 and dirs up
        req_to_tccdir = RubyWireBuffer()
        resp_to_tccdir = RubyWireBuffer()
        tcc_unblock_to_tccdir = RubyWireBuffer()
        req_to_tcc = RubyWireBuffer()
        probe_to_tcc = RubyWireBuffer()
        resp_to_tcc = RubyWireBuffer()

        tcc_cntrl.connectWireBuffers(req_to_tccdir, resp_to_tccdir,
                                     tcc_unblock_to_tccdir, req_to_tcc,
                                     probe_to_tcc, resp_to_tcc)
        tccdir_cntrl.connectWireBuffers(req_to_tccdir, resp_to_tccdir,
                                        tcc_unblock_to_tccdir, req_to_tcc,
                                        probe_to_tcc, resp_to_tcc)

        # Connect the TCC controller to the ruby network
        tcc_cntrl.responseFromTCC = MessageBuffer(ordered = True)
        tcc_cntrl.responseFromTCC.master = ruby_system.network.slave

        tcc_cntrl.responseToTCC = MessageBuffer(ordered = True)
        tcc_cntrl.responseToTCC.slave = ruby_system.network.master

        # Connect the TCC Dir controller to the ruby network
        tccdir_cntrl.requestFromTCP = MessageBuffer(ordered = True)
        tccdir_cntrl.requestFromTCP.slave = ruby_system.network.master

        tccdir_cntrl.responseFromTCP = MessageBuffer(ordered = True)
        tccdir_cntrl.responseFromTCP.slave = ruby_system.network.master

        tccdir_cntrl.unblockFromTCP = MessageBuffer(ordered = True)
        tccdir_cntrl.unblockFromTCP.slave = ruby_system.network.master

        tccdir_cntrl.probeToCore = MessageBuffer(ordered = True)
        tccdir_cntrl.probeToCore.master = ruby_system.network.slave

        tccdir_cntrl.responseToCore = MessageBuffer(ordered = True)
        tccdir_cntrl.responseToCore.master = ruby_system.network.slave

        tccdir_cntrl.probeFromNB = MessageBuffer()
        tccdir_cntrl.probeFromNB.slave = ruby_system.network.master

        tccdir_cntrl.responseFromNB = MessageBuffer()
        tccdir_cntrl.responseFromNB.slave = ruby_system.network.master

        tccdir_cntrl.requestToNB = MessageBuffer()
        tccdir_cntrl.requestToNB.master = ruby_system.network.slave

        tccdir_cntrl.responseToNB = MessageBuffer()
        tccdir_cntrl.responseToNB.master = ruby_system.network.slave

        tccdir_cntrl.unblockToNB = MessageBuffer()
        tccdir_cntrl.unblockToNB.master = ruby_system.network.slave

        tccdir_cntrl.triggerQueue = MessageBuffer(ordered = True)

        # TCC cntrls added to the GPU cluster
        gpuCluster.add(tcc_cntrl)
        gpuCluster.add(tccdir_cntrl)

    # Assuming no DMA devices
    assert(len(dma_devices) == 0)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

    ruby_system.network.number_of_virtual_networks = 10

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)