Deleted Added
sdiff udiff text old ( 11837:17b37f38944a ) new ( 12340:a52f6d327259 )
full compact
1# Copyright (c) 2012-2013 ARM Limited
2# All rights reserved.
3#
4# The license below extends only to copyright in the software and shall
5# not be construed as granting a license to any other intellectual
6# property including but not limited to intellectual property relating
7# to a hardware implementation of the functionality of the software
8# licensed hereunder. You may use the software subject to the license

--- 108 unchanged lines hidden (view full) ---

# connected. Through each crossbar only local vaults can be accessed. But to
# support this architecture we need a crossbar between the serial links and
# the processor.
#
# mixed: This is a hybrid architecture. It has 4 crossbars inside the HMC.
# Two crossbars are connected only to local vaults. From the other two
# crossbars, a request can be forwarded to any other vault.
124
125import argparse
126
127import m5
128from m5.objects import *
129from m5.util import *
130
131
def add_options(parser):
    """Register every HMC-related command-line option on ``parser``.

    Args:
        parser: An ``argparse.ArgumentParser`` (or any object exposing a
            compatible ``add_argument`` method).  It is modified in place;
            nothing is returned.

    The bracketed numbers in the comments ([1], [4], ...) refer to the
    reference list in the header comment of this file.
    """
    # *****************************CROSSBAR PARAMETERS*************************
    # Flit size of the main interconnect [1]
    parser.add_argument("--xbar-width", default=32, type=int,
                        help="Data width of the main XBar (Bytes)")

    # Clock frequency of the main interconnect [1]
    # This crossbar is placed on the logic base of the HMC and it has its
    # own voltage and clock domains, different from the DRAM dies or from the
    # host.
    parser.add_argument("--xbar-frequency", default='1GHz', type=str,
                        help="Clock Frequency of the main XBar")

    # Arbitration latency of the HMC XBar [1]
    parser.add_argument("--xbar-frontend-latency", default=1, type=int,
                        help="Arbitration latency of the XBar")

    # Latency to forward a packet via the interconnect [1] (two levels of
    # FIFOs at the input and output of the interconnect)
    parser.add_argument("--xbar-forward-latency", default=2, type=int,
                        help="Forward latency of the XBar")

    # Latency to forward a response via the interconnect [1] (two levels of
    # FIFOs at the input and output of the interconnect)
    parser.add_argument("--xbar-response-latency", default=2, type=int,
                        help="Response latency of the XBar")

    # Number of crossbars which connect the 16 vaults to the serial links [7]
    parser.add_argument("--number-mem-crossbar", default=4, type=int,
                        help="Number of crossbar in HMC")

    # *****************************SERIAL LINK PARAMETERS**********************
    # Number of serial link controllers [1]
    parser.add_argument("--num-links-controllers", default=4, type=int,
                        help="Number of serial link controllers")

    # Number of packets (not flits) to store at the request side of the serial
    # link. This number should be adjusted to achieve the required bandwidth
    parser.add_argument("--link-buffer-size-req", default=10, type=int,
                        help="Number of packets to buffer at the "
                             "request side of the serial link")

    # Number of packets (not flits) to store at the response side of the
    # serial link. This number should be adjusted to achieve the required
    # bandwidth
    parser.add_argument("--link-buffer-size-rsp", default=10, type=int,
                        help="Number of packets to buffer at the "
                             "response side of the serial link")

    # Latency of the serial link composed by SER/DES latency (1.6ns [4]) plus
    # the PCB trace latency (3ns Estimated based on [5])
    parser.add_argument("--link-latency", default='4.6ns', type=str,
                        help="Latency of the serial links")

    # Clock frequency of each serial link (SerDes) [1]
    parser.add_argument("--link-frequency", default='10GHz', type=str,
                        help="Clock Frequency of the serial links")

    # Clock frequency of the serial link controller [6]
    # clk_hmc[MHz] = num_lanes_per_link * lane_speed [Gbits/s] /
    #                data_path_width * 10^6
    # clk_hmc[MHz] = 16 * 10 Gbps / 256 * 10^6 = 625 MHz
    parser.add_argument("--link-controller-frequency", default='625MHz',
                        type=str,
                        help="Clock Frequency of the link controller")

    # Latency of the serial link controller to process the packets [1][6]
    # (ClockDomain = 625 MHz)
    # used here for calculations only
    parser.add_argument("--link-ctrl-latency", default=4, type=int,
                        help="The number of cycles required for the "
                             "controller to process the packet")

    # total_ctrl_latency = link_ctrl_latency + link_latency
    # total_ctrl_latency = 4(Cycles) * 1.6 ns + 4.6 ns
    parser.add_argument("--total-ctrl-latency", default='11ns', type=str,
                        help="The latency experienced by every packet "
                             "regardless of size of packet")

    # Number of parallel lanes in each serial link [1]
    parser.add_argument("--num-lanes-per-link", default=16, type=int,
                        help="Number of lanes per each link")

    # Number of serial links [1]
    parser.add_argument("--num-serial-links", default=4, type=int,
                        help="Number of serial links")

    # Speed of each lane of serial link - SerDes serial interface 10 Gb/s
    parser.add_argument("--serial-link-speed", default=10, type=int,
                        help="Gb/s speed of each lane of serial link")

    # Address range for each of the serial links
    parser.add_argument("--serial-link-addr-range", default='1GB', type=str,
                        help="memory range for each of the serial links. "
                             "Default: 1GB")

    # *****************************PERFORMANCE MONITORING*********************
    # The main monitor behind the HMC Controller
    parser.add_argument("--enable-global-monitor", action="store_true",
                        help="The main monitor behind the HMC Controller")

    # The link performance monitors
    parser.add_argument("--enable-link-monitor", action="store_true",
                        help="The link monitors")

    # Link aggregator enable - put a crossbar between buffers & links
    parser.add_argument("--enable-link-aggr", action="store_true",
                        help="The crossbar between port and Link Controller")

    parser.add_argument("--enable-buff-div", action="store_true",
                        help="Memory Range of Buffer is divided between "
                             "total range")

    # *****************************HMC ARCHITECTURE **************************
    # Memory chunk for 16 vaults - number of vaults / number of crossbars
    parser.add_argument("--mem-chunk", default=4, type=int,
                        help="Chunk of memory range for each cross bar in "
                             "arch 0")

    # Size of the request buffer within the crossbar, used for modelling the
    # extra latency when a request goes to a non-local vault
    parser.add_argument("--xbar-buffer-size-req", default=10, type=int,
                        help="Number of packets to buffer at the "
                             "request side of the crossbar")

    # Size of the response buffer within the crossbar, used for modelling the
    # extra latency when a response is received from a non-local vault
    parser.add_argument("--xbar-buffer-size-resp", default=10, type=int,
                        help="Number of packets to buffer at the "
                             "response side of the crossbar")

    # HMC device architecture. It affects the HMC host controller as well
    parser.add_argument("--arch", type=str,
                        choices=["same", "distributed", "mixed"],
                        default="distributed",
                        help="same: HMC with 4 links, all with same range.\n"
                             "distributed: HMC with 4 links with distributed "
                             "range.\n"
                             "mixed: mixed with same and distributed range.\n"
                             "Default: distributed")

    # HMC device - number of vaults
    parser.add_argument("--hmc-dev-num-vaults", default=16, type=int,
                        help="number of independent vaults within the HMC "
                             "device. Note: each vault has a memory "
                             "controller (vault controller)\nDefault: 16")

    # HMC device - vault capacity or size
    parser.add_argument("--hmc-dev-vault-size", default='256MB', type=str,
                        help="vault storage capacity in bytes. Default: "
                             "256MB")
    parser.add_argument("--mem-type", type=str, choices=["HMC_2500_1x32"],
                        default="HMC_2500_1x32",
                        help="type of HMC memory to use. Default: "
                             "HMC_2500_1x32")
    parser.add_argument("--mem-channels", default=1, type=int,
                        help="Number of memory channels")
    parser.add_argument("--mem-ranks", default=1, type=int,
                        help="Number of ranks to iterate across")
    parser.add_argument("--burst-length", default=256, type=int,
                        help="burst length in bytes. Note: the cache line "
                             "size will be set to this value.\nDefault: 256")
288
289
290# configure HMC host controller
def config_hmc_host_ctrl(opt, system):
    """Build the HMC host-side controller and attach it to ``system``.

    Creates ``system.hmc_host`` (a ``SubSystem``) holding one ``SerialLink``
    per ``opt.num_serial_links``, each in its own clock domain running at
    ``opt.link_controller_frequency``.  For the "distributed" and "mixed"
    architectures a ``NoncoherentXBar`` is also created as ``system.membus``
    and wired to the serial links:

    * "distributed": membus is connected to every link (through a
      ``CommMonitor`` when ``opt.enable_global_monitor`` is set).
    * "mixed": membus is connected to links 0 and 1 only; the slave ports of
      links 2 and 3 are not connected in this function.
    * "same": no membus is created; only the monitor-to-link connections are
      made (when monitors are enabled).

    Args:
        opt: Parsed options as registered by ``add_options``.
        system: The system object being configured; it is modified in place.

    Returns:
        The same ``system`` object that was passed in.

    Raises:
        ValueError: If ``opt.arch`` is not "same", "distributed" or "mixed".
    """
    # create HMC host controller
    system.hmc_host = SubSystem()

    # Create additional crossbar for arch1 (also needed by the mixed arch)
    if opt.arch in ("distributed", "mixed"):
        clk = '100GHz'
        vd = VoltageDomain(voltage='1V')
        system.membus = NoncoherentXBar(width=8)
        # Accesses that match no attached range go to the bad-address
        # responder, which is installed as the crossbar's default port.
        system.membus.badaddr_responder = BadAddr()
        system.membus.default = Self.badaddr_responder.pio
        system.membus.frontend_latency = 3
        system.membus.forward_latency = 4
        system.membus.response_latency = 2
        cd = SrcClockDomain(clock=clk, voltage_domain=vd)
        system.membus.clk_domain = cd

    # create memory ranges for the serial links
    slar = convert.toMemorySize(opt.serial_link_addr_range)
    if opt.arch == "same":
        # Memory ranges of serial link for arch-0. Same as the ranges of vault
        # controllers (4 vaults to 1 serial link)
        ser_ranges = [AddrRange(0, (4*slar)-1) for i in
                      range(opt.num_serial_links)]
    elif opt.arch == "distributed":
        # Memory ranges of serial link for arch-1. Distributed range across
        # links
        ser_ranges = [AddrRange(i*slar, ((i+1)*slar)-1) for i in
                      range(opt.num_serial_links)]
    elif opt.arch == "mixed":
        # Memory ranges of serial link for arch-2 'Mixed' address distribution
        # over links: links 0/1 own one slice each, links 2/3 see everything.
        ser_range0 = AddrRange(0, (1*slar)-1)
        ser_range1 = AddrRange(1*slar, 2*slar-1)
        ser_range2 = AddrRange(0, (4*slar)-1)
        ser_range3 = AddrRange(0, (4*slar)-1)
        ser_ranges = [ser_range0, ser_range1, ser_range2, ser_range3]
    else:
        # Fail with a clear message instead of an unbound 'ser_ranges' below.
        raise ValueError("Unknown HMC architecture: %r" % (opt.arch,))

    # Serial link Controller with 16 SerDes links at 10 Gbps with serial link
    # ranges w.r.t to architecture
    sl = [SerialLink(ranges=ser_ranges[i],
                     req_size=opt.link_buffer_size_req,
                     resp_size=opt.link_buffer_size_rsp,
                     num_lanes=opt.num_lanes_per_link,
                     link_speed=opt.serial_link_speed,
                     delay=opt.total_ctrl_latency) for i in
          range(opt.num_serial_links)]
    system.hmc_host.seriallink = sl

    # enable global monitor
    if opt.enable_global_monitor:
        system.hmc_host.lmonitor = [CommMonitor() for i in
                                    range(opt.num_serial_links)]

    # set the clock frequency for serial link
    for i in range(opt.num_serial_links):
        clk = opt.link_controller_frequency
        vd = VoltageDomain(voltage='1V')
        scd = SrcClockDomain(clock=clk, voltage_domain=vd)
        system.hmc_host.seriallink[i].clk_domain = scd

    # Connect membus/traffic gen to Serial Link Controller for different HMC
    # architectures
    # NOTE(review): the loops below iterate over opt.num_links_controllers but
    # index lists sized by opt.num_serial_links. Both default to 4; confirm
    # they are meant to always be equal.
    hh = system.hmc_host
    if opt.arch == "distributed":
        mb = system.membus
        for i in range(opt.num_links_controllers):
            if opt.enable_global_monitor:
                mb.master = hh.lmonitor[i].slave
                hh.lmonitor[i].master = hh.seriallink[i].slave
            else:
                mb.master = hh.seriallink[i].slave
    if opt.arch == "mixed":
        mb = system.membus
        if opt.enable_global_monitor:
            mb.master = hh.lmonitor[0].slave
            hh.lmonitor[0].master = hh.seriallink[0].slave
            mb.master = hh.lmonitor[1].slave
            hh.lmonitor[1].master = hh.seriallink[1].slave
        else:
            mb.master = hh.seriallink[0].slave
            mb.master = hh.seriallink[1].slave

    if opt.arch == "same":
        for i in range(opt.num_links_controllers):
            if opt.enable_global_monitor:
                hh.lmonitor[i].master = hh.seriallink[i].slave

    return system
383
384
385# Create an HMC device
def config_hmc_dev(opt, system, hmc_host):
    """Build the HMC device (crossbars and inter-crossbar bridges).

    Creates ``system.hmc_dev`` (a ``SubSystem``), sets ``system.mem_ranges``
    to one contiguous ``AddrRange`` per vault, instantiates
    ``opt.number_mem_crossbar`` crossbars with their own clock domains, and
    connects each host serial link to one crossbar (through a ``CommMonitor``
    when ``opt.enable_link_monitor`` is set).  Depending on ``opt.arch`` the
    crossbars are then interconnected with ``Bridge`` objects:

    * "same": every crossbar is bridged to every other crossbar.
    * "mixed": crossbars 2 and 3 are bridged to the other crossbars;
      crossbars 0 and 1 get no outgoing bridges.
    * "distributed": no bridges are created here.

    Args:
        opt: Parsed options as registered by ``add_options``.
        system: The system object being configured; it is modified in place
            and must already carry ``system.hmc_host.seriallink``.
        hmc_host: Not referenced by the code below; the serial links are
            reached through ``system.hmc_host`` instead.
    """
    # create HMC device
    system.hmc_dev = SubSystem()

    # create memory ranges for the vault controllers
    arv = convert.toMemorySize(opt.hmc_dev_vault_size)
    addr_ranges_vaults = [AddrRange(i*arv, ((i+1)*arv-1)) for i in
                          range(opt.hmc_dev_num_vaults)]
    system.mem_ranges = addr_ranges_vaults

    if opt.enable_link_monitor:
        lm = [CommMonitor() for i in range(opt.num_links_controllers)]
        system.hmc_dev.lmonitor = lm

    # 4 HMC Crossbars located in its logic-base (LoB)
    xb = [NoncoherentXBar(width=opt.xbar_width,
                          frontend_latency=opt.xbar_frontend_latency,
                          forward_latency=opt.xbar_forward_latency,
                          response_latency=opt.xbar_response_latency) for i in
          range(opt.number_mem_crossbar)]
    system.hmc_dev.xbar = xb

    for i in range(opt.number_mem_crossbar):
        clk = opt.xbar_frequency
        vd = VoltageDomain(voltage='1V')
        scd = SrcClockDomain(clock=clk, voltage_domain=vd)
        system.hmc_dev.xbar[i].clk_domain = scd

    # Attach 4 serial links to 4 crossbars
    # NOTE(review): indexes xbar (sized by number_mem_crossbar) and lmonitor
    # (sized by num_links_controllers) with a num_serial_links loop; all three
    # default to 4 - confirm they are meant to always be equal.
    for i in range(opt.num_serial_links):
        if opt.enable_link_monitor:
            system.hmc_host.seriallink[i].master = \
                system.hmc_dev.lmonitor[i].slave
            system.hmc_dev.lmonitor[i].master = system.hmc_dev.xbar[i].slave
        else:
            system.hmc_host.seriallink[i].master = system.hmc_dev.xbar[i].slave

    # Connecting xbars with each other: a request arriving at the wrong xbar
    # is forwarded to the correct xbar. Bridges are used to connect the xbars.
    if opt.arch == "same":
        numx = len(system.hmc_dev.xbar)
        chunk = int(opt.mem_chunk)

        # create a list of buffers
        # NOTE(review): numx*(mem_chunk-1) bridges are created while the loop
        # below consumes numx*(numx-1); these match only when
        # mem_chunk == numx (both default to 4).
        system.hmc_dev.buffers = [Bridge(req_size=opt.xbar_buffer_size_req,
                                         resp_size=opt.xbar_buffer_size_resp)
                                  for i in range(numx*(opt.mem_chunk-1))]

        # Buffer iterator
        it = iter(range(len(system.hmc_dev.buffers)))

        # necessary to add system_port to one of the xbars
        # (index 3 is hard-coded, so at least four crossbars are required)
        system.system_port = system.hmc_dev.xbar[3].slave

        # iterate over all the crossbars and connect them as required
        for i in range(numx):
            for j in range(numx):
                # connect xbar to all other xbars except itself
                if i == j:
                    continue

                # get the next index of buffer; the next() builtin works on
                # both Python 2 and Python 3 iterators
                index = next(it)

                # Change the default values for ranges of bridge: it serves
                # the vault ranges local to the destination crossbar j
                system.hmc_dev.buffers[index].ranges = system.mem_ranges[
                    j * chunk:(j + 1) * chunk]

                # Connect the bridge between crossbars
                system.hmc_dev.xbar[i].master = system.hmc_dev.buffers[
                    index].slave
                system.hmc_dev.buffers[
                    index].master = system.hmc_dev.xbar[j].slave

    # Two crossbars are connected to all other crossbars - the other 2
    # crossbars can only direct traffic to their local vaults
    if opt.arch == "mixed":
        # (source xbar, destination xbar) for each bridge, in creation order.
        # Bridge "bufferSD" forwards from xbar S to the four vault ranges
        # mem_ranges[4*D:4*D+4] behind xbar D.
        mixed_links = ((3, 0), (3, 1), (3, 2), (2, 0), (2, 1), (2, 3))
        for src, dst in mixed_links:
            name = "buffer%d%d" % (src, dst)
            setattr(system.hmc_dev, name,
                    Bridge(ranges=system.mem_ranges[4*dst:4*(dst+1)]))
            bridge = getattr(system.hmc_dev, name)
            system.hmc_dev.xbar[src].master = bridge.slave
            bridge.master = system.hmc_dev.xbar[dst].slave