HMC.py (11551:d24ad08b22b0) HMC.py (11837:17b37f38944a)
1# Copyright (c) 2012-2013 ARM Limited
2# All rights reserved.
3#
4# The license below extends only to copyright in the software and shall
5# not be construed as granting a license to any other intellectual
6# property including but not limited to intellectual property relating
7# to a hardware implementation of the functionality of the software
8# licensed hereunder. You may use the software subject to the license
9# terms below provided that you ensure that this notice is replicated
10# unmodified and in its entirety in all distributions of the software,
11# modified or unmodified, in source code or in binary form.
12#
13# Copyright (c) 2015 The University of Bologna
14# All rights reserved.
15#
16# Redistribution and use in source and binary forms, with or without
17# modification, are permitted provided that the following conditions are
18# met: redistributions of source code must retain the above copyright
19# notice, this list of conditions and the following disclaimer;
20# redistributions in binary form must reproduce the above copyright
21# notice, this list of conditions and the following disclaimer in the
22# documentation and/or other materials provided with the distribution;
23# neither the name of the copyright holders nor the names of its
24# contributors may be used to endorse or promote products derived from
25# this software without specific prior written permission.
26#
27# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38#
39# Authors: Erfan Azarkhish
40# Abdul Mutaal Ahmad
41
42# A Simplified model of a complete HMC device. Based on:
43# [1] http://www.hybridmemorycube.org/specification-download/
44# [2] High performance AXI-4.0 based interconnect for extensible smart memory
45# cubes(E. Azarkhish et. al)
46# [3] Low-Power Hybrid Memory Cubes With Link Power Management and Two-Level
47# Prefetching (J. Ahn et. al)
48# [4] Memory-centric system interconnect design with Hybrid Memory Cubes
49# (G. Kim et. al)
50# [5] Near Data Processing, Are we there yet? (M. Gokhale)
51# http://www.cs.utah.edu/wondp/gokhale.pdf
52# [6] openHMC - A Configurable Open-Source Hybrid Memory Cube Controller
53# (J. Schmidt)
54# [7] Hybrid Memory Cube performance characterization on data-centric
55# workloads (M. Gokhale)
56#
57# This script builds a complete HMC device composed of vault controllers,
58# serial links, the main internal crossbar, and an external hmc controller.
59#
60# - VAULT CONTROLLERS:
1# Copyright (c) 2012-2013 ARM Limited
2# All rights reserved.
3#
4# The license below extends only to copyright in the software and shall
5# not be construed as granting a license to any other intellectual
6# property including but not limited to intellectual property relating
7# to a hardware implementation of the functionality of the software
8# licensed hereunder. You may use the software subject to the license
9# terms below provided that you ensure that this notice is replicated
10# unmodified and in its entirety in all distributions of the software,
11# modified or unmodified, in source code or in binary form.
12#
13# Copyright (c) 2015 The University of Bologna
14# All rights reserved.
15#
16# Redistribution and use in source and binary forms, with or without
17# modification, are permitted provided that the following conditions are
18# met: redistributions of source code must retain the above copyright
19# notice, this list of conditions and the following disclaimer;
20# redistributions in binary form must reproduce the above copyright
21# notice, this list of conditions and the following disclaimer in the
22# documentation and/or other materials provided with the distribution;
23# neither the name of the copyright holders nor the names of its
24# contributors may be used to endorse or promote products derived from
25# this software without specific prior written permission.
26#
27# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38#
39# Authors: Erfan Azarkhish
40# Abdul Mutaal Ahmad
41
42# A Simplified model of a complete HMC device. Based on:
43# [1] http://www.hybridmemorycube.org/specification-download/
44# [2] High performance AXI-4.0 based interconnect for extensible smart memory
45# cubes(E. Azarkhish et. al)
46# [3] Low-Power Hybrid Memory Cubes With Link Power Management and Two-Level
47# Prefetching (J. Ahn et. al)
48# [4] Memory-centric system interconnect design with Hybrid Memory Cubes
49# (G. Kim et. al)
50# [5] Near Data Processing, Are we there yet? (M. Gokhale)
51# http://www.cs.utah.edu/wondp/gokhale.pdf
52# [6] openHMC - A Configurable Open-Source Hybrid Memory Cube Controller
53# (J. Schmidt)
54# [7] Hybrid Memory Cube performance characterization on data-centric
55# workloads (M. Gokhale)
56#
57# This script builds a complete HMC device composed of vault controllers,
58# serial links, the main internal crossbar, and an external hmc controller.
59#
60# - VAULT CONTROLLERS:
61# Instances of the HMC_2500_x32 class with their functionality specified in
61# Instances of the HMC_2500_1x32 class with their functionality specified in
62# dram_ctrl.cc
63#
64# - THE MAIN XBAR:
65# This component is simply an instance of the NoncoherentXBar class, and its
66# parameters are tuned to [2].
67#
68# - SERIAL LINKS CONTROLLER:
69# SerialLink is a simple variation of the Bridge class, with the ability to
70# account for the latency of packet serialization and controller latency. We
71# assume that the serializer component at the transmitter side does not need
72# to receive the whole packet to start the serialization. But the
73# deserializer waits for the complete packet to check its integrity first.
74#
75# * Bandwidth of the serial links is not modeled in the SerialLink component
76# itself.
77#
78# * Latency of serial link controller is composed of SerDes latency + link
79# controller
80#
81# * It is inferred from the standard [1] and the literature [3] that serial
82# links share the same address range and packets can travel over any of
83# them so a load distribution mechanism is required among them.
84#
85# -----------------------------------------
86# | Host/HMC Controller |
87# | ---------------------- |
88# | | Link Aggregator | opt |
89# | ---------------------- |
90# | ---------------------- |
91# | | Serial Link + Ser | * 4 |
92# | ---------------------- |
93# |---------------------------------------
94# -----------------------------------------
95# | Device
96# | ---------------------- |
97# | | Xbar | * 4 |
98# | ---------------------- |
99# | ---------------------- |
100# | | Vault Controller | * 16 |
101# | ---------------------- |
102# | ---------------------- |
103# | | Memory | |
104# | ---------------------- |
105# |---------------------------------------|
106#
107# In this version we present 3 different HMC architectures along with
108# their corresponding test scripts.
109#
110# same: It has 4 crossbars in HMC memory. All the crossbars are connected
111# to each other, providing complete memory range. This architecture also covers
112# the added latency for sending a request to non-local vault(bridge in b/t
113# crossbars). All the 4 serial links can access complete memory. So each
114# link can be connected to separate processor.
115#
116# distributed: It has 4 crossbars inside the HMC. Crossbars are not
117# connected.Through each crossbar only local vaults can be accessed. But to
118# support this architecture we need a crossbar between serial links and
119# processor.
120#
121# mixed: This is a hybrid architecture. It has 4 crossbars inside the HMC.
122# 2 Crossbars are connected to only local vaults. From other 2 crossbar, a
123# request can be forwarded to any other vault.
124
125import optparse
126
127import m5
128from m5.objects import *
129
130# A single Hybrid Memory Cube (HMC)
class HMCSystem(SubSystem):
    # Parameter container for one Hybrid Memory Cube. The same class is
    # instantiated for the host side (serial links) and for the device side
    # (crossbars and inter-crossbar bridges); the configuration functions in
    # this file read the values below when building those components.
    # Bracketed numbers refer to the references listed at the top of the file.

    #*****************************CROSSBAR PARAMETERS*************************
    # Flit size of the main interconnect [1]
    xbar_width = Param.Unsigned(32, "Data width of the main XBar (Bytes)")

    # Clock frequency of the main interconnect [1]
    # This crossbar is placed on the logic base of the HMC and it has its
    # own voltage and clock domains, different from the DRAM dies or from the
    # host.
    xbar_frequency = Param.Frequency('1GHz', "Clock Frequency of the main "
        "XBar")

    # Arbitration latency of the HMC XBar [1]
    xbar_frontend_latency = Param.Cycles(1, "Arbitration latency of the XBar")

    # Latency to forward a packet via the interconnect [1] (two levels of
    # FIFOs at the input and output of the interconnect)
    xbar_forward_latency = Param.Cycles(2, "Forward latency of the XBar")

    # Latency to forward a response via the interconnect [1] (two levels of
    # FIFOs at the input and output of the interconnect)
    xbar_response_latency = Param.Cycles(2, "Response latency of the XBar")

    # Number of crossbars which connect the 16 vaults to the serial links [7]
    number_mem_crossbar = Param.Unsigned(4, "Number of crossbar in HMC")

    #*****************************SERIAL LINK PARAMETERS**********************
    # Number of serial link controllers [1]
    num_links_controllers = Param.Unsigned(4, "Number of serial link "
        "controllers")

    # Number of packets (not flits) to store at the request side of the
    # serial link. This number should be adjusted to achieve the required
    # bandwidth
    link_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer "
        "at the request side of the serial link")

    # Number of packets (not flits) to store at the response side of the
    # serial link. This number should be adjusted to achieve the required
    # bandwidth
    link_buffer_size_rsp = Param.Unsigned(10, "Number of packets to buffer "
        "at the response side of the serial link")

    # Latency of the serial link composed by SER/DES latency (1.6ns [4]) plus
    # the PCB trace latency (3ns Estimated based on [5])
    link_latency = Param.Latency('4.6ns', "Latency of the serial links")

    # Clock frequency of each serial link (SerDes) [1]
    link_frequency = Param.Frequency('10GHz', "Clock Frequency of the serial "
        "links")

    # Clock frequency of serial link Controller [6]
    # clk_hmc[Mhz]= num_lanes_per_link * lane_speed [Gbits/s] /
    # data_path_width * 10^6
    # clk_hmc[Mhz]= 16 * 10 Gbps / 256 * 10^6 = 625 Mhz
    link_controller_frequency = Param.Frequency('625MHz',
        "Clock Frequency of the link controller")

    # Latency of the serial link controller to process the packets [1][6]
    # (ClockDomain = 625 Mhz )
    # used here for calculations only
    link_ctrl_latency = Param.Cycles(4, "The number of cycles required for "
        "the controller to process the packet")

    # total_ctrl_latency = link_ctrl_latency + link_latency
    # total_ctrl_latency = 4(Cycles) * 1.6 ns + 4.6 ns
    total_ctrl_latency = Param.Latency('11ns', "The latency experienced by "
        "every packet regardless of size of packet")

    # Number of parallel lanes in each serial link [1]
    num_lanes_per_link = Param.Unsigned(16, "Number of lanes per each link")

    # Number of serial links [1]
    num_serial_links = Param.Unsigned(4, "Number of serial links")

    # Speed of each lane of serial link - SerDes serial interface 10 Gb/s
    serial_link_speed = Param.UInt64(10, "Gb/s speed of each lane of "
        "serial link")

    #*****************************PERFORMANCE MONITORING**********************
    # The main monitor behind the HMC Controller
    enable_global_monitor = Param.Bool(False, "The main monitor behind the "
        "HMC Controller")

    # The link performance monitors
    enable_link_monitor = Param.Bool(False, "The link monitors")

    # Link aggregator enable - put a crossbar between buffers & links
    enable_link_aggr = Param.Bool(False, "The crossbar between port and "
        "Link Controller")

    enable_buff_div = Param.Bool(True, "Memory Range of Buffer is "
        "divided between total range")

    #*****************************HMC ARCHITECTURE ***************************
    # Memory chunk for 16 vaults - number of vaults / number of crossbars
    mem_chunk = Param.Unsigned(4, "Chunk of memory range for each cross bar "
        "in arch 0")

    # Size of the request buffer within the crossbar, used for modelling the
    # extra latency when the request goes to a non-local vault
    xbar_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer "
        "at the request side of the crossbar")

    # Size of the response buffer within the crossbar, used for modelling the
    # extra latency when the response is received from a non-local vault
    xbar_buffer_size_resp = Param.Unsigned(10, "Number of packets to buffer "
        "at the response side of the crossbar")
237
238# configure host system with Serial Links
def config_host_hmc(options, system):
    """Create the host-side HMC controller and attach it to ``system``.

    An ``HMCSystem`` is stored as ``system.hmc_host``. One ``SerialLink`` is
    created per ``num_serial_links`` and, when ``enable_global_monitor`` is
    set, one ``CommMonitor`` per link is placed in front of it. The links are
    then connected to ``system.membus`` and/or ``system.tgen`` depending on
    ``options.arch``:

    * ``"distributed"``: every link is fed from ``system.membus``.
    * ``"mixed"``: links 0 and 1 are fed from ``system.membus``, links 2 and
      3 from ``system.tgen[2]`` and ``system.tgen[3]``.
    * ``"same"``: link ``i`` is fed from ``system.tgen[i]``.

    Args:
        options: parsed command-line options. ``ser_ranges`` and ``arch`` are
            required; ``enable_global_monitor`` and ``enable_link_monitor``
            are copied onto the host object only if present.
        system: the system being configured.

    Returns:
        The same ``system`` object that was passed in.
    """
    system.hmc_host = HMCSystem()
    host = system.hmc_host

    # The monitor switches are optional: a script that does not define them
    # keeps the defaults declared on HMCSystem.
    for flag in ("enable_global_monitor", "enable_link_monitor"):
        if hasattr(options, flag):
            setattr(host, flag, getattr(options, flag))

    num_links = int(host.num_serial_links)
    num_ctrls = int(host.num_links_controllers)

    # Serial link Controller with 16 SerDes links at 10 Gbps
    # with serial link ranges w.r.t. the architecture
    host.seriallink = [
        SerialLink(ranges=options.ser_ranges[i],
                   req_size=host.link_buffer_size_req,
                   resp_size=host.link_buffer_size_rsp,
                   num_lanes=host.num_lanes_per_link,
                   link_speed=host.serial_link_speed,
                   delay=host.total_ctrl_latency)
        for i in range(num_links)]

    # One monitor per serial link when the global monitor is enabled
    if host.enable_global_monitor:
        host.lmonitor = [CommMonitor() for _ in range(num_links)]

    # Every serial link gets its own clock and voltage domain
    for i in range(num_links):
        host.seriallink[i].clk_domain = SrcClockDomain(
            clock=host.link_controller_frequency,
            voltage_domain=VoltageDomain(voltage='1V'))

    def link_entry(i):
        # Port that upstream components must connect to in order to reach
        # serial link i: the monitor's slave port (with the monitor hooked
        # up in front of the link) when monitoring is on, else the link's
        # own slave port.
        if host.enable_global_monitor:
            host.lmonitor[i].master = host.seriallink[i].slave
            return host.lmonitor[i].slave
        return host.seriallink[i].slave

    # Connect membus/traffic gen to Serial Link Controller for the different
    # HMC architectures
    if options.arch == "distributed":
        for i in range(num_ctrls):
            system.membus.master = link_entry(i)
    elif options.arch == "mixed":
        for i in (0, 1):
            system.membus.master = link_entry(i)
        for i in (2, 3):
            system.tgen[i].port = link_entry(i)
    elif options.arch == "same":
        for i in range(num_ctrls):
            system.tgen[i].port = link_entry(i)

    return system
316
317# Create an HMC device and attach it to the current system
def config_hmc(options, system, hmc_host):
    """Create the HMC device and attach it to the host-side serial links.

    An ``HMCSystem`` is stored as ``system.hmc_dev``. The function builds the
    crossbars of the logic base, connects serial link ``i`` of
    ``system.hmc_host`` to crossbar ``i`` (through a ``CommMonitor`` when
    ``enable_link_monitor`` is set) and then, depending on ``options.arch``,
    adds ``Bridge`` objects between crossbars:

    * ``"same"``: every crossbar is bridged to every other crossbar, and
      ``system.system_port`` is attached to crossbar 3.
    * ``"mixed"``: only crossbars 2 and 3 are bridged to the other crossbars.
    * any other value: no bridges are created.

    Args:
        options: parsed command-line options. ``arch`` is required;
            ``enable_global_monitor`` and ``enable_link_monitor`` are copied
            onto the device object only if present.
        system: the system being configured. ``system.hmc_host.seriallink``
            and ``system.mem_ranges`` must already exist.
        hmc_host: not used by this function; kept so existing callers keep
            working.
    """
    # Create HMC device
    system.hmc_dev = HMCSystem()
    dev = system.hmc_dev

    # The monitor switches are optional: a script that does not define them
    # keeps the defaults declared on HMCSystem.
    for flag in ("enable_global_monitor", "enable_link_monitor"):
        if hasattr(options, flag):
            setattr(dev, flag, getattr(options, flag))

    if dev.enable_link_monitor:
        dev.lmonitor = [CommMonitor()
                        for _ in range(int(dev.num_links_controllers))]

    # HMC crossbars located in its logic base (LoB). The count is taken from
    # the device object itself so that it always matches the clock-domain
    # loop below.
    num_xbars = int(dev.number_mem_crossbar)
    dev.xbar = [
        NoncoherentXBar(width=dev.xbar_width,
                        frontend_latency=dev.xbar_frontend_latency,
                        forward_latency=dev.xbar_forward_latency,
                        response_latency=dev.xbar_response_latency)
        for _ in range(num_xbars)]

    for i in range(num_xbars):
        dev.xbar[i].clk_domain = SrcClockDomain(
            clock=dev.xbar_frequency,
            voltage_domain=VoltageDomain(voltage='1V'))

    # Attach serial link i to crossbar i
    for i in range(int(dev.num_serial_links)):
        if dev.enable_link_monitor:
            system.hmc_host.seriallink[i].master = dev.lmonitor[i].slave
            dev.lmonitor[i].master = dev.xbar[i].slave
        else:
            system.hmc_host.seriallink[i].master = dev.xbar[i].slave

    # Connect the crossbars with each other so that a request arriving at
    # the wrong crossbar is forwarded to the correct one. A Bridge is used
    # for each directed crossbar-to-crossbar connection.
    if options.arch == "same":
        numx = len(dev.xbar)
        chunk = int(dev.mem_chunk)

        # One bridge per ordered pair of distinct crossbars; this is exactly
        # the number the nested loops below consume.
        dev.buffers = [
            Bridge(req_size=dev.xbar_buffer_size_req,
                   resp_size=dev.xbar_buffer_size_resp)
            for _ in range(numx * (numx - 1))]

        # Necessary to add system_port to one of the crossbars
        system.system_port = dev.xbar[3].slave

        index = 0
        for i in range(numx):
            for j in range(numx):
                if i == j:
                    # Don't connect a crossbar to itself
                    continue

                bridge = dev.buffers[index]
                index += 1

                # The bridge forwards only the memory ranges that are local
                # to the destination crossbar j
                bridge.ranges = system.mem_ranges[j * chunk:(j + 1) * chunk]

                # Connect the bridge between the crossbars
                dev.xbar[i].master = bridge.slave
                bridge.master = dev.xbar[j].slave

    # Two crossbars are connected to all other crossbars; the other two can
    # only direct traffic to their local vaults
    if options.arch == "mixed":
        # (source crossbar, destination crossbar), in connection order. Each
        # bridge is named buffer<src><dst> and covers the four memory ranges
        # local to the destination crossbar.
        links = ((3, 0), (3, 1), (3, 2), (2, 0), (2, 1), (2, 3))
        for src, dst in links:
            name = "buffer%d%d" % (src, dst)
            setattr(dev, name,
                    Bridge(ranges=system.mem_ranges[dst * 4:(dst + 1) * 4]))
            bridge = getattr(dev, name)
            dev.xbar[src].master = bridge.slave
            bridge.master = dev.xbar[dst].slave
427
62# dram_ctrl.cc
63#
64# - THE MAIN XBAR:
65# This component is simply an instance of the NoncoherentXBar class, and its
66# parameters are tuned to [2].
67#
68# - SERIAL LINKS CONTROLLER:
69# SerialLink is a simple variation of the Bridge class, with the ability to
70# account for the latency of packet serialization and controller latency. We
71# assume that the serializer component at the transmitter side does not need
72# to receive the whole packet to start the serialization. But the
73# deserializer waits for the complete packet to check its integrity first.
74#
75# * Bandwidth of the serial links is not modeled in the SerialLink component
76# itself.
77#
78# * Latency of serial link controller is composed of SerDes latency + link
79# controller
80#
81# * It is inferred from the standard [1] and the literature [3] that serial
82# links share the same address range and packets can travel over any of
83# them so a load distribution mechanism is required among them.
84#
85# -----------------------------------------
86# | Host/HMC Controller |
87# | ---------------------- |
88# | | Link Aggregator | opt |
89# | ---------------------- |
90# | ---------------------- |
91# | | Serial Link + Ser | * 4 |
92# | ---------------------- |
93# |---------------------------------------
94# -----------------------------------------
95# | Device
96# | ---------------------- |
97# | | Xbar | * 4 |
98# | ---------------------- |
99# | ---------------------- |
100# | | Vault Controller | * 16 |
101# | ---------------------- |
102# | ---------------------- |
103# | | Memory | |
104# | ---------------------- |
105# |---------------------------------------|
106#
107# In this version we present 3 different HMC architectures along with
108# their corresponding test scripts.
109#
110# same: It has 4 crossbars in HMC memory. All the crossbars are connected
111# to each other, providing complete memory range. This architecture also covers
112# the added latency for sending a request to non-local vault(bridge in b/t
113# crossbars). All the 4 serial links can access complete memory. So each
114# link can be connected to separate processor.
115#
116# distributed: It has 4 crossbars inside the HMC. Crossbars are not
117# connected.Through each crossbar only local vaults can be accessed. But to
118# support this architecture we need a crossbar between serial links and
119# processor.
120#
121# mixed: This is a hybrid architecture. It has 4 crossbars inside the HMC.
122# 2 Crossbars are connected to only local vaults. From other 2 crossbar, a
123# request can be forwarded to any other vault.
124
125import optparse
126
127import m5
128from m5.objects import *
129
130# A single Hybrid Memory Cube (HMC)
class HMCSystem(SubSystem):
    # Parameter container for one Hybrid Memory Cube. The same class is
    # instantiated for the host side (serial links) and for the device side
    # (crossbars and inter-crossbar bridges); the configuration functions in
    # this file read the values below when building those components.
    # Bracketed numbers refer to the references listed at the top of the file.

    #*****************************CROSSBAR PARAMETERS*************************
    # Flit size of the main interconnect [1]
    xbar_width = Param.Unsigned(32, "Data width of the main XBar (Bytes)")

    # Clock frequency of the main interconnect [1]
    # This crossbar is placed on the logic base of the HMC and it has its
    # own voltage and clock domains, different from the DRAM dies or from the
    # host.
    xbar_frequency = Param.Frequency('1GHz', "Clock Frequency of the main "
        "XBar")

    # Arbitration latency of the HMC XBar [1]
    xbar_frontend_latency = Param.Cycles(1, "Arbitration latency of the XBar")

    # Latency to forward a packet via the interconnect [1] (two levels of
    # FIFOs at the input and output of the interconnect)
    xbar_forward_latency = Param.Cycles(2, "Forward latency of the XBar")

    # Latency to forward a response via the interconnect [1] (two levels of
    # FIFOs at the input and output of the interconnect)
    xbar_response_latency = Param.Cycles(2, "Response latency of the XBar")

    # Number of crossbars which connect the 16 vaults to the serial links [7]
    number_mem_crossbar = Param.Unsigned(4, "Number of crossbar in HMC")

    #*****************************SERIAL LINK PARAMETERS**********************
    # Number of serial link controllers [1]
    num_links_controllers = Param.Unsigned(4, "Number of serial link "
        "controllers")

    # Number of packets (not flits) to store at the request side of the
    # serial link. This number should be adjusted to achieve the required
    # bandwidth
    link_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer "
        "at the request side of the serial link")

    # Number of packets (not flits) to store at the response side of the
    # serial link. This number should be adjusted to achieve the required
    # bandwidth
    link_buffer_size_rsp = Param.Unsigned(10, "Number of packets to buffer "
        "at the response side of the serial link")

    # Latency of the serial link composed by SER/DES latency (1.6ns [4]) plus
    # the PCB trace latency (3ns Estimated based on [5])
    link_latency = Param.Latency('4.6ns', "Latency of the serial links")

    # Clock frequency of each serial link (SerDes) [1]
    link_frequency = Param.Frequency('10GHz', "Clock Frequency of the serial "
        "links")

    # Clock frequency of serial link Controller [6]
    # clk_hmc[Mhz]= num_lanes_per_link * lane_speed [Gbits/s] /
    # data_path_width * 10^6
    # clk_hmc[Mhz]= 16 * 10 Gbps / 256 * 10^6 = 625 Mhz
    link_controller_frequency = Param.Frequency('625MHz',
        "Clock Frequency of the link controller")

    # Latency of the serial link controller to process the packets [1][6]
    # (ClockDomain = 625 Mhz )
    # used here for calculations only
    link_ctrl_latency = Param.Cycles(4, "The number of cycles required for "
        "the controller to process the packet")

    # total_ctrl_latency = link_ctrl_latency + link_latency
    # total_ctrl_latency = 4(Cycles) * 1.6 ns + 4.6 ns
    total_ctrl_latency = Param.Latency('11ns', "The latency experienced by "
        "every packet regardless of size of packet")

    # Number of parallel lanes in each serial link [1]
    num_lanes_per_link = Param.Unsigned(16, "Number of lanes per each link")

    # Number of serial links [1]
    num_serial_links = Param.Unsigned(4, "Number of serial links")

    # Speed of each lane of serial link - SerDes serial interface 10 Gb/s
    serial_link_speed = Param.UInt64(10, "Gb/s speed of each lane of "
        "serial link")

    #*****************************PERFORMANCE MONITORING**********************
    # The main monitor behind the HMC Controller
    enable_global_monitor = Param.Bool(False, "The main monitor behind the "
        "HMC Controller")

    # The link performance monitors
    enable_link_monitor = Param.Bool(False, "The link monitors")

    # Link aggregator enable - put a crossbar between buffers & links
    enable_link_aggr = Param.Bool(False, "The crossbar between port and "
        "Link Controller")

    enable_buff_div = Param.Bool(True, "Memory Range of Buffer is "
        "divided between total range")

    #*****************************HMC ARCHITECTURE ***************************
    # Memory chunk for 16 vaults - number of vaults / number of crossbars
    mem_chunk = Param.Unsigned(4, "Chunk of memory range for each cross bar "
        "in arch 0")

    # Size of the request buffer within the crossbar, used for modelling the
    # extra latency when the request goes to a non-local vault
    xbar_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer "
        "at the request side of the crossbar")

    # Size of the response buffer within the crossbar, used for modelling the
    # extra latency when the response is received from a non-local vault
    xbar_buffer_size_resp = Param.Unsigned(10, "Number of packets to buffer "
        "at the response side of the crossbar")
237
238# configure host system with Serial Links
def config_host_hmc(options, system):
    """Create the host-side HMC controller and attach it to ``system``.

    An ``HMCSystem`` is stored as ``system.hmc_host``. One ``SerialLink`` is
    created per ``num_serial_links`` and, when ``enable_global_monitor`` is
    set, one ``CommMonitor`` per link is placed in front of it. The links are
    then connected to ``system.membus`` and/or ``system.tgen`` depending on
    ``options.arch``:

    * ``"distributed"``: every link is fed from ``system.membus``.
    * ``"mixed"``: links 0 and 1 are fed from ``system.membus``, links 2 and
      3 from ``system.tgen[2]`` and ``system.tgen[3]``.
    * ``"same"``: link ``i`` is fed from ``system.tgen[i]``.

    Args:
        options: parsed command-line options. ``ser_ranges`` and ``arch`` are
            required; ``enable_global_monitor`` and ``enable_link_monitor``
            are copied onto the host object only if present.
        system: the system being configured.

    Returns:
        The same ``system`` object that was passed in.
    """
    system.hmc_host = HMCSystem()
    host = system.hmc_host

    # The monitor switches are optional: a script that does not define them
    # keeps the defaults declared on HMCSystem.
    for flag in ("enable_global_monitor", "enable_link_monitor"):
        if hasattr(options, flag):
            setattr(host, flag, getattr(options, flag))

    num_links = int(host.num_serial_links)
    num_ctrls = int(host.num_links_controllers)

    # Serial link Controller with 16 SerDes links at 10 Gbps
    # with serial link ranges w.r.t. the architecture
    host.seriallink = [
        SerialLink(ranges=options.ser_ranges[i],
                   req_size=host.link_buffer_size_req,
                   resp_size=host.link_buffer_size_rsp,
                   num_lanes=host.num_lanes_per_link,
                   link_speed=host.serial_link_speed,
                   delay=host.total_ctrl_latency)
        for i in range(num_links)]

    # One monitor per serial link when the global monitor is enabled
    if host.enable_global_monitor:
        host.lmonitor = [CommMonitor() for _ in range(num_links)]

    # Every serial link gets its own clock and voltage domain
    for i in range(num_links):
        host.seriallink[i].clk_domain = SrcClockDomain(
            clock=host.link_controller_frequency,
            voltage_domain=VoltageDomain(voltage='1V'))

    def link_entry(i):
        # Port that upstream components must connect to in order to reach
        # serial link i: the monitor's slave port (with the monitor hooked
        # up in front of the link) when monitoring is on, else the link's
        # own slave port.
        if host.enable_global_monitor:
            host.lmonitor[i].master = host.seriallink[i].slave
            return host.lmonitor[i].slave
        return host.seriallink[i].slave

    # Connect membus/traffic gen to Serial Link Controller for the different
    # HMC architectures
    if options.arch == "distributed":
        for i in range(num_ctrls):
            system.membus.master = link_entry(i)
    elif options.arch == "mixed":
        for i in (0, 1):
            system.membus.master = link_entry(i)
        for i in (2, 3):
            system.tgen[i].port = link_entry(i)
    elif options.arch == "same":
        for i in range(num_ctrls):
            system.tgen[i].port = link_entry(i)

    return system
316
# Create an HMC device and attach it to the current system
def config_hmc(options, system, hmc_host):
    """Create the HMC device and attach it to the host serial links.

    Instantiates ``system.hmc_dev`` (an ``HMCSystem``), optional per-link
    ``CommMonitor`` objects and the crossbars of the device, connects each
    host serial link to a crossbar, and, depending on ``options.arch``,
    adds the bridges that let crossbars forward requests to each other.

    Args:
        options: Option object. ``options.arch`` is read;
            ``enable_global_monitor`` and ``enable_link_monitor`` are
            copied onto the device when those attributes are available.
        system: System under construction. ``system.hmc_host.seriallink``
            is read for every serial link, and ``system.mem_ranges`` is
            read for the "same" and "mixed" architectures.
            ``system.hmc_dev`` is assigned here, as is
            ``system.system_port`` for the "same" architecture.
        hmc_host: Not referenced by this function; the host is reached
            through ``system.hmc_host``. Kept so existing callers keep
            working.
    """
    # Create HMC device
    system.hmc_dev = HMCSystem()
    dev = system.hmc_dev

    # Monitor switches are optional: copy them only when they can be read
    # from ``options`` and set on the device. Only AttributeError is
    # tolerated so that unrelated failures are no longer hidden.
    for flag in ('enable_global_monitor', 'enable_link_monitor'):
        try:
            setattr(dev, flag, getattr(options, flag))
        except AttributeError:
            pass

    # NOTE(review): the monitors are created per link controller but are
    # indexed per serial link below -- confirm both counts always match.
    if dev.enable_link_monitor:
        dev.lmonitor = [CommMonitor()
                        for _ in xrange(dev.num_links_controllers)]

    # HMC crossbars located in its logic-base (LoB). The count comes from
    # the device itself so that it agrees with the clock-domain loop below.
    dev.xbar = [NoncoherentXBar(
                    width=dev.xbar_width,
                    frontend_latency=dev.xbar_frontend_latency,
                    forward_latency=dev.xbar_forward_latency,
                    response_latency=dev.xbar_response_latency)
                for _ in xrange(dev.number_mem_crossbar)]

    for i in xrange(dev.number_mem_crossbar):
        dev.xbar[i].clk_domain = SrcClockDomain(
            clock=dev.xbar_frequency,
            voltage_domain=VoltageDomain(voltage='1V'))

    # Attach each serial link to its crossbar, optionally through a monitor
    for i in xrange(dev.num_serial_links):
        link = system.hmc_host.seriallink[i]
        if dev.enable_link_monitor:
            link.master = dev.lmonitor[i].slave
            dev.lmonitor[i].master = dev.xbar[i].slave
        else:
            link.master = dev.xbar[i].slave

    # Connect the crossbars with each other so that a request arriving at
    # the wrong crossbar is forwarded to the correct one. Bridges are used
    # to connect the crossbars.
    if options.arch == "same":
        numx = len(dev.xbar)

        # create a list of buffers
        dev.buffers = [Bridge(req_size=dev.xbar_buffer_size_req,
                              resp_size=dev.xbar_buffer_size_resp)
                       for _ in xrange(numx * (dev.mem_chunk - 1))]

        # Buffer iterator: hands out the next unused bridge index
        it = iter(range(len(dev.buffers)))

        # necessary to add system_port to one of the crossbars
        system.system_port = dev.xbar[3].slave

        chunk = int(dev.mem_chunk)

        # iterate over all the crossbars and connect them as required
        for i in range(numx):
            for j in range(numx):
                # Don't connect a crossbar to itself
                if i == j:
                    continue

                # get the next index of buffer
                index = next(it)

                # The bridge forwards only the ranges of the target xbar
                dev.buffers[index].ranges = \
                    system.mem_ranges[j * chunk:(j + 1) * chunk]

                # Connect the bridge between crossbars
                dev.xbar[i].master = dev.buffers[index].slave
                dev.buffers[index].master = dev.xbar[j].slave

    # Two crossbars (3 and 2) are connected to all other crossbars; the
    # other two can only direct traffic to their local vaults.
    if options.arch == "mixed":
        # (source xbar, destination xbar), in the order the bridges are
        # created and connected. Each bridge is stored on the device as
        # buffer<src><dst> and forwards the four memory ranges of the
        # destination crossbar.
        mixed_links = ((3, 0), (3, 1), (3, 2),
                       (2, 0), (2, 1), (2, 3))
        for src, dst in mixed_links:
            name = 'buffer%d%d' % (src, dst)
            setattr(dev, name,
                    Bridge(ranges=system.mem_ranges[4 * dst:4 * (dst + 1)]))
            bridge = getattr(dev, name)
            dev.xbar[src].master = bridge.slave
            bridge.master = dev.xbar[dst].slave
427