Deleted Added
sdiff udiff text old ( 10152:52c552138ba1 ) new ( 10234:5cb711fa6176 )
full compact
1/*****************************************************************************
2 * McPAT
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
5 * All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 ***************************************************************************/
31#include <algorithm>
32#include <cassert>
33#include <cmath>
34#include <iostream>
35#include <string>
36
37#include "XML_Parse.h"
38#include "basic_circuit.h"
39#include "basic_components.h"
40#include "const.h"
41#include "io.h"
42#include "logic.h"
43#include "memoryctrl.h"
44#include "parameter.h"
45
46/* overview of MC models:
47 * McPAT memory controllers are modeled according to large number of industrial data points.

--- 16 unchanged lines hidden (view full) ---

64 * memory controllers as the DDR2/DDR3-Lite protocol controllers (except DesignWare DDR2/DDR3-Lite memory
65 * controllers, all memory controller IP in Cadence ChipEstimator Tool are backend memory controllers such as
66 * DDRC 1600A and DDRC 800A). Thus, to some extent the area and power difference between DesignWare
67 * DDR2/DDR3-Lite memory controllers and DDR2/DDR3-Lite protocol controllers can serve as an estimate of the
68 * frontend power and area, which is very close to the analytically modeled results of the frontend for Niagara2@65nm
69 *
70 */
71
72MCBackend::MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_)
73:l_ip(*interface_ip_),
74 mc_type(mc_type_),
75 mcp(mcp_)
76{
77
78 local_result = init_interface(&l_ip);
79 compute();
80
81}
82
83
84void MCBackend::compute()
85{
86 //double max_row_addr_width = 20.0;//Current address 12~18bits
87 double C_MCB, mc_power, backend_dyn, backend_gates;//, refresh_period,refresh_freq;//Equivalent per bit Cap for backend,
88 double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
89 double NMOS_sizing, PMOS_sizing;
90
91 if (mc_type == MC)
92 {
93 if (mcp.type == 0)
94 {
95 //area = (2.2927*log(peakDataTransferRate)-14.504)*memDataWidth/144.0*(l_ip.F_sz_um/0.09);
96 area.set_area((2.7927*log(mcp.peakDataTransferRate*2)-19.862)/2.0*mcp.dataBusWidth/128.0*(l_ip.F_sz_um/0.09)*mcp.num_channels*1e6);//um^2
97 //assuming the approximately same scaling factor as seen in processors.
98 //C_MCB=0.2/1.3/1.3/266/64/0.09*g_ip.F_sz_um;//based on AMD Geode processor which has a very basic mc on chip.
99 //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process.
100 //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et
101 mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend
102 C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065;
103 power_t.readOp.dynamic = C_MCB*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(mcp.dataBusWidth/*+mcp.addressBusWidth*/);//per access energy in memory controller
104 power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
105 power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
106
107 }
108 else
109 { NMOS_sizing = g_tp.min_w_nmos_;
110 PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
111 area.set_area(0.15*mcp.dataBusWidth/72.0*(l_ip.F_sz_um/0.065)* (l_ip.F_sz_um/0.065)*mcp.num_channels*1e6);//um^2
112 backend_dyn = 0.9e-9/800e6*mcp.clockRate/12800*mcp.peakDataTransferRate*mcp.dataBusWidth/72.0*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(l_ip.F_sz_nm/65.0);//Average on DDR2/3 protocol controller and DDRC 1600/800A in Cadence ChipEstimate
113 //Scaling to technology and DIMM feature. The base IP support DDR3-1600(PC3 12800)
114 backend_gates = 50000*mcp.dataBusWidth/64.0;//5000 is from Cadence ChipEstimator
115
116 power_t.readOp.dynamic = backend_dyn;
117 power_t.readOp.leakage = (backend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
118 power_t.readOp.gate_leakage = (backend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
119
120 }
121 }
122 else
123 {//skip old model
124 cout<<"Unknown memory controllers"<<endl;exit(0);
125 area.set_area(0.243*mcp.dataBusWidth/8);//area based on Cadence ChipEstimator for 8bit bus
126 //mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend
127 C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065;
128 power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
129 power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
130 power_t.readOp.dynamic *= 1.2;
131 power_t.readOp.leakage *= 1.2;
132 power_t.readOp.gate_leakage *= 1.2;
133 //flash controller has about 20% more backend power since BCH ECC in flash is complex and power hungry
134 }
135 double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
136 power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
137}
138
139void MCBackend::computeEnergy(bool is_tdp)
140{
141 //backend uses internal data buswidth
142 if (is_tdp)
143 {
144 //init stats for Peak
145 stats_t.readAc.access = 0.5*mcp.num_channels;
146 stats_t.writeAc.access = 0.5*mcp.num_channels;
147 tdp_stats = stats_t;
148 }
149 else
150 {
151 //init stats for runtime power (RTP)
152 stats_t.readAc.access = mcp.reads;
153 stats_t.writeAc.access = mcp.writes;
154 tdp_stats = stats_t;
155 }
156 if (is_tdp)
157 {
158 power = power_t;
159 power.readOp.dynamic = (stats_t.readAc.access + stats_t.writeAc.access)*power_t.readOp.dynamic;
160
161 }
162 else
163 {
164 rt_power.readOp.dynamic = (stats_t.readAc.access + stats_t.writeAc.access)*mcp.llcBlockSize*8.0/mcp.dataBusWidth*power_t.readOp.dynamic;
165 rt_power = rt_power + power_t*pppm_lkg;
166 rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime;
167 //Assume 10% of peak power is consumed by routine job including memory refreshing and scrubbing
168 }
169}
170
171
172MCPHY::MCPHY(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_)
173:l_ip(*interface_ip_),
174 mc_type(mc_type_),
175 mcp(mcp_)
176{
177
178 local_result = init_interface(&l_ip);
179 compute();
180}
181
182void MCPHY::compute()
183{
184 //PHY uses internal data buswidth but the actuall off-chip datawidth is 64bits + ecc
185 double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio() ;
186 /*
187 * according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces ," ISSCC 2006;
188 * From Cadence ChipEstimator for normal I/O around 0.4~0.8 mW/Gb/s
189 */
190 double power_per_gb_per_s, phy_dyn,phy_gates, NMOS_sizing, PMOS_sizing;
191
192 if (mc_type == MC)
193 {
194 if (mcp.type == 0)
195 {
196 power_per_gb_per_s = mcp.LVDS? 0.01:0.04;
197 //Based on die photos from Niagara 1 and 2.
198 //TODO merge this into undifferentiated core.PHY only achieves square root of the ideal scaling.
199 //area = (6.4323*log(peakDataTransferRate)-34.76)*memDataWidth/128.0*(l_ip.F_sz_um/0.09);
200 area.set_area((6.4323*log(mcp.peakDataTransferRate*2)-48.134)*mcp.dataBusWidth/128.0*(l_ip.F_sz_um/0.09)*mcp.num_channels*1e6/2);//TODO:/2
201 //This is from curve fitting based on Niagara 1 and 2's PHY die photo.
202 //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
203 //power.readOp.dynamic = 0.02*memAccesses*llcBlocksize*8;//change from Bytes to bits.
204 power_t.readOp.dynamic = power_per_gb_per_s*sqrt(l_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
205 power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
206 power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
207
208 }
209 else
210 {
211 NMOS_sizing = g_tp.min_w_nmos_;
212 PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
213 //Designware/synopsis 16bit DDR3 PHY is 1.3mm (WITH IOs) at 40nm for upto DDR3 2133 (PC3 17066)
214 double non_IO_percentage = 0.2;
215 area.set_area(1.3*non_IO_percentage/2133.0e6*mcp.clockRate/17066*mcp.peakDataTransferRate*mcp.dataBusWidth/16.0*(l_ip.F_sz_um/0.040)* (l_ip.F_sz_um/0.040)*mcp.num_channels*1e6);//um^2
216 phy_gates = 200000*mcp.dataBusWidth/64.0;
217 power_per_gb_per_s = 0.01;
218 //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
219 power_t.readOp.dynamic = power_per_gb_per_s*(l_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
220 power_t.readOp.leakage = (mcp.withPHY? phy_gates:0)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
221 power_t.readOp.gate_leakage = (mcp.withPHY? phy_gates:0)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
222 }
223
224 }
225 else
226 {
227 area.set_area(0.4e6/2*mcp.dataBusWidth/8);//area based on Cadence ChipEstimator for 8bit bus
228 }
229
230// double phy_factor = (int)ceil(mcp.dataBusWidth/72.0);//Previous phy power numbers are based on 72 bit DIMM interface
231// power_t.readOp.dynamic *= phy_factor;
232// power_t.readOp.leakage *= phy_factor;
233// power_t.readOp.gate_leakage *= phy_factor;
234
235 double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
236 power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
237}
238
239
240void MCPHY::computeEnergy(bool is_tdp)
241{
242 if (is_tdp)
243 {
244 //init stats for Peak
245 stats_t.readAc.access = 0.5*mcp.num_channels; //time share on buses
246 stats_t.writeAc.access = 0.5*mcp.num_channels;
247 tdp_stats = stats_t;
248 }
249 else
250 {
251 //init stats for runtime power (RTP)
252 stats_t.readAc.access = mcp.reads;
253 stats_t.writeAc.access = mcp.writes;
254 tdp_stats = stats_t;
255 }
256
257 if (is_tdp)
258 {
259 double data_transfer_unit = (mc_type == MC)? 72:16;/*DIMM data width*/
260 power = power_t;
261 power.readOp.dynamic = power.readOp.dynamic * (mcp.peakDataTransferRate*8*1e6/1e9/*change to Gbs*/)*mcp.dataBusWidth/data_transfer_unit*mcp.num_channels/mcp.clockRate;
262 // divide by clock rate is for match the final computation where *clock is used
263 //(stats_t.readAc.access*power_t.readOp.dynamic+
264// stats_t.writeAc.access*power_t.readOp.dynamic);
265
266 }
267 else
268 {
269 rt_power = power_t;
270// rt_power.readOp.dynamic = (stats_t.readAc.access*power_t.readOp.dynamic+
271// stats_t.writeAc.access*power_t.readOp.dynamic);
272
273 rt_power.readOp.dynamic=power_t.readOp.dynamic*(stats_t.readAc.access + stats_t.writeAc.access)*(mcp.llcBlockSize)*8/1e9/mcp.executionTime*(mcp.executionTime);
274 rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime;
275 }
276}
277
278MCFrontEnd::MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_)
279:XML(XML_interface),
280 interface_ip(*interface_ip_),
281 mc_type(mc_type_),
282 mcp(mcp_),
283 MC_arb(0),
284 frontendBuffer(0),
285 readBuffer(0),
286 writeBuffer(0)
287{
288 /* All computations are for a single MC
289 *
290 */
291
292 int tag, data;
293 bool is_default =true;//indication for default setup
294
295 /* MC frontend engine channels share the same engines but logically partitioned
296 * For all hardware inside MC. different channels do not share resources.
297 * TODO: add docodeing/mux stage to steer memory requests to different channels.
298 */
299
300 //memory request reorder buffer
301 tag = mcp.addressBusWidth + EXTRA_TAG_BITS + mcp.opcodeW;
302 data = int(ceil((XML->sys.physical_address_width + mcp.opcodeW)/8.0));
303 interface_ip.cache_sz = data*XML->sys.mc.req_window_size_per_channel;
304 interface_ip.line_sz = data;
305 interface_ip.assoc = 0;
306 interface_ip.nbanks = 1;
307 interface_ip.out_w = interface_ip.line_sz*8;
308 interface_ip.specific_tag = 1;
309 interface_ip.tag_w = tag;
310 interface_ip.access_mode = 0;
311 interface_ip.throughput = 1.0/mcp.clockRate;
312 interface_ip.latency = 1.0/mcp.clockRate;
313 interface_ip.is_cache = true;
314 interface_ip.pure_cam = false;
315 interface_ip.pure_ram = false;
316 interface_ip.obj_func_dyn_energy = 0;
317 interface_ip.obj_func_dyn_power = 0;
318 interface_ip.obj_func_leak_power = 0;
319 interface_ip.obj_func_cycle_t = 1;
320 interface_ip.num_rw_ports = 0;
321 interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc;
322 interface_ip.num_wr_ports = interface_ip.num_rd_ports;
323 interface_ip.num_se_rd_ports = 0;
324 interface_ip.num_search_ports = XML->sys.mc.memory_channels_per_mc;
325 frontendBuffer = new ArrayST(&interface_ip, "MC ReorderBuffer", Uncore_device);
326 frontendBuffer->area.set_area(frontendBuffer->area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
327 area.set_area(area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
328
329 //selection and arbitration logic
330 MC_arb = new selection_logic(is_default, XML->sys.mc.req_window_size_per_channel,1,&interface_ip, Uncore_device);
331
332 //read buffers.
333 data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte
334 interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize;
335 interface_ip.line_sz = data;
336 interface_ip.assoc = 1;
337 interface_ip.nbanks = 1;
338 interface_ip.out_w = interface_ip.line_sz*8;
339 interface_ip.access_mode = 1;
340 interface_ip.throughput = 1.0/mcp.clockRate;
341 interface_ip.latency = 1.0/mcp.clockRate;
342 interface_ip.is_cache = false;
343 interface_ip.pure_cam = false;
344 interface_ip.pure_ram = true;
345 interface_ip.obj_func_dyn_energy = 0;
346 interface_ip.obj_func_dyn_power = 0;
347 interface_ip.obj_func_leak_power = 0;
348 interface_ip.obj_func_cycle_t = 1;
349 interface_ip.num_rw_ports = 0;//XML->sys.mc.memory_channels_per_mc*2>2?2:XML->sys.mc.memory_channels_per_mc*2;
350 interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc;
351 interface_ip.num_wr_ports = interface_ip.num_rd_ports;
352 interface_ip.num_se_rd_ports = 0;
353 readBuffer = new ArrayST(&interface_ip, "MC ReadBuffer", Uncore_device);
354 readBuffer->area.set_area(readBuffer->area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
355 area.set_area(area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
356
357 //write buffer
358 data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte
359 interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize;
360 interface_ip.line_sz = data;
361 interface_ip.assoc = 1;
362 interface_ip.nbanks = 1;
363 interface_ip.out_w = interface_ip.line_sz*8;
364 interface_ip.access_mode = 0;
365 interface_ip.throughput = 1.0/mcp.clockRate;
366 interface_ip.latency = 1.0/mcp.clockRate;
367 interface_ip.obj_func_dyn_energy = 0;
368 interface_ip.obj_func_dyn_power = 0;
369 interface_ip.obj_func_leak_power = 0;
370 interface_ip.obj_func_cycle_t = 1;
371 interface_ip.num_rw_ports = 0;
372 interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc;
373 interface_ip.num_wr_ports = interface_ip.num_rd_ports;
374 interface_ip.num_se_rd_ports = 0;
375 writeBuffer = new ArrayST(&interface_ip, "MC writeBuffer", Uncore_device);
376 writeBuffer->area.set_area(writeBuffer->area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
377 area.set_area(area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
378}
379
380void MCFrontEnd::computeEnergy(bool is_tdp)
381{
382 if (is_tdp)
383 {
384 //init stats for Peak
385 frontendBuffer->stats_t.readAc.access = frontendBuffer->l_ip.num_search_ports;
386 frontendBuffer->stats_t.writeAc.access = frontendBuffer->l_ip.num_wr_ports;
387 frontendBuffer->tdp_stats = frontendBuffer->stats_t;
388
389 readBuffer->stats_t.readAc.access = readBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle;
390 readBuffer->stats_t.writeAc.access = readBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle;
391 readBuffer->tdp_stats = readBuffer->stats_t;
392
393 writeBuffer->stats_t.readAc.access = writeBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle;
394 writeBuffer->stats_t.writeAc.access = writeBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle;
395 writeBuffer->tdp_stats = writeBuffer->stats_t;
396
397 }
398 else
399 {
400 //init stats for runtime power (RTP)
401 frontendBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads *mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72;
402 //For each channel, each memory word need to check the address data to achieve best scheduling results.
403 //and this need to be done on all physical DIMMs in each logical memory DIMM *mcp.dataBusWidth/72
404 frontendBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72;
405 frontendBuffer->rtp_stats = frontendBuffer->stats_t;
406
407 readBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first
408 readBuffer->stats_t.writeAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first
409 readBuffer->rtp_stats = readBuffer->stats_t;
410
411 writeBuffer->stats_t.readAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth;
412 writeBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth;
413 writeBuffer->rtp_stats = writeBuffer->stats_t;
414 }
415
416 frontendBuffer->power_t.reset();
417 readBuffer->power_t.reset();
418 writeBuffer->power_t.reset();
419
420// frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access*
421// (frontendBuffer->local_result.power.searchOp.dynamic+frontendBuffer->local_result.power.readOp.dynamic)+
422// frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic);
423
424 frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access +
425 frontendBuffer->stats_t.writeAc.access)*frontendBuffer->local_result.power.searchOp.dynamic
426 + frontendBuffer->stats_t.readAc.access * frontendBuffer->local_result.power.readOp.dynamic
427 + frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic;
428
429 readBuffer->power_t.readOp.dynamic += (readBuffer->stats_t.readAc.access*
430 readBuffer->local_result.power.readOp.dynamic+
431 readBuffer->stats_t.writeAc.access*readBuffer->local_result.power.writeOp.dynamic);
432 writeBuffer->power_t.readOp.dynamic += (writeBuffer->stats_t.readAc.access*
433 writeBuffer->local_result.power.readOp.dynamic+
434 writeBuffer->stats_t.writeAc.access*writeBuffer->local_result.power.writeOp.dynamic);
435
436 if (is_tdp)
437 {
438 power = power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t +
439 (frontendBuffer->local_result.power +
440 readBuffer->local_result.power +
441 writeBuffer->local_result.power)*pppm_lkg;
442
443 }
444 else
445 {
446 rt_power = rt_power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t +
447 (frontendBuffer->local_result.power +
448 readBuffer->local_result.power +
449 writeBuffer->local_result.power)*pppm_lkg;
450 rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime;
451 }
452}
453
454void MCFrontEnd::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
455{
456 string indent_str(indent, ' ');
457 string indent_str_next(indent+2, ' ');
458
459 if (is_tdp)
460 {
461 cout << indent_str << "Front End ROB:" << endl;
462 cout << indent_str_next << "Area = " << frontendBuffer->area.get_area()*1e-6<< " mm^2" << endl;
463 cout << indent_str_next << "Peak Dynamic = " << frontendBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl;
464 cout << indent_str_next << "Subthreshold Leakage = " << frontendBuffer->power.readOp.leakage <<" W" << endl;
465 cout << indent_str_next << "Gate Leakage = " << frontendBuffer->power.readOp.gate_leakage << " W" << endl;
466 cout << indent_str_next << "Runtime Dynamic = " << frontendBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
467
468 cout <<endl;
469 cout << indent_str<< "Read Buffer:" << endl;
470 cout << indent_str_next << "Area = " << readBuffer->area.get_area()*1e-6 << " mm^2" << endl;
471 cout << indent_str_next << "Peak Dynamic = " << readBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl;
472 cout << indent_str_next << "Subthreshold Leakage = " << readBuffer->power.readOp.leakage << " W" << endl;
473 cout << indent_str_next << "Gate Leakage = " << readBuffer->power.readOp.gate_leakage << " W" << endl;
474 cout << indent_str_next << "Runtime Dynamic = " << readBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
475 cout <<endl;
476 cout << indent_str << "Write Buffer:" << endl;
477 cout << indent_str_next << "Area = " << writeBuffer->area.get_area() *1e-6 << " mm^2" << endl;
478 cout << indent_str_next << "Peak Dynamic = " << writeBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl;
479 cout << indent_str_next << "Subthreshold Leakage = " << writeBuffer->power.readOp.leakage << " W" << endl;
480 cout << indent_str_next << "Gate Leakage = " << writeBuffer->power.readOp.gate_leakage << " W" << endl;
481 cout << indent_str_next << "Runtime Dynamic = " << writeBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
482 cout <<endl;
483 }
484 else
485 {
486 cout << indent_str << "Front End ROB:" << endl;
487 cout << indent_str_next << "Area = " << frontendBuffer->area.get_area()*1e-6<< " mm^2" << endl;
488 cout << indent_str_next << "Peak Dynamic = " << frontendBuffer->rt_power.readOp.dynamic*mcp.clockRate << " W" << endl;
489 cout << indent_str_next << "Subthreshold Leakage = " << frontendBuffer->rt_power.readOp.leakage <<" W" << endl;
490 cout << indent_str_next << "Gate Leakage = " << frontendBuffer->rt_power.readOp.gate_leakage << " W" << endl;
491 cout <<endl;
492 cout << indent_str<< "Read Buffer:" << endl;
493 cout << indent_str_next << "Area = " << readBuffer->area.get_area()*1e-6 << " mm^2" << endl;
494 cout << indent_str_next << "Peak Dynamic = " << readBuffer->rt_power.readOp.dynamic*mcp.clockRate << " W" << endl;
495 cout << indent_str_next << "Subthreshold Leakage = " << readBuffer->rt_power.readOp.leakage << " W" << endl;
496 cout << indent_str_next << "Gate Leakage = " << readBuffer->rt_power.readOp.gate_leakage << " W" << endl;
497 cout <<endl;
498 cout << indent_str << "Write Buffer:" << endl;
499 cout << indent_str_next << "Area = " << writeBuffer->area.get_area() *1e-6 << " mm^2" << endl;
500 cout << indent_str_next << "Peak Dynamic = " << writeBuffer->rt_power.readOp.dynamic*mcp.clockRate << " W" << endl;
501 cout << indent_str_next << "Subthreshold Leakage = " << writeBuffer->rt_power.readOp.leakage << " W" << endl;
502 cout << indent_str_next << "Gate Leakage = " << writeBuffer->rt_power.readOp.gate_leakage << " W" << endl;
503 }
504
505}
506
507
508MemoryController::MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_)
509:XML(XML_interface),
510 interface_ip(*interface_ip_),
511 mc_type(mc_type_),
512 frontend(0),
513 transecEngine(0),
514 PHY(0),
515 pipeLogic(0)
516{
517 /* All computations are for a single MC
518 *
519 */
520 interface_ip.wire_is_mat_type = 2;
521 interface_ip.wire_os_mat_type = 2;
522 interface_ip.wt =Global;
523 set_mc_param();
524 frontend = new MCFrontEnd(XML, &interface_ip, mcp, mc_type);
525 area.set_area(area.get_area()+ frontend->area.get_area());
526 transecEngine = new MCBackend(&interface_ip, mcp, mc_type);
527 area.set_area(area.get_area()+ transecEngine->area.get_area());
528 if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
529 {
530 PHY = new MCPHY(&interface_ip, mcp, mc_type);
531 area.set_area(area.get_area()+ PHY->area.get_area());
532 }
533 //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc.
534// transecEngine.initialize(&interface_ip);
535// transecEngine.peakDataTransferRate = XML->sys.mem.peak_transfer_rate;
536// transecEngine.memDataWidth = dataBusWidth;
537// transecEngine.memRank = XML->sys.mem.number_ranks;
538// //transecEngine.memAccesses=XML->sys.mc.memory_accesses;
539// //transecEngine.llcBlocksize=llcBlockSize;
540// transecEngine.compute();
541// transecEngine.area.set_area(XML->sys.mc.memory_channels_per_mc*transecEngine.area.get_area()) ;
542// area.set_area(area.get_area()+ transecEngine.area.get_area());
543// ///cout<<"area="<<area<<endl;
544////
545// //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers
546// PHY.initialize(&interface_ip);
547// PHY.peakDataTransferRate = XML->sys.mem.peak_transfer_rate;
548// PHY.memDataWidth = dataBusWidth;
549// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power
550// //PHY.llcBlocksize=llcBlockSize;
551// PHY.compute();
552// PHY.area.set_area(XML->sys.mc.memory_channels_per_mc*PHY.area.get_area()) ;
553// area.set_area(area.get_area()+ PHY.area.get_area());
554 ///cout<<"area="<<area<<endl;
555//
556// interface_ip.pipeline_stages = 5;//normal memory controller has five stages in the pipeline.
557// interface_ip.per_stage_vector = addressBusWidth + XML->sys.core[0].opcode_width + dataBusWidth;
558// pipeLogic = new pipeline(is_default, &interface_ip);
559// //pipeLogic.init_pipeline(is_default, &interface_ip);
560// pipeLogic->compute_pipeline();
561// area.set_area(area.get_area()+ pipeLogic->area.get_area()*1e-6);
562// area.set_area((area.get_area()+mc_area*1e-6)*1.1);//placement and routing overhead
563//
564//
565//// //clock
566//// clockNetwork.init_wire_external(is_default, &interface_ip);
567//// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb
568//// clockNetwork.end_wiring_level =5;//toplevel metal
569//// clockNetwork.start_wiring_level =5;//toplevel metal
570//// clockNetwork.num_regs = pipeLogic.tot_stage_vector;
571//// clockNetwork.optimize_wire();
572
573
574}
575void MemoryController::computeEnergy(bool is_tdp)
576{
577
578 frontend->computeEnergy(is_tdp);
579 transecEngine->computeEnergy(is_tdp);
580 if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
581 {
582 PHY->computeEnergy(is_tdp);
583 }
584 if (is_tdp)
585 {
586 power = power + frontend->power + transecEngine->power;
587 if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
588 {
589 power = power + PHY->power;
590 }
591 }
592 else
593 {
594 rt_power = rt_power + frontend->rt_power + transecEngine->rt_power;
595 if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
596 {
597 rt_power = rt_power + PHY->rt_power;
598 }
599 }
600}
601
602void MemoryController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
603{
604 string indent_str(indent, ' ');
605 string indent_str_next(indent+2, ' ');
606 bool long_channel = XML->sys.longer_channel_device;
607
608 if (is_tdp)
609 {
610 cout << "Memory Controller:" << endl;
611 cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
612 cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*mcp.clockRate << " W" << endl;
613 cout << indent_str<< "Subthreshold Leakage = "
614 << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
615 //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
616 cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
617 cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
618 cout<<endl;
619 cout << indent_str << "Front End Engine:" << endl;
620 cout << indent_str_next << "Area = " << frontend->area.get_area()*1e-6<< " mm^2" << endl;
621 cout << indent_str_next << "Peak Dynamic = " << frontend->power.readOp.dynamic*mcp.clockRate << " W" << endl;
622 cout << indent_str_next << "Subthreshold Leakage = "
623 << (long_channel? frontend->power.readOp.longer_channel_leakage:frontend->power.readOp.leakage) <<" W" << endl;
624 cout << indent_str_next << "Gate Leakage = " << frontend->power.readOp.gate_leakage << " W" << endl;
625 cout << indent_str_next << "Runtime Dynamic = " << frontend->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
626 cout <<endl;
627 if (plevel >2){
628 frontend->displayEnergy(indent+4,is_tdp);
629 }
630 cout << indent_str << "Transaction Engine:" << endl;
631 cout << indent_str_next << "Area = " << transecEngine->area.get_area()*1e-6<< " mm^2" << endl;
632 cout << indent_str_next << "Peak Dynamic = " << transecEngine->power.readOp.dynamic*mcp.clockRate << " W" << endl;
633 cout << indent_str_next << "Subthreshold Leakage = "
634 << (long_channel? transecEngine->power.readOp.longer_channel_leakage:transecEngine->power.readOp.leakage) <<" W" << endl;
635 cout << indent_str_next << "Gate Leakage = " << transecEngine->power.readOp.gate_leakage << " W" << endl;
636 cout << indent_str_next << "Runtime Dynamic = " << transecEngine->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
637 cout <<endl;
638 if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
639 {
640 cout << indent_str << "PHY:" << endl;
641 cout << indent_str_next << "Area = " << PHY->area.get_area()*1e-6<< " mm^2" << endl;
642 cout << indent_str_next << "Peak Dynamic = " << PHY->power.readOp.dynamic*mcp.clockRate << " W" << endl;
643 cout << indent_str_next << "Subthreshold Leakage = "
644 << (long_channel? PHY->power.readOp.longer_channel_leakage:PHY->power.readOp.leakage) <<" W" << endl;
645 cout << indent_str_next << "Gate Leakage = " << PHY->power.readOp.gate_leakage << " W" << endl;
646 cout << indent_str_next << "Runtime Dynamic = " << PHY->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
647 cout <<endl;
648 }
649 }
650 else
651 {
652 cout << "Memory Controller:" << endl;
653 cout << indent_str_next << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
654 cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*mcp.clockRate << " W" << endl;
655 cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage <<" W" << endl;
656 cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
657 cout<<endl;
658 }
659
660}
661
662void MemoryController::set_mc_param()
663{
664
665 if (mc_type==MC)
666 {
667 mcp.clockRate =XML->sys.mc.mc_clock*2;//DDR double pumped
668 mcp.clockRate *= 1e6;
669 mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
670
671 mcp.llcBlockSize =int(ceil(XML->sys.mc.llc_line_length/8.0))+XML->sys.mc.llc_line_length;//ecc overhead
672 mcp.dataBusWidth =int(ceil(XML->sys.mc.databus_width/8.0)) + XML->sys.mc.databus_width;
673 mcp.addressBusWidth =int(ceil(XML->sys.mc.addressbus_width));//XML->sys.physical_address_width;
674 mcp.opcodeW =16;
675 mcp.num_mcs = XML->sys.mc.number_mcs;
676 mcp.num_channels = XML->sys.mc.memory_channels_per_mc;
677 mcp.reads = XML->sys.mc.memory_reads;
678 mcp.writes = XML->sys.mc.memory_writes;
679 //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc.
680 mcp.peakDataTransferRate = XML->sys.mc.peak_transfer_rate;
681 mcp.memRank = XML->sys.mc.number_ranks;
682 //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers
683 //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power
684 //PHY.llcBlocksize=llcBlockSize;
685 mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared
686 mcp.LVDS = XML->sys.mc.LVDS;
687 mcp.type = XML->sys.mc.type;
688 mcp.withPHY = XML->sys.mc.withPHY;
689 }
690// else if (mc_type==FLASHC)
691// {
692// mcp.clockRate =XML->sys.flashc.mc_clock*2;//DDR double pumped
693// mcp.clockRate *= 1e6;
694// mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
695//
696// mcp.llcBlockSize =int(ceil(XML->sys.flashc.llc_line_length/8.0))+XML->sys.flashc.llc_line_length;//ecc overhead
697// mcp.dataBusWidth =int(ceil(XML->sys.flashc.databus_width/8.0)) + XML->sys.flashc.databus_width;
698// mcp.addressBusWidth =int(ceil(XML->sys.flashc.addressbus_width));//XML->sys.physical_address_width;
699// mcp.opcodeW =16;
700// mcp.num_mcs = XML->sys.flashc.number_mcs;
701// mcp.num_channels = XML->sys.flashc.memory_channels_per_mc;
702// mcp.reads = XML->sys.flashc.memory_reads;
703// mcp.writes = XML->sys.flashc.memory_writes;
704// //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc.
705// mcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate;
706// mcp.memRank = XML->sys.flashc.number_ranks;
707// //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers
708// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power
709// //PHY.llcBlocksize=llcBlockSize;
710// mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared
711// mcp.LVDS = XML->sys.flashc.LVDS;
712// mcp.type = XML->sys.flashc.type;
713// }
714 else
715 {
716 cout<<"Unknown memory controller type: neither DRAM controller nor Flash controller" <<endl;
717 exit(0);
718 }
719}
720
721MCFrontEnd ::~MCFrontEnd(){
722
723 if(MC_arb) {delete MC_arb; MC_arb = 0;}
724 if(frontendBuffer) {delete frontendBuffer; frontendBuffer = 0;}
725 if(readBuffer) {delete readBuffer; readBuffer = 0;}
726 if(writeBuffer) {delete writeBuffer; writeBuffer = 0;}
727}
728
729MemoryController ::~MemoryController(){
730
731 if(frontend) {delete frontend; frontend = 0;}
732 if(transecEngine) {delete transecEngine; transecEngine = 0;}
733 if(PHY) {delete PHY; PHY = 0;}
734 if(pipeLogic) {delete pipeLogic; pipeLogic = 0;}
735}
736