memoryctrl.cc revision 10152:52c552138ba1
1/*****************************************************************************
2 *                                McPAT
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *                          All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution;
14 * neither the name of the copyright holders nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 *
30 ***************************************************************************/
31#include <algorithm>
32#include <cassert>
33#include <cmath>
34#include <iostream>
35#include <string>
36
37#include "XML_Parse.h"
38#include "basic_circuit.h"
39#include "basic_components.h"
40#include "const.h"
41#include "io.h"
42#include "logic.h"
43#include "memoryctrl.h"
44#include "parameter.h"
45
/* Overview of MC models:
 * McPAT memory controllers are modeled according to a large number of industrial data points.
 * The basic memory controller architecture is based on the Synopsys designs
 * (DesignWare DDR2/DDR3-Lite memory controllers and DDR2/DDR3-Lite protocol controllers)
 * as in the Cadence ChipEstimator Tool.
 *
 * An MC has 3 parts (frontend, backend and PHY) as shown in this design. McPAT models both a
 * high-performance MC, based on Niagara processor designs and curve fitting, and a low-power MC,
 * based on data points in the Cadence ChipEstimator Tool.
 *
 * The frontend is modeled analytically; the backend is modeled empirically according to the
 * DDR2/DDR3-Lite protocol controllers in the Cadence ChipEstimator Tool.
 * The PHY is modeled based on
 * "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces," ISSCC 2006,
 * and "A 14mW 6.25Gb/s Transceiver in 90nm CMOS for Serial Chip-to-Chip Communication," ISSCC 2007.
 *
 * In the Cadence ChipEstimator Tool there are two types of memory controllers: full memory controllers
 * that include the frontend (the DesignWare DDR2/DDR3-Lite memory controllers), and backend-only
 * memory controllers (the DDR2/DDR3-Lite protocol controllers). Except for the DesignWare DDR2/DDR3-Lite
 * memory controllers, all memory controller IP in the Cadence ChipEstimator Tool are backend memory
 * controllers, such as DDRC 1600A and DDRC 800A. Thus, to some extent, the area and power difference between
 * the DesignWare DDR2/DDR3-Lite memory controllers and the DDR2/DDR3-Lite protocol controllers can serve as an
 * estimate of the frontend power and area, which is very close to the analytically modeled results of the
 * frontend for Niagara2@65nm.
 *
 */
71
72MCBackend::MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_)
73:l_ip(*interface_ip_),
74 mc_type(mc_type_),
75 mcp(mcp_)
76{
77
78  local_result = init_interface(&l_ip);
79  compute();
80
81}
82
83
84void MCBackend::compute()
85{
86  //double max_row_addr_width = 20.0;//Current address 12~18bits
87  double C_MCB, mc_power, backend_dyn, backend_gates;//, refresh_period,refresh_freq;//Equivalent per bit Cap for backend,
88  double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
89  double NMOS_sizing, PMOS_sizing;
90
91  if (mc_type == MC)
92  {
93          if (mcp.type == 0)
94          {
95                  //area = (2.2927*log(peakDataTransferRate)-14.504)*memDataWidth/144.0*(l_ip.F_sz_um/0.09);
96                  area.set_area((2.7927*log(mcp.peakDataTransferRate*2)-19.862)/2.0*mcp.dataBusWidth/128.0*(l_ip.F_sz_um/0.09)*mcp.num_channels*1e6);//um^2
97                  //assuming the approximately same scaling factor as seen in processors.
98                  //C_MCB=0.2/1.3/1.3/266/64/0.09*g_ip.F_sz_um;//based on AMD Geode processor which has a very basic mc on chip.
99                  //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process.
100                  //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et
101                  mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend
102                  C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065;
103                  power_t.readOp.dynamic = C_MCB*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(mcp.dataBusWidth/*+mcp.addressBusWidth*/);//per access energy in memory controller
104                  power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
105                  power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
106
107          }
108          else
109          {   NMOS_sizing 	  = g_tp.min_w_nmos_;
110                  PMOS_sizing	  = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
111                  area.set_area(0.15*mcp.dataBusWidth/72.0*(l_ip.F_sz_um/0.065)* (l_ip.F_sz_um/0.065)*mcp.num_channels*1e6);//um^2
112                  backend_dyn = 0.9e-9/800e6*mcp.clockRate/12800*mcp.peakDataTransferRate*mcp.dataBusWidth/72.0*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(l_ip.F_sz_nm/65.0);//Average on DDR2/3 protocol controller and DDRC 1600/800A in Cadence ChipEstimate
113                  //Scaling to technology and DIMM feature. The base IP support DDR3-1600(PC3 12800)
114                  backend_gates = 50000*mcp.dataBusWidth/64.0;//5000 is from Cadence ChipEstimator
115
116                  power_t.readOp.dynamic = backend_dyn;
117                  power_t.readOp.leakage = (backend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
118                  power_t.readOp.gate_leakage = (backend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
119
120          }
121  }
122  else
123  {//skip old model
124          cout<<"Unknown memory controllers"<<endl;exit(0);
125          area.set_area(0.243*mcp.dataBusWidth/8);//area based on Cadence ChipEstimator for 8bit bus
126          //mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend
127          C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065;
128          power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
129          power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
130          power_t.readOp.dynamic *= 1.2;
131          power_t.readOp.leakage *= 1.2;
132          power_t.readOp.gate_leakage *= 1.2;
133          //flash controller has about 20% more backend power since BCH ECC in flash is complex and power hungry
134  }
135  double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
136  power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
137}
138
139void MCBackend::computeEnergy(bool is_tdp)
140{
141        //backend uses internal data buswidth
142        if (is_tdp)
143        {
144                //init stats for Peak
145                stats_t.readAc.access   = 0.5*mcp.num_channels;
146                stats_t.writeAc.access  = 0.5*mcp.num_channels;
147                tdp_stats = stats_t;
148        }
149        else
150        {
151                //init stats for runtime power (RTP)
152                stats_t.readAc.access   = mcp.reads;
153                stats_t.writeAc.access  = mcp.writes;
154                tdp_stats = stats_t;
155        }
156        if (is_tdp)
157    {
158                power = power_t;
159                power.readOp.dynamic	= (stats_t.readAc.access + stats_t.writeAc.access)*power_t.readOp.dynamic;
160
161    }
162    else
163    {
164        rt_power.readOp.dynamic	= (stats_t.readAc.access + stats_t.writeAc.access)*mcp.llcBlockSize*8.0/mcp.dataBusWidth*power_t.readOp.dynamic;
165        rt_power = rt_power + power_t*pppm_lkg;
166        rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime;
167        //Assume 10% of peak power is consumed by routine job including memory refreshing and scrubbing
168    }
169}
170
171
172MCPHY::MCPHY(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_)
173:l_ip(*interface_ip_),
174 mc_type(mc_type_),
175 mcp(mcp_)
176{
177
178  local_result = init_interface(&l_ip);
179  compute();
180}
181
182void MCPHY::compute()
183{
184  //PHY uses internal data buswidth but the actuall off-chip datawidth is 64bits + ecc
185  double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio() ;
186  /*
187   * according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces ," ISSCC 2006;
188   * From Cadence ChipEstimator for normal I/O around 0.4~0.8 mW/Gb/s
189   */
190  double power_per_gb_per_s, phy_dyn,phy_gates, NMOS_sizing, PMOS_sizing;
191
192  if (mc_type == MC)
193  {
194          if (mcp.type == 0)
195          {
196                  power_per_gb_per_s = mcp.LVDS? 0.01:0.04;
197                  //Based on die photos from Niagara 1 and 2.
198                  //TODO merge this into undifferentiated core.PHY only achieves square root of the ideal scaling.
199                  //area = (6.4323*log(peakDataTransferRate)-34.76)*memDataWidth/128.0*(l_ip.F_sz_um/0.09);
200                  area.set_area((6.4323*log(mcp.peakDataTransferRate*2)-48.134)*mcp.dataBusWidth/128.0*(l_ip.F_sz_um/0.09)*mcp.num_channels*1e6/2);//TODO:/2
201                  //This is from curve fitting based on Niagara 1 and 2's PHY die photo.
202                  //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
203                  //power.readOp.dynamic = 0.02*memAccesses*llcBlocksize*8;//change from Bytes to bits.
204                  power_t.readOp.dynamic = power_per_gb_per_s*sqrt(l_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
205                  power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
206                  power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
207
208          }
209          else
210          {
211                  NMOS_sizing 	  = g_tp.min_w_nmos_;
212                  PMOS_sizing	  = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
213                  //Designware/synopsis 16bit DDR3 PHY is 1.3mm (WITH IOs) at 40nm for upto DDR3 2133 (PC3 17066)
214                  double non_IO_percentage = 0.2;
215                  area.set_area(1.3*non_IO_percentage/2133.0e6*mcp.clockRate/17066*mcp.peakDataTransferRate*mcp.dataBusWidth/16.0*(l_ip.F_sz_um/0.040)* (l_ip.F_sz_um/0.040)*mcp.num_channels*1e6);//um^2
216                  phy_gates = 200000*mcp.dataBusWidth/64.0;
217                  power_per_gb_per_s = 0.01;
218                  //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
219                  power_t.readOp.dynamic = power_per_gb_per_s*(l_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
220                  power_t.readOp.leakage = (mcp.withPHY? phy_gates:0)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
221                  power_t.readOp.gate_leakage = (mcp.withPHY? phy_gates:0)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
222          }
223
224  }
225  else
226  {
227          area.set_area(0.4e6/2*mcp.dataBusWidth/8);//area based on Cadence ChipEstimator for 8bit bus
228  }
229
230//  double phy_factor = (int)ceil(mcp.dataBusWidth/72.0);//Previous phy power numbers are based on 72 bit DIMM interface
231//  power_t.readOp.dynamic *= phy_factor;
232//  power_t.readOp.leakage *= phy_factor;
233//  power_t.readOp.gate_leakage *= phy_factor;
234
235  double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
236  power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
237}
238
239
240void MCPHY::computeEnergy(bool is_tdp)
241{
242        if (is_tdp)
243        {
244                //init stats for Peak
245                stats_t.readAc.access   = 0.5*mcp.num_channels; //time share on buses
246                stats_t.writeAc.access  = 0.5*mcp.num_channels;
247                tdp_stats = stats_t;
248        }
249        else
250        {
251                //init stats for runtime power (RTP)
252                stats_t.readAc.access   = mcp.reads;
253                stats_t.writeAc.access  = mcp.writes;
254                tdp_stats = stats_t;
255        }
256
257        if (is_tdp)
258    {
259                double data_transfer_unit = (mc_type == MC)? 72:16;/*DIMM data width*/
260                power = power_t;
261                power.readOp.dynamic	= power.readOp.dynamic * (mcp.peakDataTransferRate*8*1e6/1e9/*change to Gbs*/)*mcp.dataBusWidth/data_transfer_unit*mcp.num_channels/mcp.clockRate;
262                // divide by clock rate is for match the final computation where *clock is used
263                //(stats_t.readAc.access*power_t.readOp.dynamic+
264//					stats_t.writeAc.access*power_t.readOp.dynamic);
265
266    }
267    else
268    {
269        rt_power = power_t;
270//    	rt_power.readOp.dynamic	= (stats_t.readAc.access*power_t.readOp.dynamic+
271//    						stats_t.writeAc.access*power_t.readOp.dynamic);
272
273        rt_power.readOp.dynamic=power_t.readOp.dynamic*(stats_t.readAc.access + stats_t.writeAc.access)*(mcp.llcBlockSize)*8/1e9/mcp.executionTime*(mcp.executionTime);
274        rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime;
275    }
276}
277
278MCFrontEnd::MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_)
279:XML(XML_interface),
280 interface_ip(*interface_ip_),
281 mc_type(mc_type_),
282 mcp(mcp_),
283 MC_arb(0),
284 frontendBuffer(0),
285 readBuffer(0),
286 writeBuffer(0)
287{
288  /* All computations are for a single MC
289   *
290   */
291
292  int tag, data;
293  bool is_default =true;//indication for default setup
294
295  /* MC frontend engine channels share the same engines but logically partitioned
296   * For all hardware inside MC. different channels do not share resources.
297   * TODO: add docodeing/mux stage to steer memory requests to different channels.
298   */
299
300  //memory request reorder buffer
301  tag							   = mcp.addressBusWidth  + EXTRA_TAG_BITS + mcp.opcodeW;
302  data    					 	   = int(ceil((XML->sys.physical_address_width + mcp.opcodeW)/8.0));
303  interface_ip.cache_sz            = data*XML->sys.mc.req_window_size_per_channel;
304  interface_ip.line_sz             = data;
305  interface_ip.assoc               = 0;
306  interface_ip.nbanks              = 1;
307  interface_ip.out_w               = interface_ip.line_sz*8;
308  interface_ip.specific_tag        = 1;
309  interface_ip.tag_w               = tag;
310  interface_ip.access_mode         = 0;
311  interface_ip.throughput          = 1.0/mcp.clockRate;
312  interface_ip.latency             = 1.0/mcp.clockRate;
313  interface_ip.is_cache			   = true;
314  interface_ip.pure_cam            = false;
315  interface_ip.pure_ram            = false;
316  interface_ip.obj_func_dyn_energy = 0;
317  interface_ip.obj_func_dyn_power  = 0;
318  interface_ip.obj_func_leak_power = 0;
319  interface_ip.obj_func_cycle_t    = 1;
320  interface_ip.num_rw_ports        = 0;
321  interface_ip.num_rd_ports        = XML->sys.mc.memory_channels_per_mc;
322  interface_ip.num_wr_ports        = interface_ip.num_rd_ports;
323  interface_ip.num_se_rd_ports     = 0;
324  interface_ip.num_search_ports     = XML->sys.mc.memory_channels_per_mc;
325  frontendBuffer = new ArrayST(&interface_ip, "MC ReorderBuffer", Uncore_device);
326  frontendBuffer->area.set_area(frontendBuffer->area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
327  area.set_area(area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
328
329  //selection and arbitration logic
330  MC_arb = new selection_logic(is_default, XML->sys.mc.req_window_size_per_channel,1,&interface_ip, Uncore_device);
331
332  //read buffers.
333  data    					 	   = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte
334  interface_ip.cache_sz            = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize;
335  interface_ip.line_sz             = data;
336  interface_ip.assoc               = 1;
337  interface_ip.nbanks              = 1;
338  interface_ip.out_w               = interface_ip.line_sz*8;
339  interface_ip.access_mode         = 1;
340  interface_ip.throughput          = 1.0/mcp.clockRate;
341  interface_ip.latency             = 1.0/mcp.clockRate;
342  interface_ip.is_cache			   = false;
343  interface_ip.pure_cam            = false;
344  interface_ip.pure_ram            = true;
345  interface_ip.obj_func_dyn_energy = 0;
346  interface_ip.obj_func_dyn_power  = 0;
347  interface_ip.obj_func_leak_power = 0;
348  interface_ip.obj_func_cycle_t    = 1;
349  interface_ip.num_rw_ports        = 0;//XML->sys.mc.memory_channels_per_mc*2>2?2:XML->sys.mc.memory_channels_per_mc*2;
350  interface_ip.num_rd_ports        = XML->sys.mc.memory_channels_per_mc;
351  interface_ip.num_wr_ports        = interface_ip.num_rd_ports;
352  interface_ip.num_se_rd_ports     = 0;
353  readBuffer = new ArrayST(&interface_ip, "MC ReadBuffer", Uncore_device);
354  readBuffer->area.set_area(readBuffer->area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
355  area.set_area(area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
356
357  //write buffer
358  data    					 	   = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte
359  interface_ip.cache_sz            = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize;
360  interface_ip.line_sz             = data;
361  interface_ip.assoc               = 1;
362  interface_ip.nbanks              = 1;
363  interface_ip.out_w               = interface_ip.line_sz*8;
364  interface_ip.access_mode         = 0;
365  interface_ip.throughput          = 1.0/mcp.clockRate;
366  interface_ip.latency             = 1.0/mcp.clockRate;
367  interface_ip.obj_func_dyn_energy = 0;
368  interface_ip.obj_func_dyn_power  = 0;
369  interface_ip.obj_func_leak_power = 0;
370  interface_ip.obj_func_cycle_t    = 1;
371  interface_ip.num_rw_ports        = 0;
372  interface_ip.num_rd_ports        = XML->sys.mc.memory_channels_per_mc;
373  interface_ip.num_wr_ports        = interface_ip.num_rd_ports;
374  interface_ip.num_se_rd_ports     = 0;
375  writeBuffer = new ArrayST(&interface_ip, "MC writeBuffer", Uncore_device);
376  writeBuffer->area.set_area(writeBuffer->area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
377  area.set_area(area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc);
378}
379
380void MCFrontEnd::computeEnergy(bool is_tdp)
381{
382        if (is_tdp)
383            {
384                //init stats for Peak
385                frontendBuffer->stats_t.readAc.access  = frontendBuffer->l_ip.num_search_ports;
386                frontendBuffer->stats_t.writeAc.access = frontendBuffer->l_ip.num_wr_ports;
387                frontendBuffer->tdp_stats = frontendBuffer->stats_t;
388
389                readBuffer->stats_t.readAc.access  = readBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle;
390                readBuffer->stats_t.writeAc.access = readBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle;
391                readBuffer->tdp_stats = readBuffer->stats_t;
392
393                writeBuffer->stats_t.readAc.access  = writeBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle;
394                writeBuffer->stats_t.writeAc.access = writeBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle;
395                writeBuffer->tdp_stats = writeBuffer->stats_t;
396
397            }
398            else
399            {
400                //init stats for runtime power (RTP)
401                frontendBuffer->stats_t.readAc.access  = XML->sys.mc.memory_reads *mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72;
402                //For each channel, each memory word need to check the address data to achieve best scheduling results.
403                //and this need to be done on all physical DIMMs in each logical memory DIMM *mcp.dataBusWidth/72
404                frontendBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72;
405                frontendBuffer->rtp_stats = frontendBuffer->stats_t;
406
407                readBuffer->stats_t.readAc.access  = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first
408                readBuffer->stats_t.writeAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first
409                readBuffer->rtp_stats = readBuffer->stats_t;
410
411                writeBuffer->stats_t.readAc.access  = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth;
412                writeBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth;
413                writeBuffer->rtp_stats = writeBuffer->stats_t;
414            }
415
416        frontendBuffer->power_t.reset();
417        readBuffer->power_t.reset();
418        writeBuffer->power_t.reset();
419
420//	frontendBuffer->power_t.readOp.dynamic	+= (frontendBuffer->stats_t.readAc.access*
421//			(frontendBuffer->local_result.power.searchOp.dynamic+frontendBuffer->local_result.power.readOp.dynamic)+
422//    		frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic);
423
424                frontendBuffer->power_t.readOp.dynamic	+= (frontendBuffer->stats_t.readAc.access +
425                                  frontendBuffer->stats_t.writeAc.access)*frontendBuffer->local_result.power.searchOp.dynamic
426                                + frontendBuffer->stats_t.readAc.access * frontendBuffer->local_result.power.readOp.dynamic
427                                + frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic;
428
429        readBuffer->power_t.readOp.dynamic	+= (readBuffer->stats_t.readAc.access*
430                        readBuffer->local_result.power.readOp.dynamic+
431                readBuffer->stats_t.writeAc.access*readBuffer->local_result.power.writeOp.dynamic);
432        writeBuffer->power_t.readOp.dynamic	+= (writeBuffer->stats_t.readAc.access*
433                        writeBuffer->local_result.power.readOp.dynamic+
434                writeBuffer->stats_t.writeAc.access*writeBuffer->local_result.power.writeOp.dynamic);
435
436        if (is_tdp)
437    {
438        power = power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t +
439                (frontendBuffer->local_result.power +
440                                readBuffer->local_result.power +
441                                writeBuffer->local_result.power)*pppm_lkg;
442
443    }
444    else
445    {
446        rt_power = rt_power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t +
447                (frontendBuffer->local_result.power +
448                                readBuffer->local_result.power +
449                                writeBuffer->local_result.power)*pppm_lkg;
450        rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime;
451    }
452}
453
454void MCFrontEnd::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
455{
456        string indent_str(indent, ' ');
457        string indent_str_next(indent+2, ' ');
458
459        if (is_tdp)
460        {
461                cout << indent_str << "Front End ROB:" << endl;
462                cout << indent_str_next << "Area = " << frontendBuffer->area.get_area()*1e-6<< " mm^2" << endl;
463                cout << indent_str_next << "Peak Dynamic = " << frontendBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl;
464                cout << indent_str_next << "Subthreshold Leakage = " << frontendBuffer->power.readOp.leakage <<" W" << endl;
465                cout << indent_str_next << "Gate Leakage = " << frontendBuffer->power.readOp.gate_leakage << " W" << endl;
466                cout << indent_str_next << "Runtime Dynamic = " << frontendBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
467
468                cout <<endl;
469                cout << indent_str<< "Read Buffer:" << endl;
470                cout << indent_str_next << "Area = " << readBuffer->area.get_area()*1e-6  << " mm^2" << endl;
471                cout << indent_str_next << "Peak Dynamic = " << readBuffer->power.readOp.dynamic*mcp.clockRate  << " W" << endl;
472                cout << indent_str_next << "Subthreshold Leakage = " << readBuffer->power.readOp.leakage  << " W" << endl;
473                cout << indent_str_next << "Gate Leakage = " << readBuffer->power.readOp.gate_leakage  << " W" << endl;
474                cout << indent_str_next << "Runtime Dynamic = " << readBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
475                cout <<endl;
476                cout << indent_str << "Write Buffer:" << endl;
477                cout << indent_str_next << "Area = " << writeBuffer->area.get_area() *1e-6 << " mm^2" << endl;
478                cout << indent_str_next << "Peak Dynamic = " << writeBuffer->power.readOp.dynamic*mcp.clockRate  << " W" << endl;
479                cout << indent_str_next << "Subthreshold Leakage = " << writeBuffer->power.readOp.leakage  << " W" << endl;
480                cout << indent_str_next << "Gate Leakage = " << writeBuffer->power.readOp.gate_leakage  << " W" << endl;
481                cout << indent_str_next << "Runtime Dynamic = " << writeBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
482                cout <<endl;
483        }
484        else
485        {
486                cout << indent_str << "Front End ROB:" << endl;
487                cout << indent_str_next << "Area = " << frontendBuffer->area.get_area()*1e-6<< " mm^2" << endl;
488                cout << indent_str_next << "Peak Dynamic = " << frontendBuffer->rt_power.readOp.dynamic*mcp.clockRate << " W" << endl;
489                cout << indent_str_next << "Subthreshold Leakage = " << frontendBuffer->rt_power.readOp.leakage <<" W" << endl;
490                cout << indent_str_next << "Gate Leakage = " << frontendBuffer->rt_power.readOp.gate_leakage << " W" << endl;
491                cout <<endl;
492                cout << indent_str<< "Read Buffer:" << endl;
493                cout << indent_str_next << "Area = " << readBuffer->area.get_area()*1e-6  << " mm^2" << endl;
494                cout << indent_str_next << "Peak Dynamic = " << readBuffer->rt_power.readOp.dynamic*mcp.clockRate  << " W" << endl;
495                cout << indent_str_next << "Subthreshold Leakage = " << readBuffer->rt_power.readOp.leakage  << " W" << endl;
496                cout << indent_str_next << "Gate Leakage = " << readBuffer->rt_power.readOp.gate_leakage  << " W" << endl;
497                cout <<endl;
498                cout << indent_str << "Write Buffer:" << endl;
499                cout << indent_str_next << "Area = " << writeBuffer->area.get_area() *1e-6 << " mm^2" << endl;
500                cout << indent_str_next << "Peak Dynamic = " << writeBuffer->rt_power.readOp.dynamic*mcp.clockRate  << " W" << endl;
501                cout << indent_str_next << "Subthreshold Leakage = " << writeBuffer->rt_power.readOp.leakage  << " W" << endl;
502                cout << indent_str_next << "Gate Leakage = " << writeBuffer->rt_power.readOp.gate_leakage  << " W" << endl;
503        }
504
505}
506
507
508MemoryController::MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_)
509:XML(XML_interface),
510 interface_ip(*interface_ip_),
511 mc_type(mc_type_),
512 frontend(0),
513 transecEngine(0),
514 PHY(0),
515 pipeLogic(0)
516{
517  /* All computations are for a single MC
518   *
519   */
520  interface_ip.wire_is_mat_type = 2;
521  interface_ip.wire_os_mat_type = 2;
522  interface_ip.wt               =Global;
523  set_mc_param();
524  frontend = new MCFrontEnd(XML, &interface_ip, mcp, mc_type);
525  area.set_area(area.get_area()+ frontend->area.get_area());
526  transecEngine = new MCBackend(&interface_ip, mcp, mc_type);
527  area.set_area(area.get_area()+ transecEngine->area.get_area());
528  if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
529  {
530          PHY = new MCPHY(&interface_ip, mcp, mc_type);
531          area.set_area(area.get_area()+ PHY->area.get_area());
532  }
533  //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc.
534//  transecEngine.initialize(&interface_ip);
535//  transecEngine.peakDataTransferRate = XML->sys.mem.peak_transfer_rate;
536//  transecEngine.memDataWidth = dataBusWidth;
537//  transecEngine.memRank = XML->sys.mem.number_ranks;
538//  //transecEngine.memAccesses=XML->sys.mc.memory_accesses;
539//  //transecEngine.llcBlocksize=llcBlockSize;
540//  transecEngine.compute();
541//  transecEngine.area.set_area(XML->sys.mc.memory_channels_per_mc*transecEngine.area.get_area()) ;
542//  area.set_area(area.get_area()+ transecEngine.area.get_area());
543//  ///cout<<"area="<<area<<endl;
544////
545//  //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers
546//  PHY.initialize(&interface_ip);
547//  PHY.peakDataTransferRate = XML->sys.mem.peak_transfer_rate;
548//  PHY.memDataWidth = dataBusWidth;
549//  //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power
550//  //PHY.llcBlocksize=llcBlockSize;
551//  PHY.compute();
552//  PHY.area.set_area(XML->sys.mc.memory_channels_per_mc*PHY.area.get_area()) ;
553//  area.set_area(area.get_area()+ PHY.area.get_area());
554  ///cout<<"area="<<area<<endl;
555//
556//  interface_ip.pipeline_stages = 5;//normal memory controller has five stages in the pipeline.
557//  interface_ip.per_stage_vector = addressBusWidth + XML->sys.core[0].opcode_width + dataBusWidth;
558//  pipeLogic = new pipeline(is_default, &interface_ip);
559//  //pipeLogic.init_pipeline(is_default, &interface_ip);
560//  pipeLogic->compute_pipeline();
561//  area.set_area(area.get_area()+ pipeLogic->area.get_area()*1e-6);
562//  area.set_area((area.get_area()+mc_area*1e-6)*1.1);//placement and routing overhead
563//
564//
565////  //clock
566////  clockNetwork.init_wire_external(is_default, &interface_ip);
567////  clockNetwork.clk_area           =area*1.1;//10% of placement overhead. rule of thumb
568////  clockNetwork.end_wiring_level   =5;//toplevel metal
569////  clockNetwork.start_wiring_level =5;//toplevel metal
570////  clockNetwork.num_regs           = pipeLogic.tot_stage_vector;
571////  clockNetwork.optimize_wire();
572
573
574}
575void MemoryController::computeEnergy(bool is_tdp)
576{
577
578        frontend->computeEnergy(is_tdp);
579        transecEngine->computeEnergy(is_tdp);
580        if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
581        {
582                PHY->computeEnergy(is_tdp);
583        }
584        if (is_tdp)
585        {
586                power = power + frontend->power + transecEngine->power;
587                if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
588                {
589                        power = power + PHY->power;
590                }
591        }
592        else
593        {
594                rt_power = rt_power + frontend->rt_power + transecEngine->rt_power;
595                if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
596                {
597                        rt_power = rt_power + PHY->rt_power;
598                }
599        }
600}
601
602void MemoryController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
603{
604        string indent_str(indent, ' ');
605        string indent_str_next(indent+2, ' ');
606        bool long_channel = XML->sys.longer_channel_device;
607
608        if (is_tdp)
609        {
610                cout << "Memory Controller:" << endl;
611                cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
612                cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*mcp.clockRate  << " W" << endl;
613                cout << indent_str<< "Subthreshold Leakage = "
614                        << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
615                //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
616                cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
617                cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
618                cout<<endl;
619                cout << indent_str << "Front End Engine:" << endl;
620                cout << indent_str_next << "Area = " << frontend->area.get_area()*1e-6<< " mm^2" << endl;
621                cout << indent_str_next << "Peak Dynamic = " << frontend->power.readOp.dynamic*mcp.clockRate << " W" << endl;
622                cout << indent_str_next << "Subthreshold Leakage = "
623                        << (long_channel? frontend->power.readOp.longer_channel_leakage:frontend->power.readOp.leakage) <<" W" << endl;
624                cout << indent_str_next << "Gate Leakage = " << frontend->power.readOp.gate_leakage << " W" << endl;
625                cout << indent_str_next << "Runtime Dynamic = " << frontend->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
626                cout <<endl;
627                if (plevel >2){
628                        frontend->displayEnergy(indent+4,is_tdp);
629                }
630                cout << indent_str << "Transaction Engine:" << endl;
631                cout << indent_str_next << "Area = " << transecEngine->area.get_area()*1e-6<< " mm^2" << endl;
632                cout << indent_str_next << "Peak Dynamic = " << transecEngine->power.readOp.dynamic*mcp.clockRate << " W" << endl;
633                cout << indent_str_next << "Subthreshold Leakage = "
634                        << (long_channel? transecEngine->power.readOp.longer_channel_leakage:transecEngine->power.readOp.leakage) <<" W" << endl;
635                cout << indent_str_next << "Gate Leakage = " << transecEngine->power.readOp.gate_leakage << " W" << endl;
636                cout << indent_str_next << "Runtime Dynamic = " << transecEngine->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
637                cout <<endl;
638                if (mcp.type==0 || (mcp.type==1&&mcp.withPHY))
639                {
640                        cout << indent_str << "PHY:" << endl;
641                        cout << indent_str_next << "Area = " << PHY->area.get_area()*1e-6<< " mm^2" << endl;
642                        cout << indent_str_next << "Peak Dynamic = " << PHY->power.readOp.dynamic*mcp.clockRate << " W" << endl;
643                        cout << indent_str_next << "Subthreshold Leakage = "
644                        << (long_channel? PHY->power.readOp.longer_channel_leakage:PHY->power.readOp.leakage) <<" W" << endl;
645                        cout << indent_str_next << "Gate Leakage = " << PHY->power.readOp.gate_leakage << " W" << endl;
646                        cout << indent_str_next << "Runtime Dynamic = " << PHY->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl;
647                        cout <<endl;
648                }
649        }
650        else
651        {
652                cout << "Memory Controller:" << endl;
653                cout << indent_str_next << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
654                cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*mcp.clockRate << " W" << endl;
655                cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage <<" W" << endl;
656                cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
657                cout<<endl;
658        }
659
660}
661
662void MemoryController::set_mc_param()
663{
664
665        if (mc_type==MC)
666        {
667          mcp.clockRate       =XML->sys.mc.mc_clock*2;//DDR double pumped
668          mcp.clockRate       *= 1e6;
669          mcp.executionTime   = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
670
671          mcp.llcBlockSize    =int(ceil(XML->sys.mc.llc_line_length/8.0))+XML->sys.mc.llc_line_length;//ecc overhead
672          mcp.dataBusWidth    =int(ceil(XML->sys.mc.databus_width/8.0)) + XML->sys.mc.databus_width;
673          mcp.addressBusWidth =int(ceil(XML->sys.mc.addressbus_width));//XML->sys.physical_address_width;
674          mcp.opcodeW         =16;
675          mcp.num_mcs         = XML->sys.mc.number_mcs;
676          mcp.num_channels    = XML->sys.mc.memory_channels_per_mc;
677          mcp.reads  = XML->sys.mc.memory_reads;
678          mcp.writes = XML->sys.mc.memory_writes;
679          //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc.
680          mcp.peakDataTransferRate = XML->sys.mc.peak_transfer_rate;
681          mcp.memRank = XML->sys.mc.number_ranks;
682          //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers
683          //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power
684          //PHY.llcBlocksize=llcBlockSize;
685          mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared
686          mcp.LVDS = XML->sys.mc.LVDS;
687          mcp.type = XML->sys.mc.type;
688          mcp.withPHY = XML->sys.mc.withPHY;
689        }
690//	else if (mc_type==FLASHC)
691//	{
692//		mcp.clockRate       =XML->sys.flashc.mc_clock*2;//DDR double pumped
693//		mcp.clockRate       *= 1e6;
694//		mcp.executionTime   = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
695//
696//		mcp.llcBlockSize    =int(ceil(XML->sys.flashc.llc_line_length/8.0))+XML->sys.flashc.llc_line_length;//ecc overhead
697//		mcp.dataBusWidth    =int(ceil(XML->sys.flashc.databus_width/8.0)) + XML->sys.flashc.databus_width;
698//		mcp.addressBusWidth =int(ceil(XML->sys.flashc.addressbus_width));//XML->sys.physical_address_width;
699//		mcp.opcodeW         =16;
700//		mcp.num_mcs         = XML->sys.flashc.number_mcs;
701//		mcp.num_channels    = XML->sys.flashc.memory_channels_per_mc;
702//		mcp.reads  = XML->sys.flashc.memory_reads;
703//		mcp.writes = XML->sys.flashc.memory_writes;
704//		//+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc.
705//		mcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate;
706//		mcp.memRank = XML->sys.flashc.number_ranks;
707//		//++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers
708//		//PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power
709//		//PHY.llcBlocksize=llcBlockSize;
710//		mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared
711//		mcp.LVDS = XML->sys.flashc.LVDS;
712//		mcp.type = XML->sys.flashc.type;
713//	}
714        else
715        {
716                cout<<"Unknown memory controller type: neither DRAM controller nor Flash controller" <<endl;
717                exit(0);
718        }
719}
720
721MCFrontEnd ::~MCFrontEnd(){
722
723        if(MC_arb) 	               {delete MC_arb; MC_arb = 0;}
724        if(frontendBuffer) 	       {delete frontendBuffer; frontendBuffer = 0;}
725        if(readBuffer) 	           {delete readBuffer; readBuffer = 0;}
726        if(writeBuffer) 	       {delete writeBuffer; writeBuffer = 0;}
727}
728
729MemoryController ::~MemoryController(){
730
731        if(frontend) 	               {delete frontend; frontend = 0;}
732        if(transecEngine) 	           {delete transecEngine; transecEngine = 0;}
733        if(PHY) 	                   {delete PHY; PHY = 0;}
734        if(pipeLogic) 	               {delete pipeLogic; pipeLogic = 0;}
735}
736
737