1/*****************************************************************************
2 *                                McPAT
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
6 *                          All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 ***************************************************************************/
32
33#include <algorithm>
34#include <cassert>
35#include <cmath>
36#include <iostream>
37#include <string>
38
39#include "basic_circuit.h"
40#include "basic_components.h"
41#include "common.h"
42#include "const.h"
43#include "io.h"
44#include "logic.h"
45#include "memoryctrl.h"
46#include "parameter.h"
47
/* Overview of MC models:
 * McPAT memory controllers are modeled according to a large number of industrial data points.
 * The basic memory controller architecture is based on the Synopsys designs
 * (DesignWare DDR2/DDR3-Lite memory controllers and DDR2/DDR3-Lite protocol controllers)
 * as in the Cadence ChipEstimator Tool.
53 *
 * An MC has 3 parts as shown in this design. McPAT models both high performance MC
 * based on Niagara processor designs and curve fitting, and low power MC based on data points in
 * the Cadence ChipEstimator Tool.
57 *
 * The frontend is modeled analytically; the backend is modeled empirically according to
 * the DDR2/DDR3-Lite protocol controllers in the Cadence ChipEstimator Tool.
 * The PHY is modeled based on
 * "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for Next-Generation Memory Interfaces," ISSCC 2006,
 * and "A 14mW 6.25Gb/s Transceiver in 90nm CMOS for Serial Chip-to-Chip Communication," ISSCC 2007.
63 *
 * In the Cadence ChipEstimator Tool there are two types of memory controllers: the full memory controllers
 * that include the frontend, such as the DesignWare DDR2/DDR3-Lite memory controllers, and the backend-only
 * memory controllers, such as the DDR2/DDR3-Lite protocol controllers (except for the DesignWare DDR2/DDR3-Lite
 * memory controllers, all memory controller IP in the Cadence ChipEstimator Tool are backend memory controllers
 * such as DDRC 1600A and DDRC 800A). Thus, to some extent the area and power difference between the DesignWare
 * DDR2/DDR3-Lite memory controllers and the DDR2/DDR3-Lite protocol controllers can serve as an estimate of the
 * frontend power and area, which is very close to the analytically modeled results of the frontend for Niagara2@65nm.
71 *
72 */
73
74MCBackend::MCBackend(XMLNode* _xml_data, InputParameter* interface_ip_,
75                     const MCParameters & mcp_, const MCStatistics & mcs_)
76    : McPATComponent(_xml_data), l_ip(*interface_ip_), mcp(mcp_), mcs(mcs_) {
77    name = "Transaction Engine";
78    local_result = init_interface(&l_ip, name);
79
80    // Set up stats for the power calculations
81    tdp_stats.reset();
82    tdp_stats.readAc.access = 0.5 * mcp.num_channels * mcp.clockRate;
83    tdp_stats.writeAc.access = 0.5 * mcp.num_channels * mcp.clockRate;
84    rtp_stats.reset();
85    rtp_stats.readAc.access = mcs.reads;
86    rtp_stats.writeAc.access = mcs.writes;
87}
88
89void MCBackend::computeArea() {
90    // The area is in nm^2
91    if (mcp.mc_type == MC) {
92        if (mcp.type == 0) {
93            output_data.area = (2.7927 * log(mcp.peak_transfer_rate * 2) -
94                                19.862) / 2.0 * mcp.dataBusWidth / 128.0 *
95                (l_ip.F_sz_um / 0.09) * mcp.num_channels;
96        } else {
97            output_data.area = 0.15 * mcp.dataBusWidth / 72.0 *
98                (l_ip.F_sz_um / 0.065) * (l_ip.F_sz_um / 0.065) *
99                mcp.num_channels;
100        }
101    } else {
102        //skip old model
103        cout << "Unknown memory controllers" << endl;
104        exit(0);
105        //area based on Cadence ChipEstimator for 8bit bus
106        output_data.area = 0.243 * mcp.dataBusWidth / 8;
107    }
108}
109
110
111void MCBackend::computeEnergy() {
112    double C_MCB, mc_power;
113    double backend_dyn;
114    double backend_gates;
115    double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
116    double NMOS_sizing = g_tp.min_w_nmos_;
117    double PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
118    double area_um2 = output_data.area * 1e6;
119
120    if (mcp.mc_type == MC) {
121        if (mcp.type == 0) {
122            //assuming the approximately same scaling factor as seen in processors.
123            //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process.
124            //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et
125            mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend
126            C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065;
127            //per access energy in memory controller
128            power.readOp.dynamic = C_MCB * g_tp.peri_global.Vdd *
129                g_tp.peri_global.Vdd *
130                (mcp.dataBusWidth/*+mcp.addressBusWidth*/);
131            power.readOp.leakage = area_um2 / 2 *
132                (g_tp.scaling_factor.core_tx_density) *
133                cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
134                g_tp.peri_global.Vdd;//unit W
135            power.readOp.gate_leakage = area_um2 / 2 *
136                (g_tp.scaling_factor.core_tx_density) *
137                cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
138                g_tp.peri_global.Vdd;//unit W
139        } else {
140            //Average on DDR2/3 protocol controller and DDRC 1600/800A in
141            //Cadence ChipEstimate
142            backend_dyn = 0.9e-9 / 800e6 * mcp.clockRate / 12800 *
143                mcp.peak_transfer_rate* mcp.dataBusWidth / 72.0 *
144                g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / 1.1 *
145                (l_ip.F_sz_nm/65.0);
146            //Scaling to technology and DIMM feature. The base IP support
147            //DDR3-1600(PC3 12800)
148            //5000 is from Cadence ChipEstimator
149            backend_gates = 50000 * mcp.dataBusWidth / 64.0;
150
151            power.readOp.dynamic = backend_dyn;
152            power.readOp.leakage = (backend_gates) *
153                cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
154                g_tp.peri_global.Vdd;//unit W
155            power.readOp.gate_leakage = (backend_gates) *
156                cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
157                g_tp.peri_global.Vdd;//unit W
158          }
159    } else {
160        //skip old model
161        cout<<"Unknown memory controllers"<<endl;exit(0);
162        //mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend
163        C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065;
164        power.readOp.leakage = area_um2 / 2 *
165            (g_tp.scaling_factor.core_tx_density) *
166            cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
167            g_tp.peri_global.Vdd;//unit W
168        power.readOp.gate_leakage = area_um2 / 2 *
169            (g_tp.scaling_factor.core_tx_density) *
170            cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
171            g_tp.peri_global.Vdd;//unit W
172        power.readOp.dynamic *= 1.2;
173        power.readOp.leakage *= 1.2;
174        power.readOp.gate_leakage *= 1.2;
175        //flash controller has about 20% more backend power since BCH ECC in
176        //flash is complex and power hungry
177    }
178  double long_channel_device_reduction =
179      longer_channel_device_reduction(Uncore_device);
180  power.readOp.longer_channel_leakage = power.readOp.leakage *
181      long_channel_device_reduction;
182
183  // Output leakage power calculations
184  output_data.subthreshold_leakage_power =
185      longer_channel_device ? power.readOp.longer_channel_leakage :
186      power.readOp.leakage;
187  output_data.gate_leakage_power = power.readOp.gate_leakage;
188
189  // Peak dynamic power calculation
190  output_data.peak_dynamic_power = power.readOp.dynamic *
191      (tdp_stats.readAc.access + tdp_stats.writeAc.access);
192
193  // Runtime dynamic energy calculation
194  output_data.runtime_dynamic_energy =
195      power.readOp.dynamic *
196      (rtp_stats.readAc.access + rtp_stats.writeAc.access) *
197      mcp.llcBlockSize * BITS_PER_BYTE / mcp.dataBusWidth +
198      // Original McPAT code: Assume 10% of peak power is consumed by routine
199      // job including memory refreshing and scrubbing
200      power.readOp.dynamic * 0.1 * execution_time;
201}
202
203MCPHY::MCPHY(XMLNode* _xml_data, InputParameter* interface_ip_,
204             const MCParameters & mcp_, const MCStatistics & mcs_)
205    : McPATComponent(_xml_data), l_ip(*interface_ip_), mcp(mcp_), mcs(mcs_) {
206    name = "Physical Interface (PHY)";
207    local_result = init_interface(&l_ip, name);
208
209    // Set up stats for the power calculations
210    // TODO: Figure out why TDP stats aren't used
211    tdp_stats.reset();
212    tdp_stats.readAc.access = 0.5 * mcp.num_channels;
213    tdp_stats.writeAc.access = 0.5 * mcp.num_channels;
214    rtp_stats.reset();
215    rtp_stats.readAc.access = mcs.reads;
216    rtp_stats.writeAc.access = mcs.writes;
217}
218
219void MCPHY::computeArea() {
220    if (mcp.mc_type == MC) {
221        if (mcp.type == 0) {
222            //Based on die photos from Niagara 1 and 2.
223            //TODO merge this into undifferentiated core.PHY only achieves
224            //square root of the ideal scaling.
225            output_data.area = (6.4323 * log(mcp.peak_transfer_rate * 2) -
226                                48.134) * mcp.dataBusWidth / 128.0 *
227                (l_ip.F_sz_um / 0.09) * mcp.num_channels / 2;//TODO:/2
228        } else {
229            //Designware/synopsis 16bit DDR3 PHY is 1.3mm (WITH IOs) at 40nm
230            //for upto DDR3 2133 (PC3 17066)
231            double non_IO_percentage = 0.2;
232            output_data.area = 1.3 * non_IO_percentage / 2133.0e6 *
233                mcp.clockRate / 17066 * mcp.peak_transfer_rate *
234                mcp.dataBusWidth / 16.0 * (l_ip.F_sz_um / 0.040)*
235                (l_ip.F_sz_um / 0.040) * mcp.num_channels;//um^2
236        }
237    } else {
238        //area based on Cadence ChipEstimator for 8bit bus
239        output_data.area = 0.4e6 / 2 * mcp.dataBusWidth / 8 / 1e6;
240    }
241}
242
243void MCPHY::computeEnergy() {
244    //PHY uses internal data buswidth but the actuall off-chip datawidth is 64bits + ecc
245    double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
246    /*
247     * according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces ," ISSCC 2006;
248     * From Cadence ChipEstimator for normal I/O around 0.4~0.8 mW/Gb/s
249     */
250    double power_per_gb_per_s, phy_dyn,phy_gates;
251    double NMOS_sizing = g_tp.min_w_nmos_;
252    double PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
253    double area_um2 = output_data.area * 1e6;
254
255    if (mcp.mc_type == MC) {
256        if (mcp.type == 0) {
257            power_per_gb_per_s = mcp.LVDS ? 0.01 : 0.04;
258            //This is from curve fitting based on Niagara 1 and 2's PHY die photo.
259            //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
260            //power.readOp.dynamic = 0.02*memAccesses*llcBlocksize*8;//change from Bytes to bits.
261            power.readOp.dynamic = power_per_gb_per_s *
262                sqrt(l_ip.F_sz_um / 0.09) * g_tp.peri_global.Vdd / 1.2 *
263                g_tp.peri_global.Vdd / 1.2;
264            power.readOp.leakage = area_um2 / 2 *
265                (g_tp.scaling_factor.core_tx_density) *
266                cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
267                g_tp.peri_global.Vdd;//unit W
268            power.readOp.gate_leakage = area_um2 / 2 *
269                (g_tp.scaling_factor.core_tx_density) *
270                cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) *
271                g_tp.peri_global.Vdd;//unit W
272        } else {
273            phy_gates = 200000 * mcp.dataBusWidth / 64.0;
274            power_per_gb_per_s = 0.01;
275            //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
276            power.readOp.dynamic = power_per_gb_per_s * (l_ip.F_sz_um / 0.09) *
277                g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
278            power.readOp.leakage = (mcp.withPHY ? phy_gates : 0) *
279                cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
280                g_tp.peri_global.Vdd;//unit W
281            power.readOp.gate_leakage = (mcp.withPHY ? phy_gates : 0) *
282                cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
283                g_tp.peri_global.Vdd;//unit W
284        }
285    }
286
287//  double phy_factor = (int)ceil(mcp.dataBusWidth/72.0);//Previous phy power numbers are based on 72 bit DIMM interface
288//  power_t.readOp.dynamic *= phy_factor;
289//  power_t.readOp.leakage *= phy_factor;
290//  power_t.readOp.gate_leakage *= phy_factor;
291
292    double long_channel_device_reduction =
293        longer_channel_device_reduction(Uncore_device);
294    power.readOp.longer_channel_leakage =
295        power.readOp.leakage * long_channel_device_reduction;
296
297    // Leakage power calculations
298    output_data.subthreshold_leakage_power =
299        longer_channel_device ? power.readOp.longer_channel_leakage :
300        power.readOp.leakage;
301    output_data.gate_leakage_power = power.readOp.gate_leakage;
302
303    // Peak dynamic power calculation
304    double data_transfer_unit = (mcp.mc_type == MC)? 72:16;/*DIMM data width*/
305    output_data.peak_dynamic_power = power.readOp.dynamic *
306        (mcp.peak_transfer_rate * BITS_PER_BYTE / 1e3) * mcp.dataBusWidth /
307        data_transfer_unit * mcp.num_channels / mcp.clockRate;
308
309    // Runtime dynamic energy calculation
310    output_data.runtime_dynamic_energy =
311        power.readOp.dynamic *
312        (rtp_stats.readAc.access + rtp_stats.writeAc.access) *
313        mcp.llcBlockSize * BITS_PER_BYTE / 1e9 +
314        // Original McPAT code: Assume 10% of peak power is consumed by routine
315        // job including memory refreshing and scrubbing
316        power.readOp.dynamic * 0.1 * execution_time;
317}
318
319MCFrontEnd::MCFrontEnd(XMLNode* _xml_data, InputParameter* interface_ip_,
320                       const MCParameters & mcp_, const MCStatistics & mcs_)
321    : McPATComponent(_xml_data), frontendBuffer(NULL), readBuffer(NULL),
322      writeBuffer(NULL), MC_arb(NULL), interface_ip(*interface_ip_),
323    mcp(mcp_), mcs(mcs_) {
324    int tag, data;
325    bool is_default = true;//indication for default setup
326
327    /* MC frontend engine channels share the same engines but logically partitioned
328     * For all hardware inside MC. different channels do not share resources.
329     * TODO: add docodeing/mux stage to steer memory requests to different channels.
330     */
331
332    name = "Front End";
333
334    // Memory Request Reorder Buffer
335    tag = mcp.addressbus_width + EXTRA_TAG_BITS + mcp.opcodeW;
336    data = int(ceil((physical_address_width + mcp.opcodeW) / BITS_PER_BYTE));
337
338    interface_ip.cache_sz = data * mcp.req_window_size_per_channel;
339    interface_ip.line_sz = data;
340    interface_ip.assoc = mcp.reorder_buffer_assoc;
341    interface_ip.nbanks = mcp.reorder_buffer_nbanks;
342    interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
343    interface_ip.specific_tag = tag > 0;
344    interface_ip.tag_w = tag;
345    interface_ip.access_mode = Normal;
346    interface_ip.obj_func_dyn_energy = 0;
347    interface_ip.obj_func_dyn_power = 0;
348    interface_ip.obj_func_leak_power = 0;
349    interface_ip.obj_func_cycle_t = 1;
350    interface_ip.num_rw_ports = 0;
351    interface_ip.num_rd_ports = mcp.num_channels;
352    interface_ip.num_wr_ports = interface_ip.num_rd_ports;
353    interface_ip.num_se_rd_ports = 0;
354    interface_ip.num_search_ports = mcp.num_channels;
355    interface_ip.is_cache = true;
356    interface_ip.pure_cam = false;
357    interface_ip.pure_ram = false;
358    interface_ip.throughput = 1.0 / mcp.clockRate;
359    interface_ip.latency = 1.0 / mcp.clockRate;
360    frontendBuffer = new CacheArray(xml_data, &interface_ip, "Reorder Buffer",
361                                    Uncore_device, mcp.clockRate);
362    children.push_back(frontendBuffer);
363
364    frontendBuffer->tdp_stats.reset();
365    frontendBuffer->tdp_stats.readAc.access =
366        frontendBuffer->l_ip.num_search_ports +
367        frontendBuffer->l_ip.num_wr_ports;
368    frontendBuffer->tdp_stats.writeAc.access =
369        frontendBuffer->l_ip.num_search_ports;
370    frontendBuffer->tdp_stats.searchAc.access =
371        frontendBuffer->l_ip.num_wr_ports;
372    frontendBuffer->rtp_stats.reset();
373    // TODO: These stats assume that access power is calculated per buffer
374    // bit, which requires the stats to take into account the number of
375    // bits for each buffer slot. This should be revised...
376    //For each channel, each memory word need to check the address data to
377    //achieve best scheduling results.
378    //and this need to be done on all physical DIMMs in each logical memory
379    //DIMM *mcp.dataBusWidth/72
380    frontendBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize *
381        BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72;
382    frontendBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize *
383        BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72;
384    frontendBuffer->rtp_stats.searchAc.access =
385        frontendBuffer->rtp_stats.readAc.access +
386        frontendBuffer->rtp_stats.writeAc.access;
387
388    // Read Buffers
389    //Support key words first operation
390    data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE);
391
392    interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel;
393    interface_ip.line_sz = data;
394    interface_ip.assoc = mcp.read_buffer_assoc;
395    interface_ip.nbanks = mcp.read_buffer_nbanks;
396    interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
397    interface_ip.specific_tag = mcp.read_buffer_tag_width > 0;
398    interface_ip.tag_w = mcp.read_buffer_tag_width;
399    interface_ip.access_mode = Sequential;
400    interface_ip.obj_func_dyn_energy = 0;
401    interface_ip.obj_func_dyn_power = 0;
402    interface_ip.obj_func_leak_power = 0;
403    interface_ip.obj_func_cycle_t = 1;
404    interface_ip.num_rw_ports = 0;
405    interface_ip.num_rd_ports = mcp.num_channels;
406    interface_ip.num_wr_ports = interface_ip.num_rd_ports;
407    interface_ip.num_se_rd_ports = 0;
408    interface_ip.num_search_ports = 0;
409    interface_ip.is_cache = false;
410    interface_ip.pure_cam = false;
411    interface_ip.pure_ram = true;
412    interface_ip.throughput = 1.0 / mcp.clockRate;
413    interface_ip.latency = 1.0 / mcp.clockRate;
414    readBuffer = new CacheArray(xml_data, &interface_ip, "Read Buffer",
415                                Uncore_device, mcp.clockRate);
416    children.push_back(readBuffer);
417
418    readBuffer->tdp_stats.reset();
419    readBuffer->tdp_stats.readAc.access = readBuffer->l_ip.num_rd_ports *
420        mcs.duty_cycle;
421    readBuffer->tdp_stats.writeAc.access = readBuffer->l_ip.num_wr_ports *
422        mcs.duty_cycle;
423    readBuffer->rtp_stats.reset();
424    readBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize *
425        BITS_PER_BYTE / mcp.dataBusWidth;
426    readBuffer->rtp_stats.writeAc.access = mcs.reads * mcp.llcBlockSize *
427        BITS_PER_BYTE / mcp.dataBusWidth;
428
429    // Write Buffer
430    //Support key words first operation
431    data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE);
432
433    interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel;
434    interface_ip.line_sz = data;
435    interface_ip.assoc = mcp.write_buffer_assoc;
436    interface_ip.nbanks = mcp.write_buffer_nbanks;
437    interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
438    interface_ip.specific_tag = mcp.write_buffer_tag_width > 0;
439    interface_ip.tag_w = mcp.write_buffer_tag_width;
440    interface_ip.access_mode = Normal;
441    interface_ip.obj_func_dyn_energy = 0;
442    interface_ip.obj_func_dyn_power = 0;
443    interface_ip.obj_func_leak_power = 0;
444    interface_ip.obj_func_cycle_t = 1;
445    interface_ip.num_rw_ports = 0;
446    interface_ip.num_rd_ports = mcp.num_channels;
447    interface_ip.num_wr_ports = interface_ip.num_rd_ports;
448    interface_ip.num_se_rd_ports = 0;
449    interface_ip.num_search_ports = 0;
450    interface_ip.is_cache = false;
451    interface_ip.pure_cam = false;
452    interface_ip.pure_ram = true;
453    interface_ip.throughput = 1.0 / mcp.clockRate;
454    interface_ip.latency = 1.0 / mcp.clockRate;
455    writeBuffer = new CacheArray(xml_data, &interface_ip, "Write Buffer",
456                                 Uncore_device, mcp.clockRate);
457    children.push_back(writeBuffer);
458
459    writeBuffer->tdp_stats.reset();
460    writeBuffer->tdp_stats.readAc.access = writeBuffer->l_ip.num_rd_ports *
461        mcs.duty_cycle;
462    writeBuffer->tdp_stats.writeAc.access = writeBuffer->l_ip.num_wr_ports *
463        mcs.duty_cycle;
464    writeBuffer->rtp_stats.reset();
465    writeBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize *
466        BITS_PER_BYTE / mcp.dataBusWidth;
467    writeBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize *
468        BITS_PER_BYTE / mcp.dataBusWidth;
469
470    // TODO: Set up selection logic as a leaf node in tree
471    //selection and arbitration logic
472    MC_arb =
473        new selection_logic(xml_data, is_default,
474                            mcp.req_window_size_per_channel, 1, &interface_ip,
475                            "Arbitration Logic", (mcs.reads + mcs.writes),
476                            mcp.clockRate, Uncore_device);
477    // MC_arb is not included in the roll-up due to the uninitialized area
478    //children.push_back(MC_arb);
479}
480
481MemoryController::MemoryController(XMLNode* _xml_data,
482                                   InputParameter* interface_ip_)
483    : McPATComponent(_xml_data), interface_ip(*interface_ip_) {
484    name = "Memory Controller";
485    set_mc_param();
486    // TODO: Pass params and stats as pointers
487    children.push_back(new MCFrontEnd(xml_data, &interface_ip, mcp, mcs));
488    children.push_back(new MCBackend(xml_data, &interface_ip, mcp, mcs));
489
490    if (mcp.type==0 || (mcp.type == 1 && mcp.withPHY)) {
491        children.push_back(new MCPHY(xml_data, &interface_ip, mcp, mcs));
492    }
493}
494
495void MemoryController::initialize_params() {
496    memset(&mcp, 0, sizeof(MCParameters));
497}
498
499void MemoryController::set_mc_param() {
500    initialize_params();
501
502    int num_children = xml_data->nChildNode("param");
503    int tech_type;
504    int mat_type;
505    int i;
506    for (i = 0; i < num_children; i++) {
507        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
508        XMLCSTR node_name = paramNode->getAttribute("name");
509        XMLCSTR value = paramNode->getAttribute("value");
510
511        if (!node_name)
512            warnMissingParamName(paramNode->getAttribute("id"));
513
514        ASSIGN_FP_IF("mc_clock", mcp.clockRate);
515        ASSIGN_INT_IF("tech_type", tech_type);
516        ASSIGN_ENUM_IF("mc_type", mcp.mc_type, MemoryCtrl_type);
517        ASSIGN_FP_IF("num_mcs", mcp.num_mcs);
518        ASSIGN_INT_IF("llc_line_length", mcp.llc_line_length);
519        ASSIGN_INT_IF("databus_width", mcp.databus_width);
520        ASSIGN_INT_IF("memory_channels_per_mc", mcp.num_channels);
521        ASSIGN_INT_IF("req_window_size_per_channel",
522                      mcp.req_window_size_per_channel);
523        ASSIGN_INT_IF("IO_buffer_size_per_channel",
524                      mcp.IO_buffer_size_per_channel);
525        ASSIGN_INT_IF("addressbus_width", mcp.addressbus_width);
526        ASSIGN_INT_IF("opcode_width", mcp.opcodeW);
527        ASSIGN_INT_IF("type", mcp.type);
528        ASSIGN_ENUM_IF("LVDS", mcp.LVDS, bool);
529        ASSIGN_ENUM_IF("withPHY", mcp.withPHY, bool);
530        ASSIGN_INT_IF("peak_transfer_rate", mcp.peak_transfer_rate);
531        ASSIGN_INT_IF("number_ranks", mcp.number_ranks);
532        ASSIGN_INT_IF("reorder_buffer_assoc", mcp.reorder_buffer_assoc);
533        ASSIGN_INT_IF("reorder_buffer_nbanks", mcp.reorder_buffer_nbanks);
534        ASSIGN_INT_IF("read_buffer_assoc", mcp.read_buffer_assoc);
535        ASSIGN_INT_IF("read_buffer_nbanks", mcp.read_buffer_nbanks);
536        ASSIGN_INT_IF("read_buffer_tag_width", mcp.read_buffer_tag_width);
537        ASSIGN_INT_IF("write_buffer_assoc", mcp.write_buffer_assoc);
538        ASSIGN_INT_IF("write_buffer_nbanks", mcp.write_buffer_nbanks);
539        ASSIGN_INT_IF("write_buffer_tag_width", mcp.write_buffer_tag_width);
540        ASSIGN_INT_IF("wire_mat_type", mat_type);
541        ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);
542
543        else {
544            warnUnrecognizedParam(node_name);
545        }
546    }
547
548    if (mcp.mc_type != MC) {
549        cout << "Unknown memory controller type: Only DRAM controller is "
550             << "supported for now" << endl;
551                exit(0);
552    }
553
554    // Change from MHz to Hz
555    mcp.clockRate *= 1e6;
556
557    interface_ip.data_arr_ram_cell_tech_type    = tech_type;
558    interface_ip.data_arr_peri_global_tech_type = tech_type;
559    interface_ip.tag_arr_ram_cell_tech_type     = tech_type;
560    interface_ip.tag_arr_peri_global_tech_type  = tech_type;
561    interface_ip.wire_is_mat_type = mat_type;
562    interface_ip.wire_os_mat_type = mat_type;
563
564    num_children = xml_data->nChildNode("stat");
565    for (i = 0; i < num_children; i++) {
566        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
567        XMLCSTR node_name = statNode->getAttribute("name");
568        XMLCSTR value = statNode->getAttribute("value");
569
570        if (!node_name)
571            warnMissingStatName(statNode->getAttribute("id"));
572
573        ASSIGN_FP_IF("duty_cycle", mcs.duty_cycle);
574        ASSIGN_FP_IF("perc_load", mcs.perc_load);
575        ASSIGN_FP_IF("memory_reads", mcs.reads);
576        ASSIGN_INT_IF("memory_writes", mcs.writes);
577
578        else {
579            warnUnrecognizedStat(node_name);
580        }
581    }
582
583    // Add ECC overhead
584    mcp.llcBlockSize = int(ceil(mcp.llc_line_length / BITS_PER_BYTE)) +
585        mcp.llc_line_length;
586    mcp.dataBusWidth = int(ceil(mcp.databus_width / BITS_PER_BYTE)) +
587        mcp.databus_width;
588}
589
590MCFrontEnd ::~MCFrontEnd() {
591
592    if (MC_arb) {
593        delete MC_arb;
594        MC_arb = NULL;
595    }
596    if (frontendBuffer) {
597        delete frontendBuffer;
598        frontendBuffer = NULL;
599    }
600    if (readBuffer) {
601        delete readBuffer;
602        readBuffer = NULL;
603    }
604    if (writeBuffer) {
605        delete writeBuffer;
606        writeBuffer = NULL;
607    }
608}
609
610MemoryController::~MemoryController() {
611    // TODO: use default constructor to delete children
612}
613
614