110152Satgutier@umich.edu/*****************************************************************************
210152Satgutier@umich.edu *                                McPAT
310152Satgutier@umich.edu *                      SOFTWARE LICENSE AGREEMENT
410152Satgutier@umich.edu *            Copyright 2012 Hewlett-Packard Development Company, L.P.
510234Syasuko.eckert@amd.com *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
610152Satgutier@umich.edu *                          All Rights Reserved
710152Satgutier@umich.edu *
810152Satgutier@umich.edu * Redistribution and use in source and binary forms, with or without
910152Satgutier@umich.edu * modification, are permitted provided that the following conditions are
1010152Satgutier@umich.edu * met: redistributions of source code must retain the above copyright
1110152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer;
1210152Satgutier@umich.edu * redistributions in binary form must reproduce the above copyright
1310152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer in the
1410152Satgutier@umich.edu * documentation and/or other materials provided with the distribution;
1510152Satgutier@umich.edu * neither the name of the copyright holders nor the names of its
1610152Satgutier@umich.edu * contributors may be used to endorse or promote products derived from
1710152Satgutier@umich.edu * this software without specific prior written permission.
1810152Satgutier@umich.edu
1910152Satgutier@umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2010152Satgutier@umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2110152Satgutier@umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2210152Satgutier@umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2310152Satgutier@umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
2410152Satgutier@umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
2510152Satgutier@umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2610152Satgutier@umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2710152Satgutier@umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2810152Satgutier@umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2910234Syasuko.eckert@amd.com * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3010152Satgutier@umich.edu *
3110152Satgutier@umich.edu ***************************************************************************/
3210152Satgutier@umich.edu#include <algorithm>
3310152Satgutier@umich.edu#include <cassert>
3410152Satgutier@umich.edu#include <cmath>
3510152Satgutier@umich.edu#include <iostream>
3610152Satgutier@umich.edu#include <string>
3710152Satgutier@umich.edu
3810152Satgutier@umich.edu#include "basic_circuit.h"
3910234Syasuko.eckert@amd.com#include "common.h"
4010152Satgutier@umich.edu#include "const.h"
4110152Satgutier@umich.edu#include "io.h"
4210152Satgutier@umich.edu#include "iocontrollers.h"
4310152Satgutier@umich.edu#include "logic.h"
4410152Satgutier@umich.edu
4510152Satgutier@umich.edu/*
4610152Satgutier@umich.eduSUN Niagara 2 I/O power analysis:
4710152Satgutier@umich.edutotal signal bits: 711
4810152Satgutier@umich.eduTotal FBDIMM bits: (14+10)*2*8= 384
4910152Satgutier@umich.eduPCIe bits:         (8 + 8)*2 = 32
5010152Satgutier@umich.edu10Gb NIC:          (4*2+4*2)*2 = 32
5110152Satgutier@umich.eduDebug I/Os:        168
5210152Satgutier@umich.eduOther I/Os:        711- 32-32 - 384 - 168 = 95
5310152Satgutier@umich.edu
5410152Satgutier@umich.eduAccording to "Implementation of an 8-Core, 64-Thread, Power-Efficient SPARC Server on a Chip"
90% of I/Os are SerDes (the calculation is (384+64)/(711-168)=83%, about the same as the 90% reported in the paper)
5610152Satgutier@umich.edu--> around 80Pins are common I/Os.
5710152Satgutier@umich.eduCommon I/Os consumes 71mW/Gb/s according to Cadence ChipEstimate @65nm
5810152Satgutier@umich.eduNiagara 2 I/O clock is 1/4 of core clock. --> 87pin (<--((711-168)*17%)) * 71mW/Gb/s *0.25*1.4Ghz = 2.17W
5910152Satgutier@umich.edu
6010152Satgutier@umich.eduTotal dynamic power of FBDIMM, NIC, PCIe = 84*0.132 + 84*0.049*0.132 = 11.14 - 2.17 = 8.98
6110152Satgutier@umich.eduFurther, if assuming I/O logic power is about 50% of I/Os then Total energy of FBDIMM, NIC, PCIe = 11.14 - 2.17*1.5 = 7.89
6210152Satgutier@umich.edu */
6310152Satgutier@umich.edu
6410152Satgutier@umich.edu/*
 * A bug in Cadence ChipEstimator: after updating the clock rate in the clock tab, a user
 * needs to re-select the IP clock (the same clk) and then click Estimate; if it is not
 * re-selected, the new clock rate may not be propagated into the IPs.
6810152Satgutier@umich.edu *
6910152Satgutier@umich.edu */
7010152Satgutier@umich.edu
// Constructs the NIU (network interface unit) controller model and reads its
// parameters and statistics from the XML configuration subtree rooted at
// _xml_data.
NIUController::NIUController(XMLNode* _xml_data,InputParameter* interface_ip_)
    : McPATComponent(_xml_data, interface_ip_) {
    name = "NIU";
    set_niu_param();
}
7610152Satgutier@umich.edu
7710234Syasuko.eckert@amd.comvoid NIUController::computeArea() {
7810234Syasuko.eckert@amd.com    double mac_area;
7910234Syasuko.eckert@amd.com    double frontend_area;
8010234Syasuko.eckert@amd.com    double SerDer_area;
8110152Satgutier@umich.edu
8210234Syasuko.eckert@amd.com    if (niup.type == 0) { //high performance NIU
8310234Syasuko.eckert@amd.com        //Area estimation based on average of die photo from Niagara 2 and
8410234Syasuko.eckert@amd.com        //Cadence ChipEstimate using 65nm.
8510234Syasuko.eckert@amd.com        mac_area = (1.53 + 0.3) / 2 * (interface_ip.F_sz_um / 0.065) *
8610234Syasuko.eckert@amd.com            (interface_ip.F_sz_um / 0.065);
8710234Syasuko.eckert@amd.com        //Area estimation based on average of die photo from Niagara 2, ISSCC
8810234Syasuko.eckert@amd.com        //"An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS"
8910234Syasuko.eckert@amd.com        //and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface
9010234Syasuko.eckert@amd.com        //With Robust VCO Tuning Technique" Frontend is PCS
9110234Syasuko.eckert@amd.com        frontend_area = (9.8 + (6 + 18) * 65 / 130 * 65 / 130) / 3 *
9210234Syasuko.eckert@amd.com            (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065);
9310234Syasuko.eckert@amd.com        //Area estimation based on average of die photo from Niagara 2 and
9410234Syasuko.eckert@amd.com        //Cadence ChipEstimate hard IP @65nm.
9510234Syasuko.eckert@amd.com        //SerDer is very hard to scale
9610234Syasuko.eckert@amd.com        SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um /
9710234Syasuko.eckert@amd.com                                       0.065);//* (interface_ip.F_sz_um/0.065);
9810234Syasuko.eckert@amd.com    } else {
9910234Syasuko.eckert@amd.com        //Low power implementations are mostly from Cadence ChipEstimator;
10010234Syasuko.eckert@amd.com        //Ignore the multiple IP effect
10110234Syasuko.eckert@amd.com        // ---When there are multiple IP (same kind or not) selected, Cadence
10210234Syasuko.eckert@amd.com        //ChipEstimator results are not a simple summation of all IPs.
10310234Syasuko.eckert@amd.com        //Ignore this effect
10410234Syasuko.eckert@amd.com        mac_area = 0.24 * (interface_ip.F_sz_um / 0.065) *
10510234Syasuko.eckert@amd.com            (interface_ip.F_sz_um / 0.065);
10610234Syasuko.eckert@amd.com        frontend_area = 0.1 * (interface_ip.F_sz_um / 0.065) *
10710234Syasuko.eckert@amd.com            (interface_ip.F_sz_um / 0.065);//Frontend is the PCS layer
10810234Syasuko.eckert@amd.com        SerDer_area = 0.35 * (interface_ip.F_sz_um / 0.065) *
10910234Syasuko.eckert@amd.com            (interface_ip.F_sz_um/0.065);
11010234Syasuko.eckert@amd.com        //Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet
11110234Syasuko.eckert@amd.com        //Transceiver and XAUI Interface With Robust VCO Tuning Technique"
11210234Syasuko.eckert@amd.com        //and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can
11310234Syasuko.eckert@amd.com        //scale perfectly with the technology
11410234Syasuko.eckert@amd.com    }
11510152Satgutier@umich.edu
11610234Syasuko.eckert@amd.com    //total area
11710234Syasuko.eckert@amd.com    output_data.area = (mac_area + frontend_area + SerDer_area) * 1e6;
11810152Satgutier@umich.edu }
11910152Satgutier@umich.edu
12010234Syasuko.eckert@amd.comvoid NIUController::computeEnergy() {
12110234Syasuko.eckert@amd.com    double mac_dyn;
12210234Syasuko.eckert@amd.com    double frontend_dyn;
12310234Syasuko.eckert@amd.com    double SerDer_dyn;
12410234Syasuko.eckert@amd.com    double frontend_gates;
12510234Syasuko.eckert@amd.com    double mac_gates;
12610234Syasuko.eckert@amd.com    double SerDer_gates;
12710234Syasuko.eckert@amd.com    double NMOS_sizing;
12810234Syasuko.eckert@amd.com    double PMOS_sizing;
12910234Syasuko.eckert@amd.com    double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
13010152Satgutier@umich.edu
13110234Syasuko.eckert@amd.com    if (niup.type == 0) { //high performance NIU
13210234Syasuko.eckert@amd.com        //Power
13310234Syasuko.eckert@amd.com        //Cadence ChipEstimate using 65nm (mac, front_end are all energy.
13410234Syasuko.eckert@amd.com        //E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
13510234Syasuko.eckert@amd.com        //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
13610234Syasuko.eckert@amd.com        mac_dyn = 2.19e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd /
13710234Syasuko.eckert@amd.com            1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
13810234Syasuko.eckert@amd.com        //Cadence ChipEstimate using 65nm soft IP;
13910234Syasuko.eckert@amd.com        frontend_dyn = 0.27e-9 * g_tp.peri_global.Vdd / 1.1 *
14010234Syasuko.eckert@amd.com            g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
14110234Syasuko.eckert@amd.com        //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006
14210234Syasuko.eckert@amd.com        //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
14310234Syasuko.eckert@amd.com        SerDer_dyn = 0.01 * 10 * sqrt(interface_ip.F_sz_um / 0.09) *
14410234Syasuko.eckert@amd.com            g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
14510152Satgutier@umich.edu
14610234Syasuko.eckert@amd.com        //Cadence ChipEstimate using 65nm
14710234Syasuko.eckert@amd.com        mac_gates = 111700;
14810234Syasuko.eckert@amd.com        frontend_gates = 320000;
14910234Syasuko.eckert@amd.com        SerDer_gates = 200000;
15010234Syasuko.eckert@amd.com        NMOS_sizing = 5 * g_tp.min_w_nmos_;
15110234Syasuko.eckert@amd.com        PMOS_sizing	= 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
15210234Syasuko.eckert@amd.com    } else {
15310234Syasuko.eckert@amd.com        //Power
15410234Syasuko.eckert@amd.com        //Cadence ChipEstimate using 65nm (mac, front_end are all energy.
15510234Syasuko.eckert@amd.com        ///E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
15610234Syasuko.eckert@amd.com        //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
15710234Syasuko.eckert@amd.com        mac_dyn = 1.257e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd
15810234Syasuko.eckert@amd.com            / 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
15910234Syasuko.eckert@amd.com        //Cadence ChipEstimate using 65nm soft IP;
16010234Syasuko.eckert@amd.com        frontend_dyn = 0.6e-9 * g_tp.peri_global.Vdd / 1.1 *
16110234Syasuko.eckert@amd.com            g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
16210234Syasuko.eckert@amd.com        //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm
16310234Syasuko.eckert@amd.com        SerDer_dyn = 0.0216 * 10 * (interface_ip.F_sz_um / 0.13) *
16410234Syasuko.eckert@amd.com            g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
16510152Satgutier@umich.edu
16610234Syasuko.eckert@amd.com        mac_gates = 111700;
16710234Syasuko.eckert@amd.com        frontend_gates = 52000;
16810234Syasuko.eckert@amd.com        SerDer_gates = 199260;
16910234Syasuko.eckert@amd.com        NMOS_sizing = g_tp.min_w_nmos_;
17010234Syasuko.eckert@amd.com        PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
17110152Satgutier@umich.edu    }
17210234Syasuko.eckert@amd.com
17310234Syasuko.eckert@amd.com    //covert to energy per clock cycle of whole NIU
17410234Syasuko.eckert@amd.com    SerDer_dyn /= niup.clockRate;
17510234Syasuko.eckert@amd.com
17610234Syasuko.eckert@amd.com    power.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn;
17710234Syasuko.eckert@amd.com    power.readOp.leakage = (mac_gates + frontend_gates + frontend_gates) *
17810234Syasuko.eckert@amd.com        cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
17910234Syasuko.eckert@amd.com        g_tp.peri_global.Vdd;//unit W
18010234Syasuko.eckert@amd.com    double long_channel_device_reduction =
18110234Syasuko.eckert@amd.com        longer_channel_device_reduction(Uncore_device);
18210234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage =
18310234Syasuko.eckert@amd.com        power.readOp.leakage * long_channel_device_reduction;
18410234Syasuko.eckert@amd.com    power.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates) *
18510234Syasuko.eckert@amd.com        cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
18610234Syasuko.eckert@amd.com        g_tp.peri_global.Vdd;//unit W
18710234Syasuko.eckert@amd.com
18810234Syasuko.eckert@amd.com    // Output power
18910234Syasuko.eckert@amd.com    output_data.subthreshold_leakage_power =
19010234Syasuko.eckert@amd.com        longer_channel_device ? power.readOp.longer_channel_leakage :
19110234Syasuko.eckert@amd.com        power.readOp.leakage;
19210234Syasuko.eckert@amd.com    output_data.gate_leakage_power = power.readOp.gate_leakage;
19310234Syasuko.eckert@amd.com    output_data.peak_dynamic_power = power.readOp.dynamic * nius.duty_cycle;
19410234Syasuko.eckert@amd.com    output_data.runtime_dynamic_energy = power.readOp.dynamic * nius.perc_load;
19510234Syasuko.eckert@amd.com}
19610234Syasuko.eckert@amd.com
// Parse NIU configuration from the XML subtree: "param" children fill the
// static parameters (niup) and "stat" children fill the runtime statistics
// (nius). The ASSIGN_*_IF macros appear to expand to if/else-if chains keyed
// on node_name, so the trailing else catches unrecognized names.
void NIUController::set_niu_param() {
    int num_children = xml_data->nChildNode("param");
    int i;
    for (i = 0; i < num_children; i++) {
        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
        XMLCSTR node_name = paramNode->getAttribute("name");
        XMLCSTR value = paramNode->getAttribute("value");

        if (!node_name)
            warnMissingParamName(paramNode->getAttribute("id"));

        ASSIGN_FP_IF("niu_clockRate", niup.clockRate);
        ASSIGN_INT_IF("num_units", niup.num_units);
        ASSIGN_INT_IF("type", niup.type);

        else {
            warnUnrecognizedParam(node_name);
        }
    }

    // Change from MHz to Hz
    niup.clockRate *= 1e6;

    num_children = xml_data->nChildNode("stat");
    for (i = 0; i < num_children; i++) {
        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
        XMLCSTR node_name = statNode->getAttribute("name");
        XMLCSTR value = statNode->getAttribute("value");

        if (!node_name)
            warnMissingStatName(statNode->getAttribute("id"));

        ASSIGN_FP_IF("duty_cycle", nius.duty_cycle);
        ASSIGN_FP_IF("perc_load", nius.perc_load);

        else {
            warnUnrecognizedStat(node_name);
        }
    }
}
23710152Satgutier@umich.edu
// Constructs the PCIe controller model and reads its parameters and
// statistics from the XML configuration subtree rooted at _xml_data.
PCIeController::PCIeController(XMLNode* _xml_data,
                               InputParameter* interface_ip_)
    : McPATComponent(_xml_data, interface_ip_) {
    name = "PCIe";
    set_pcie_param();
}
24410152Satgutier@umich.edu
24510234Syasuko.eckert@amd.comvoid PCIeController::computeArea() {
24610234Syasuko.eckert@amd.com    double ctrl_area;
24710234Syasuko.eckert@amd.com    double SerDer_area;
24810234Syasuko.eckert@amd.com
24910234Syasuko.eckert@amd.com    /* Assuming PCIe is bit-slice based architecture
25010234Syasuko.eckert@amd.com     * This is the reason for /8 in both area and power calculation
25110234Syasuko.eckert@amd.com     * to get per lane numbers
25210234Syasuko.eckert@amd.com     */
25310234Syasuko.eckert@amd.com
25410234Syasuko.eckert@amd.com    if (pciep.type == 0) { //high performance PCIe
25510234Syasuko.eckert@amd.com        //Area estimation based on average of die photo from Niagara 2 and
25610234Syasuko.eckert@amd.com        //Cadence ChipEstimate @ 65nm.
25710234Syasuko.eckert@amd.com        ctrl_area = (5.2 + 0.5) / 2 * (interface_ip.F_sz_um / 0.065) *
25810234Syasuko.eckert@amd.com            (interface_ip.F_sz_um / 0.065);
25910234Syasuko.eckert@amd.com        //Area estimation based on average of die photo from Niagara 2 and
26010234Syasuko.eckert@amd.com        //Cadence ChipEstimate hard IP @65nm.
26110234Syasuko.eckert@amd.com        //SerDer is very hard to scale
26210234Syasuko.eckert@amd.com        SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um /
26310234Syasuko.eckert@amd.com                                       0.065);//* (interface_ip.F_sz_um/0.065);
26410234Syasuko.eckert@amd.com    } else {
26510234Syasuko.eckert@amd.com        ctrl_area = 0.412 * (interface_ip.F_sz_um / 0.065) *
26610234Syasuko.eckert@amd.com            (interface_ip.F_sz_um / 0.065);
26710234Syasuko.eckert@amd.com        //Area estimation based on average of die photo from Niagara 2, and
26810234Syasuko.eckert@amd.com        //Cadence ChipEstimate @ 65nm.
26910234Syasuko.eckert@amd.com        SerDer_area = 0.36 * (interface_ip.F_sz_um / 0.065) *
27010234Syasuko.eckert@amd.com            (interface_ip.F_sz_um / 0.065);
27110234Syasuko.eckert@amd.com    }
27210234Syasuko.eckert@amd.com
27310234Syasuko.eckert@amd.com    // Total area
27410234Syasuko.eckert@amd.com    output_data.area = ((ctrl_area + (pciep.withPHY ? SerDer_area : 0)) / 8 *
27510234Syasuko.eckert@amd.com                        pciep.num_channels) * 1e6;
27610152Satgutier@umich.edu}
27710152Satgutier@umich.edu
27810234Syasuko.eckert@amd.comvoid PCIeController::computeEnergy() {
27910234Syasuko.eckert@amd.com    double ctrl_dyn;
28010234Syasuko.eckert@amd.com    double SerDer_dyn;
28110234Syasuko.eckert@amd.com    double ctrl_gates;
28210234Syasuko.eckert@amd.com    double SerDer_gates = 0;
28310234Syasuko.eckert@amd.com    double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
28410234Syasuko.eckert@amd.com    double NMOS_sizing;
28510234Syasuko.eckert@amd.com    double PMOS_sizing;
28610152Satgutier@umich.edu
28710234Syasuko.eckert@amd.com    /* Assuming PCIe is bit-slice based architecture
28810234Syasuko.eckert@amd.com     * This is the reason for /8 in both area and power calculation
28910234Syasuko.eckert@amd.com     * to get per lane numbers
29010234Syasuko.eckert@amd.com     */
29110152Satgutier@umich.edu
29210234Syasuko.eckert@amd.com    if (pciep.type == 0) { //high performance PCIe
29310234Syasuko.eckert@amd.com        //Power
29410234Syasuko.eckert@amd.com        //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
29510234Syasuko.eckert@amd.com        ctrl_dyn = 3.75e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
29610234Syasuko.eckert@amd.com            g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
29710234Syasuko.eckert@amd.com        //	  //Cadence ChipEstimate using 65nm soft IP;
29810234Syasuko.eckert@amd.com        //	  frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
29910234Syasuko.eckert@amd.com        //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
30010234Syasuko.eckert@amd.com        //PCIe 2.0 max per lane speed is 4Gb/s
30110234Syasuko.eckert@amd.com        SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um /0.09) *
30210234Syasuko.eckert@amd.com            g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;
30310152Satgutier@umich.edu
30410234Syasuko.eckert@amd.com        //Cadence ChipEstimate using 65nm
30510234Syasuko.eckert@amd.com        ctrl_gates = 900000 / 8 * pciep.num_channels;
30610234Syasuko.eckert@amd.com        //	  frontend_gates   = 120000/8;
30710234Syasuko.eckert@amd.com        //	  SerDer_gates     = 200000/8;
30810234Syasuko.eckert@amd.com        NMOS_sizing = 5 * g_tp.min_w_nmos_;
30910234Syasuko.eckert@amd.com        PMOS_sizing	= 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
31010234Syasuko.eckert@amd.com    } else {
31110234Syasuko.eckert@amd.com        //Power
31210234Syasuko.eckert@amd.com        //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
31310234Syasuko.eckert@amd.com        ctrl_dyn = 2.21e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
31410234Syasuko.eckert@amd.com            g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
31510234Syasuko.eckert@amd.com        //	  //Cadence ChipEstimate using 65nm soft IP;
31610234Syasuko.eckert@amd.com        //	  frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
31710234Syasuko.eckert@amd.com        //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
31810234Syasuko.eckert@amd.com        //PCIe 2.0 max per lane speed is 4Gb/s
31910234Syasuko.eckert@amd.com        SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) *
32010234Syasuko.eckert@amd.com            g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;
32110152Satgutier@umich.edu
32210234Syasuko.eckert@amd.com        //Cadence ChipEstimate using 65nm
32310234Syasuko.eckert@amd.com        ctrl_gates = 200000 / 8 * pciep.num_channels;
32410234Syasuko.eckert@amd.com        //	  frontend_gates   = 120000/8;
32510234Syasuko.eckert@amd.com        SerDer_gates = 200000 / 8 * pciep.num_channels;
32610234Syasuko.eckert@amd.com        NMOS_sizing = g_tp.min_w_nmos_;
32710234Syasuko.eckert@amd.com        PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
32810152Satgutier@umich.edu
32910152Satgutier@umich.edu    }
33010234Syasuko.eckert@amd.com
33110234Syasuko.eckert@amd.com    //covert to energy per clock cycle
33210234Syasuko.eckert@amd.com    SerDer_dyn /= pciep.clockRate;
33310234Syasuko.eckert@amd.com
33410234Syasuko.eckert@amd.com    power.readOp.dynamic = (ctrl_dyn + (pciep.withPHY ? SerDer_dyn : 0)) *
33510234Syasuko.eckert@amd.com        pciep.num_channels;
33610234Syasuko.eckert@amd.com    power.readOp.leakage = (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) *
33710234Syasuko.eckert@amd.com        cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
33810234Syasuko.eckert@amd.com        g_tp.peri_global.Vdd;//unit W
33910234Syasuko.eckert@amd.com    double long_channel_device_reduction =
34010234Syasuko.eckert@amd.com        longer_channel_device_reduction(Uncore_device);
34110234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage =
34210234Syasuko.eckert@amd.com        power.readOp.leakage * long_channel_device_reduction;
34310234Syasuko.eckert@amd.com    power.readOp.gate_leakage = (ctrl_gates +
34410234Syasuko.eckert@amd.com                                 (pciep.withPHY ? SerDer_gates : 0)) *
34510234Syasuko.eckert@amd.com        cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
34610234Syasuko.eckert@amd.com        g_tp.peri_global.Vdd;//unit W
34710234Syasuko.eckert@amd.com
34810234Syasuko.eckert@amd.com    // Output power
34910234Syasuko.eckert@amd.com    output_data.subthreshold_leakage_power =
35010234Syasuko.eckert@amd.com        longer_channel_device ? power.readOp.longer_channel_leakage :
35110234Syasuko.eckert@amd.com        power.readOp.leakage;
35210234Syasuko.eckert@amd.com    output_data.gate_leakage_power = power.readOp.gate_leakage;
35310234Syasuko.eckert@amd.com    output_data.peak_dynamic_power = power.readOp.dynamic * pcies.duty_cycle;
35410234Syasuko.eckert@amd.com    output_data.runtime_dynamic_energy =
35510234Syasuko.eckert@amd.com        power.readOp.dynamic * pcies.perc_load;
35610234Syasuko.eckert@amd.com}
35710234Syasuko.eckert@amd.com
// Parse PCIe configuration from the XML subtree: "param" children fill the
// static parameters (pciep) and "stat" children fill the runtime statistics
// (pcies). The ASSIGN_*_IF macros appear to expand to if/else-if chains keyed
// on node_name, so the trailing else catches unrecognized names.
void PCIeController::set_pcie_param() {
    int num_children = xml_data->nChildNode("param");
    int i;
    for (i = 0; i < num_children; i++) {
        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
        XMLCSTR node_name = paramNode->getAttribute("name");
        XMLCSTR value = paramNode->getAttribute("value");

        if (!node_name)
            warnMissingParamName(paramNode->getAttribute("id"));

        ASSIGN_FP_IF("pcie_clockRate", pciep.clockRate);
        ASSIGN_INT_IF("num_units", pciep.num_units);
        ASSIGN_INT_IF("num_channels", pciep.num_channels);
        ASSIGN_INT_IF("type", pciep.type);
        ASSIGN_ENUM_IF("withPHY", pciep.withPHY, bool);

        else {
            warnUnrecognizedParam(node_name);
        }
    }

    // Change from MHz to Hz
    pciep.clockRate *= 1e6;

    num_children = xml_data->nChildNode("stat");
    for (i = 0; i < num_children; i++) {
        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
        XMLCSTR node_name = statNode->getAttribute("name");
        XMLCSTR value = statNode->getAttribute("value");

        if (!node_name)
            warnMissingStatName(statNode->getAttribute("id"));

        ASSIGN_FP_IF("duty_cycle", pcies.duty_cycle);
        ASSIGN_FP_IF("perc_load", pcies.perc_load);

        else {
            warnUnrecognizedStat(node_name);
        }
    }
}
40010152Satgutier@umich.edu
// Constructs the flash controller model and reads its parameters and
// statistics from the XML configuration subtree rooted at _xml_data.
FlashController::FlashController(XMLNode* _xml_data,
                                 InputParameter* interface_ip_)
    : McPATComponent(_xml_data, interface_ip_) {
    name = "Flash Controller";
    set_fc_param();
}
40710152Satgutier@umich.edu
40810234Syasuko.eckert@amd.comvoid FlashController::computeArea() {
40910234Syasuko.eckert@amd.com    double ctrl_area;
41010234Syasuko.eckert@amd.com    double SerDer_area;
41110152Satgutier@umich.edu
41210234Syasuko.eckert@amd.com    /* Assuming Flash is bit-slice based architecture
41310234Syasuko.eckert@amd.com     * This is the reason for /8 in both area and power calculation
41410234Syasuko.eckert@amd.com     * to get per lane numbers
41510234Syasuko.eckert@amd.com     */
41610234Syasuko.eckert@amd.com
41710234Syasuko.eckert@amd.com    if (fcp.type == 0) { //high performance flash controller
41810234Syasuko.eckert@amd.com        cout << "Current McPAT does not support high performance flash "
41910234Syasuko.eckert@amd.com             << "controller since even low power designs are enough for "
42010234Syasuko.eckert@amd.com             << "maintain throughput" <<endl;
42110234Syasuko.eckert@amd.com        exit(0);
42210234Syasuko.eckert@amd.com    } else {
42310234Syasuko.eckert@amd.com        ctrl_area = 0.243 * (interface_ip.F_sz_um / 0.065) *
42410234Syasuko.eckert@amd.com            (interface_ip.F_sz_um / 0.065);
42510234Syasuko.eckert@amd.com        //Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL
42610234Syasuko.eckert@amd.com        //from CAST
42710234Syasuko.eckert@amd.com        SerDer_area = 0.36 / 8 * (interface_ip.F_sz_um / 0.065) *
42810234Syasuko.eckert@amd.com            (interface_ip.F_sz_um / 0.065);
42910234Syasuko.eckert@amd.com    }
43010234Syasuko.eckert@amd.com
43110234Syasuko.eckert@amd.com    double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
43210234Syasuko.eckert@amd.com    output_data.area = (ctrl_area + (fcp.withPHY ? SerDer_area : 0)) *
43310234Syasuko.eckert@amd.com        1e6 * number_channel;
43410152Satgutier@umich.edu}
43510152Satgutier@umich.edu
43610234Syasuko.eckert@amd.comvoid FlashController::computeEnergy() {
43710234Syasuko.eckert@amd.com    double ctrl_dyn;
43810234Syasuko.eckert@amd.com    double SerDer_dyn;
43910234Syasuko.eckert@amd.com    double ctrl_gates;
44010234Syasuko.eckert@amd.com    double SerDer_gates;
44110234Syasuko.eckert@amd.com    double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
44210234Syasuko.eckert@amd.com    double NMOS_sizing;
44310234Syasuko.eckert@amd.com    double PMOS_sizing;
44410152Satgutier@umich.edu
44510234Syasuko.eckert@amd.com    /* Assuming Flash is bit-slice based architecture
44610234Syasuko.eckert@amd.com     * This is the reason for /8 in both area and power calculation
44710234Syasuko.eckert@amd.com     * to get per lane numbers
44810234Syasuko.eckert@amd.com     */
44910152Satgutier@umich.edu
45010234Syasuko.eckert@amd.com    if (fcp.type == 0) { //high performance flash controller
45110234Syasuko.eckert@amd.com        cout << "Current McPAT does not support high performance flash "
45210234Syasuko.eckert@amd.com             << "controller since even low power designs are enough for "
45310234Syasuko.eckert@amd.com             << "maintain throughput" <<endl;
45410234Syasuko.eckert@amd.com        exit(0);
45510234Syasuko.eckert@amd.com        NMOS_sizing = 5 * g_tp.min_w_nmos_;
45610234Syasuko.eckert@amd.com        PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
45710234Syasuko.eckert@amd.com    } else {
45810234Syasuko.eckert@amd.com        //based On PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it
45910234Syasuko.eckert@amd.com        //support 8x lanes with each lane speed up to 250MB/s (PCIe1.1x).
46010234Syasuko.eckert@amd.com        //This is already saturate the 200MB/s of the flash controller core
46110234Syasuko.eckert@amd.com        //above.
46210234Syasuko.eckert@amd.com        ctrl_gates = 129267;
46310234Syasuko.eckert@amd.com        SerDer_gates = 200000 / 8;
46410234Syasuko.eckert@amd.com        NMOS_sizing = g_tp.min_w_nmos_;
46510234Syasuko.eckert@amd.com        PMOS_sizing	= g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
46610152Satgutier@umich.edu
46710234Syasuko.eckert@amd.com        //Power
46810234Syasuko.eckert@amd.com        //Cadence ChipEstimate using 65nm the controller 125mW for every
46910234Syasuko.eckert@amd.com        //200MB/s This is power not energy!
47010234Syasuko.eckert@amd.com        ctrl_dyn = 0.125 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd /
47110234Syasuko.eckert@amd.com            1.1 * (interface_ip.F_sz_nm / 65.0);
47210234Syasuko.eckert@amd.com        //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
47310234Syasuko.eckert@amd.com        SerDer_dyn = 0.01 * 1.6 * (interface_ip.F_sz_um / 0.09) *
47410234Syasuko.eckert@amd.com            g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
47510234Syasuko.eckert@amd.com        //max  Per controller speed is 1.6Gb/s (200MB/s)
47610234Syasuko.eckert@amd.com    }
47710152Satgutier@umich.edu
47810234Syasuko.eckert@amd.com    double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
47910234Syasuko.eckert@amd.com    power.readOp.dynamic = (ctrl_dyn + (fcp.withPHY ? SerDer_dyn : 0)) *
48010234Syasuko.eckert@amd.com        number_channel;
48110234Syasuko.eckert@amd.com    power.readOp.leakage = ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) *
48210234Syasuko.eckert@amd.com                            number_channel) *
48310234Syasuko.eckert@amd.com        cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
48410234Syasuko.eckert@amd.com        g_tp.peri_global.Vdd;//unit W
48510234Syasuko.eckert@amd.com    double long_channel_device_reduction =
48610234Syasuko.eckert@amd.com        longer_channel_device_reduction(Uncore_device);
48710234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage =
48810234Syasuko.eckert@amd.com        power.readOp.leakage * long_channel_device_reduction;
48910234Syasuko.eckert@amd.com    power.readOp.gate_leakage =
49010234Syasuko.eckert@amd.com        ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) * number_channel) *
49110234Syasuko.eckert@amd.com        cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
49210234Syasuko.eckert@amd.com        g_tp.peri_global.Vdd;//unit W
49310152Satgutier@umich.edu
49410234Syasuko.eckert@amd.com    // Output power
49510234Syasuko.eckert@amd.com    output_data.subthreshold_leakage_power =
49610234Syasuko.eckert@amd.com        longer_channel_device ? power.readOp.longer_channel_leakage :
49710234Syasuko.eckert@amd.com        power.readOp.leakage;
49810234Syasuko.eckert@amd.com    output_data.gate_leakage_power = power.readOp.gate_leakage;
49910234Syasuko.eckert@amd.com    output_data.peak_dynamic_power = power.readOp.dynamic * fcs.duty_cycle;
50010234Syasuko.eckert@amd.com    output_data.runtime_dynamic_energy = power.readOp.dynamic * fcs.perc_load;
50110152Satgutier@umich.edu}
50210152Satgutier@umich.edu
50310152Satgutier@umich.eduvoid FlashController::set_fc_param()
50410152Satgutier@umich.edu{
50510234Syasuko.eckert@amd.com    int num_children = xml_data->nChildNode("param");
50610234Syasuko.eckert@amd.com    int i;
50710234Syasuko.eckert@amd.com    for (i = 0; i < num_children; i++) {
50810234Syasuko.eckert@amd.com        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
50910234Syasuko.eckert@amd.com        XMLCSTR node_name = paramNode->getAttribute("name");
51010234Syasuko.eckert@amd.com        XMLCSTR value = paramNode->getAttribute("value");
51110152Satgutier@umich.edu
51210234Syasuko.eckert@amd.com        if (!node_name)
51310234Syasuko.eckert@amd.com            warnMissingParamName(paramNode->getAttribute("id"));
51410234Syasuko.eckert@amd.com
51510234Syasuko.eckert@amd.com        ASSIGN_INT_IF("num_channels", fcp.num_channels);
51610234Syasuko.eckert@amd.com        ASSIGN_INT_IF("type", fcp.type);
51710234Syasuko.eckert@amd.com        ASSIGN_ENUM_IF("withPHY", fcp.withPHY, bool);
51810234Syasuko.eckert@amd.com
51910234Syasuko.eckert@amd.com        else {
52010234Syasuko.eckert@amd.com            warnUnrecognizedParam(node_name);
52110234Syasuko.eckert@amd.com        }
52210234Syasuko.eckert@amd.com    }
52310234Syasuko.eckert@amd.com
52410234Syasuko.eckert@amd.com    num_children = xml_data->nChildNode("stat");
52510234Syasuko.eckert@amd.com    for (i = 0; i < num_children; i++) {
52610234Syasuko.eckert@amd.com        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
52710234Syasuko.eckert@amd.com        XMLCSTR node_name = statNode->getAttribute("name");
52810234Syasuko.eckert@amd.com        XMLCSTR value = statNode->getAttribute("value");
52910234Syasuko.eckert@amd.com
53010234Syasuko.eckert@amd.com        if (!node_name)
53110234Syasuko.eckert@amd.com            warnMissingStatName(statNode->getAttribute("id"));
53210234Syasuko.eckert@amd.com
53310234Syasuko.eckert@amd.com        ASSIGN_FP_IF("duty_cycle", fcs.duty_cycle);
53410234Syasuko.eckert@amd.com        ASSIGN_FP_IF("perc_load", fcs.perc_load);
53510234Syasuko.eckert@amd.com
53610234Syasuko.eckert@amd.com        else {
53710234Syasuko.eckert@amd.com            warnUnrecognizedStat(node_name);
53810234Syasuko.eckert@amd.com        }
53910234Syasuko.eckert@amd.com    }
54010152Satgutier@umich.edu}
541