110152Satgutier@umich.edu/***************************************************************************** 210152Satgutier@umich.edu * McPAT 310152Satgutier@umich.edu * SOFTWARE LICENSE AGREEMENT 410152Satgutier@umich.edu * Copyright 2012 Hewlett-Packard Development Company, L.P. 510234Syasuko.eckert@amd.com * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 610152Satgutier@umich.edu * All Rights Reserved 710152Satgutier@umich.edu * 810152Satgutier@umich.edu * Redistribution and use in source and binary forms, with or without 910152Satgutier@umich.edu * modification, are permitted provided that the following conditions are 1010152Satgutier@umich.edu * met: redistributions of source code must retain the above copyright 1110152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer; 1210152Satgutier@umich.edu * redistributions in binary form must reproduce the above copyright 1310152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer in the 1410152Satgutier@umich.edu * documentation and/or other materials provided with the distribution; 1510152Satgutier@umich.edu * neither the name of the copyright holders nor the names of its 1610152Satgutier@umich.edu * contributors may be used to endorse or promote products derived from 1710152Satgutier@umich.edu * this software without specific prior written permission. 1810152Satgutier@umich.edu 1910152Satgutier@umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2010152Satgutier@umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2110152Satgutier@umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 2210152Satgutier@umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 2310152Satgutier@umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 2410152Satgutier@umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 2510152Satgutier@umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2610152Satgutier@umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2710152Satgutier@umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2810152Satgutier@umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 2910234Syasuko.eckert@amd.com * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3010152Satgutier@umich.edu * 3110152Satgutier@umich.edu ***************************************************************************/ 3210152Satgutier@umich.edu#include <algorithm> 3310152Satgutier@umich.edu#include <cassert> 3410152Satgutier@umich.edu#include <cmath> 3510152Satgutier@umich.edu#include <iostream> 3610152Satgutier@umich.edu#include <string> 3710152Satgutier@umich.edu 3810152Satgutier@umich.edu#include "basic_circuit.h" 3910234Syasuko.eckert@amd.com#include "common.h" 4010152Satgutier@umich.edu#include "const.h" 4110152Satgutier@umich.edu#include "io.h" 4210152Satgutier@umich.edu#include "iocontrollers.h" 4310152Satgutier@umich.edu#include "logic.h" 4410152Satgutier@umich.edu 4510152Satgutier@umich.edu/* 4610152Satgutier@umich.eduSUN Niagara 2 I/O power analysis: 4710152Satgutier@umich.edutotal signal bits: 711 4810152Satgutier@umich.eduTotal FBDIMM bits: (14+10)*2*8= 384 4910152Satgutier@umich.eduPCIe bits: (8 + 8)*2 = 32 5010152Satgutier@umich.edu10Gb NIC: (4*2+4*2)*2 = 32 5110152Satgutier@umich.eduDebug I/Os: 168 5210152Satgutier@umich.eduOther I/Os: 711- 32-32 - 384 - 168 = 95 5310152Satgutier@umich.edu 5410152Satgutier@umich.eduAccording to "Implementation of an 8-Core, 64-Thread, Power-Efficient SPARC Server on a Chip" 
90% of I/Os are SerDes (the calculation is (384+64)/(711-168) = 83%, about the same as the 90% reported in the paper)
--> around 80 pins are common I/Os.
Common I/Os consume 71mW/Gb/s according to Cadence ChipEstimate @65nm
Niagara 2 I/O clock is 1/4 of core clock. --> 87pin (<--((711-168)*17%)) * 71mW/Gb/s * 0.25 * 1.4GHz = 2.17W

Total dynamic power of FBDIMM, NIC, PCIe = 84*0.132 + 84*0.049*0.132 = 11.14 - 2.17 = 8.98
Further, if assuming I/O logic power is about 50% of I/Os, then total energy of FBDIMM, NIC, PCIe = 11.14 - 2.17*1.5 = 7.89
 */

/*
 * A bug in Cadence ChipEstimator: after updating the clock rate in the clock
 * tab, a user needs to re-select the IP clock (the same clk) and then click
 * Estimate. If it is not re-selected, the new clock rate may not be
 * propagated into the IPs.
6810152Satgutier@umich.edu * 6910152Satgutier@umich.edu */ 7010152Satgutier@umich.edu 7110234Syasuko.eckert@amd.comNIUController::NIUController(XMLNode* _xml_data,InputParameter* interface_ip_) 7210234Syasuko.eckert@amd.com : McPATComponent(_xml_data, interface_ip_) { 7310234Syasuko.eckert@amd.com name = "NIU"; 7410234Syasuko.eckert@amd.com set_niu_param(); 7510234Syasuko.eckert@amd.com} 7610152Satgutier@umich.edu 7710234Syasuko.eckert@amd.comvoid NIUController::computeArea() { 7810234Syasuko.eckert@amd.com double mac_area; 7910234Syasuko.eckert@amd.com double frontend_area; 8010234Syasuko.eckert@amd.com double SerDer_area; 8110152Satgutier@umich.edu 8210234Syasuko.eckert@amd.com if (niup.type == 0) { //high performance NIU 8310234Syasuko.eckert@amd.com //Area estimation based on average of die photo from Niagara 2 and 8410234Syasuko.eckert@amd.com //Cadence ChipEstimate using 65nm. 8510234Syasuko.eckert@amd.com mac_area = (1.53 + 0.3) / 2 * (interface_ip.F_sz_um / 0.065) * 8610234Syasuko.eckert@amd.com (interface_ip.F_sz_um / 0.065); 8710234Syasuko.eckert@amd.com //Area estimation based on average of die photo from Niagara 2, ISSCC 8810234Syasuko.eckert@amd.com //"An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS" 8910234Syasuko.eckert@amd.com //and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface 9010234Syasuko.eckert@amd.com //With Robust VCO Tuning Technique" Frontend is PCS 9110234Syasuko.eckert@amd.com frontend_area = (9.8 + (6 + 18) * 65 / 130 * 65 / 130) / 3 * 9210234Syasuko.eckert@amd.com (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065); 9310234Syasuko.eckert@amd.com //Area estimation based on average of die photo from Niagara 2 and 9410234Syasuko.eckert@amd.com //Cadence ChipEstimate hard IP @65nm. 
9510234Syasuko.eckert@amd.com //SerDer is very hard to scale 9610234Syasuko.eckert@amd.com SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um / 9710234Syasuko.eckert@amd.com 0.065);//* (interface_ip.F_sz_um/0.065); 9810234Syasuko.eckert@amd.com } else { 9910234Syasuko.eckert@amd.com //Low power implementations are mostly from Cadence ChipEstimator; 10010234Syasuko.eckert@amd.com //Ignore the multiple IP effect 10110234Syasuko.eckert@amd.com // ---When there are multiple IP (same kind or not) selected, Cadence 10210234Syasuko.eckert@amd.com //ChipEstimator results are not a simple summation of all IPs. 10310234Syasuko.eckert@amd.com //Ignore this effect 10410234Syasuko.eckert@amd.com mac_area = 0.24 * (interface_ip.F_sz_um / 0.065) * 10510234Syasuko.eckert@amd.com (interface_ip.F_sz_um / 0.065); 10610234Syasuko.eckert@amd.com frontend_area = 0.1 * (interface_ip.F_sz_um / 0.065) * 10710234Syasuko.eckert@amd.com (interface_ip.F_sz_um / 0.065);//Frontend is the PCS layer 10810234Syasuko.eckert@amd.com SerDer_area = 0.35 * (interface_ip.F_sz_um / 0.065) * 10910234Syasuko.eckert@amd.com (interface_ip.F_sz_um/0.065); 11010234Syasuko.eckert@amd.com //Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet 11110234Syasuko.eckert@amd.com //Transceiver and XAUI Interface With Robust VCO Tuning Technique" 11210234Syasuko.eckert@amd.com //and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can 11310234Syasuko.eckert@amd.com //scale perfectly with the technology 11410234Syasuko.eckert@amd.com } 11510152Satgutier@umich.edu 11610234Syasuko.eckert@amd.com //total area 11710234Syasuko.eckert@amd.com output_data.area = (mac_area + frontend_area + SerDer_area) * 1e6; 11810152Satgutier@umich.edu } 11910152Satgutier@umich.edu 12010234Syasuko.eckert@amd.comvoid NIUController::computeEnergy() { 12110234Syasuko.eckert@amd.com double mac_dyn; 12210234Syasuko.eckert@amd.com double frontend_dyn; 12310234Syasuko.eckert@amd.com double SerDer_dyn; 
12410234Syasuko.eckert@amd.com double frontend_gates; 12510234Syasuko.eckert@amd.com double mac_gates; 12610234Syasuko.eckert@amd.com double SerDer_gates; 12710234Syasuko.eckert@amd.com double NMOS_sizing; 12810234Syasuko.eckert@amd.com double PMOS_sizing; 12910234Syasuko.eckert@amd.com double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); 13010152Satgutier@umich.edu 13110234Syasuko.eckert@amd.com if (niup.type == 0) { //high performance NIU 13210234Syasuko.eckert@amd.com //Power 13310234Syasuko.eckert@amd.com //Cadence ChipEstimate using 65nm (mac, front_end are all energy. 13410234Syasuko.eckert@amd.com //E=P*T = P/F = 1.37/1Ghz = 1.37e-9); 13510234Syasuko.eckert@amd.com //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm 13610234Syasuko.eckert@amd.com mac_dyn = 2.19e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / 13710234Syasuko.eckert@amd.com 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate; 13810234Syasuko.eckert@amd.com //Cadence ChipEstimate using 65nm soft IP; 13910234Syasuko.eckert@amd.com frontend_dyn = 0.27e-9 * g_tp.peri_global.Vdd / 1.1 * 14010234Syasuko.eckert@amd.com g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0); 14110234Syasuko.eckert@amd.com //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." 
ISSCC 2006 14210234Syasuko.eckert@amd.com //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm 14310234Syasuko.eckert@amd.com SerDer_dyn = 0.01 * 10 * sqrt(interface_ip.F_sz_um / 0.09) * 14410234Syasuko.eckert@amd.com g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; 14510152Satgutier@umich.edu 14610234Syasuko.eckert@amd.com //Cadence ChipEstimate using 65nm 14710234Syasuko.eckert@amd.com mac_gates = 111700; 14810234Syasuko.eckert@amd.com frontend_gates = 320000; 14910234Syasuko.eckert@amd.com SerDer_gates = 200000; 15010234Syasuko.eckert@amd.com NMOS_sizing = 5 * g_tp.min_w_nmos_; 15110234Syasuko.eckert@amd.com PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; 15210234Syasuko.eckert@amd.com } else { 15310234Syasuko.eckert@amd.com //Power 15410234Syasuko.eckert@amd.com //Cadence ChipEstimate using 65nm (mac, front_end are all energy. 15510234Syasuko.eckert@amd.com ///E=P*T = P/F = 1.37/1Ghz = 1.37e-9); 15610234Syasuko.eckert@amd.com //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm 15710234Syasuko.eckert@amd.com mac_dyn = 1.257e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd 15810234Syasuko.eckert@amd.com / 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate; 15910234Syasuko.eckert@amd.com //Cadence ChipEstimate using 65nm soft IP; 16010234Syasuko.eckert@amd.com frontend_dyn = 0.6e-9 * g_tp.peri_global.Vdd / 1.1 * 16110234Syasuko.eckert@amd.com g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0); 16210234Syasuko.eckert@amd.com //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm 16310234Syasuko.eckert@amd.com SerDer_dyn = 0.0216 * 10 * (interface_ip.F_sz_um / 0.13) * 16410234Syasuko.eckert@amd.com g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; 16510152Satgutier@umich.edu 16610234Syasuko.eckert@amd.com mac_gates = 111700; 16710234Syasuko.eckert@amd.com frontend_gates = 52000; 16810234Syasuko.eckert@amd.com SerDer_gates = 199260; 16910234Syasuko.eckert@amd.com NMOS_sizing = 
g_tp.min_w_nmos_; 17010234Syasuko.eckert@amd.com PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; 17110152Satgutier@umich.edu } 17210234Syasuko.eckert@amd.com 17310234Syasuko.eckert@amd.com //covert to energy per clock cycle of whole NIU 17410234Syasuko.eckert@amd.com SerDer_dyn /= niup.clockRate; 17510234Syasuko.eckert@amd.com 17610234Syasuko.eckert@amd.com power.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn; 17710234Syasuko.eckert@amd.com power.readOp.leakage = (mac_gates + frontend_gates + frontend_gates) * 17810234Syasuko.eckert@amd.com cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * 17910234Syasuko.eckert@amd.com g_tp.peri_global.Vdd;//unit W 18010234Syasuko.eckert@amd.com double long_channel_device_reduction = 18110234Syasuko.eckert@amd.com longer_channel_device_reduction(Uncore_device); 18210234Syasuko.eckert@amd.com power.readOp.longer_channel_leakage = 18310234Syasuko.eckert@amd.com power.readOp.leakage * long_channel_device_reduction; 18410234Syasuko.eckert@amd.com power.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates) * 18510234Syasuko.eckert@amd.com cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * 18610234Syasuko.eckert@amd.com g_tp.peri_global.Vdd;//unit W 18710234Syasuko.eckert@amd.com 18810234Syasuko.eckert@amd.com // Output power 18910234Syasuko.eckert@amd.com output_data.subthreshold_leakage_power = 19010234Syasuko.eckert@amd.com longer_channel_device ? 
power.readOp.longer_channel_leakage : 19110234Syasuko.eckert@amd.com power.readOp.leakage; 19210234Syasuko.eckert@amd.com output_data.gate_leakage_power = power.readOp.gate_leakage; 19310234Syasuko.eckert@amd.com output_data.peak_dynamic_power = power.readOp.dynamic * nius.duty_cycle; 19410234Syasuko.eckert@amd.com output_data.runtime_dynamic_energy = power.readOp.dynamic * nius.perc_load; 19510234Syasuko.eckert@amd.com} 19610234Syasuko.eckert@amd.com 19710234Syasuko.eckert@amd.comvoid NIUController::set_niu_param() { 19810234Syasuko.eckert@amd.com int num_children = xml_data->nChildNode("param"); 19910234Syasuko.eckert@amd.com int i; 20010234Syasuko.eckert@amd.com for (i = 0; i < num_children; i++) { 20110234Syasuko.eckert@amd.com XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); 20210234Syasuko.eckert@amd.com XMLCSTR node_name = paramNode->getAttribute("name"); 20310234Syasuko.eckert@amd.com XMLCSTR value = paramNode->getAttribute("value"); 20410234Syasuko.eckert@amd.com 20510234Syasuko.eckert@amd.com if (!node_name) 20610234Syasuko.eckert@amd.com warnMissingParamName(paramNode->getAttribute("id")); 20710234Syasuko.eckert@amd.com 20810234Syasuko.eckert@amd.com ASSIGN_FP_IF("niu_clockRate", niup.clockRate); 20910234Syasuko.eckert@amd.com ASSIGN_INT_IF("num_units", niup.num_units); 21010234Syasuko.eckert@amd.com ASSIGN_INT_IF("type", niup.type); 21110234Syasuko.eckert@amd.com 21210234Syasuko.eckert@amd.com else { 21310234Syasuko.eckert@amd.com warnUnrecognizedParam(node_name); 21410234Syasuko.eckert@amd.com } 21510234Syasuko.eckert@amd.com } 21610234Syasuko.eckert@amd.com 21710234Syasuko.eckert@amd.com // Change from MHz to Hz 21810234Syasuko.eckert@amd.com niup.clockRate *= 1e6; 21910234Syasuko.eckert@amd.com 22010234Syasuko.eckert@amd.com num_children = xml_data->nChildNode("stat"); 22110234Syasuko.eckert@amd.com for (i = 0; i < num_children; i++) { 22210234Syasuko.eckert@amd.com XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); 
22310234Syasuko.eckert@amd.com XMLCSTR node_name = statNode->getAttribute("name"); 22410234Syasuko.eckert@amd.com XMLCSTR value = statNode->getAttribute("value"); 22510234Syasuko.eckert@amd.com 22610234Syasuko.eckert@amd.com if (!node_name) 22710234Syasuko.eckert@amd.com warnMissingStatName(statNode->getAttribute("id")); 22810234Syasuko.eckert@amd.com 22910234Syasuko.eckert@amd.com ASSIGN_FP_IF("duty_cycle", nius.duty_cycle); 23010234Syasuko.eckert@amd.com ASSIGN_FP_IF("perc_load", nius.perc_load); 23110234Syasuko.eckert@amd.com 23210234Syasuko.eckert@amd.com else { 23310234Syasuko.eckert@amd.com warnUnrecognizedStat(node_name); 23410234Syasuko.eckert@amd.com } 23510152Satgutier@umich.edu } 23610152Satgutier@umich.edu} 23710152Satgutier@umich.edu 23810234Syasuko.eckert@amd.comPCIeController::PCIeController(XMLNode* _xml_data, 23910234Syasuko.eckert@amd.com InputParameter* interface_ip_) 24010234Syasuko.eckert@amd.com : McPATComponent(_xml_data, interface_ip_) { 24110234Syasuko.eckert@amd.com name = "PCIe"; 24210234Syasuko.eckert@amd.com set_pcie_param(); 24310152Satgutier@umich.edu} 24410152Satgutier@umich.edu 24510234Syasuko.eckert@amd.comvoid PCIeController::computeArea() { 24610234Syasuko.eckert@amd.com double ctrl_area; 24710234Syasuko.eckert@amd.com double SerDer_area; 24810234Syasuko.eckert@amd.com 24910234Syasuko.eckert@amd.com /* Assuming PCIe is bit-slice based architecture 25010234Syasuko.eckert@amd.com * This is the reason for /8 in both area and power calculation 25110234Syasuko.eckert@amd.com * to get per lane numbers 25210234Syasuko.eckert@amd.com */ 25310234Syasuko.eckert@amd.com 25410234Syasuko.eckert@amd.com if (pciep.type == 0) { //high performance PCIe 25510234Syasuko.eckert@amd.com //Area estimation based on average of die photo from Niagara 2 and 25610234Syasuko.eckert@amd.com //Cadence ChipEstimate @ 65nm. 
25710234Syasuko.eckert@amd.com ctrl_area = (5.2 + 0.5) / 2 * (interface_ip.F_sz_um / 0.065) * 25810234Syasuko.eckert@amd.com (interface_ip.F_sz_um / 0.065); 25910234Syasuko.eckert@amd.com //Area estimation based on average of die photo from Niagara 2 and 26010234Syasuko.eckert@amd.com //Cadence ChipEstimate hard IP @65nm. 26110234Syasuko.eckert@amd.com //SerDer is very hard to scale 26210234Syasuko.eckert@amd.com SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um / 26310234Syasuko.eckert@amd.com 0.065);//* (interface_ip.F_sz_um/0.065); 26410234Syasuko.eckert@amd.com } else { 26510234Syasuko.eckert@amd.com ctrl_area = 0.412 * (interface_ip.F_sz_um / 0.065) * 26610234Syasuko.eckert@amd.com (interface_ip.F_sz_um / 0.065); 26710234Syasuko.eckert@amd.com //Area estimation based on average of die photo from Niagara 2, and 26810234Syasuko.eckert@amd.com //Cadence ChipEstimate @ 65nm. 26910234Syasuko.eckert@amd.com SerDer_area = 0.36 * (interface_ip.F_sz_um / 0.065) * 27010234Syasuko.eckert@amd.com (interface_ip.F_sz_um / 0.065); 27110234Syasuko.eckert@amd.com } 27210234Syasuko.eckert@amd.com 27310234Syasuko.eckert@amd.com // Total area 27410234Syasuko.eckert@amd.com output_data.area = ((ctrl_area + (pciep.withPHY ? 
SerDer_area : 0)) / 8 * 27510234Syasuko.eckert@amd.com pciep.num_channels) * 1e6; 27610152Satgutier@umich.edu} 27710152Satgutier@umich.edu 27810234Syasuko.eckert@amd.comvoid PCIeController::computeEnergy() { 27910234Syasuko.eckert@amd.com double ctrl_dyn; 28010234Syasuko.eckert@amd.com double SerDer_dyn; 28110234Syasuko.eckert@amd.com double ctrl_gates; 28210234Syasuko.eckert@amd.com double SerDer_gates = 0; 28310234Syasuko.eckert@amd.com double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); 28410234Syasuko.eckert@amd.com double NMOS_sizing; 28510234Syasuko.eckert@amd.com double PMOS_sizing; 28610152Satgutier@umich.edu 28710234Syasuko.eckert@amd.com /* Assuming PCIe is bit-slice based architecture 28810234Syasuko.eckert@amd.com * This is the reason for /8 in both area and power calculation 28910234Syasuko.eckert@amd.com * to get per lane numbers 29010234Syasuko.eckert@amd.com */ 29110152Satgutier@umich.edu 29210234Syasuko.eckert@amd.com if (pciep.type == 0) { //high performance PCIe 29310234Syasuko.eckert@amd.com //Power 29410234Syasuko.eckert@amd.com //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer 29510234Syasuko.eckert@amd.com ctrl_dyn = 3.75e-9 / 8 * g_tp.peri_global.Vdd / 1.1 * 29610234Syasuko.eckert@amd.com g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0); 29710234Syasuko.eckert@amd.com // //Cadence ChipEstimate using 65nm soft IP; 29810234Syasuko.eckert@amd.com // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); 29910234Syasuko.eckert@amd.com //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm 30010234Syasuko.eckert@amd.com //PCIe 2.0 max per lane speed is 4Gb/s 30110234Syasuko.eckert@amd.com SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um /0.09) * 30210234Syasuko.eckert@amd.com g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2; 30310152Satgutier@umich.edu 30410234Syasuko.eckert@amd.com //Cadence ChipEstimate 
using 65nm 30510234Syasuko.eckert@amd.com ctrl_gates = 900000 / 8 * pciep.num_channels; 30610234Syasuko.eckert@amd.com // frontend_gates = 120000/8; 30710234Syasuko.eckert@amd.com // SerDer_gates = 200000/8; 30810234Syasuko.eckert@amd.com NMOS_sizing = 5 * g_tp.min_w_nmos_; 30910234Syasuko.eckert@amd.com PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; 31010234Syasuko.eckert@amd.com } else { 31110234Syasuko.eckert@amd.com //Power 31210234Syasuko.eckert@amd.com //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer 31310234Syasuko.eckert@amd.com ctrl_dyn = 2.21e-9 / 8 * g_tp.peri_global.Vdd / 1.1 * 31410234Syasuko.eckert@amd.com g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0); 31510234Syasuko.eckert@amd.com // //Cadence ChipEstimate using 65nm soft IP; 31610234Syasuko.eckert@amd.com // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); 31710234Syasuko.eckert@amd.com //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm 31810234Syasuko.eckert@amd.com //PCIe 2.0 max per lane speed is 4Gb/s 31910234Syasuko.eckert@amd.com SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) * 32010234Syasuko.eckert@amd.com g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2; 32110152Satgutier@umich.edu 32210234Syasuko.eckert@amd.com //Cadence ChipEstimate using 65nm 32310234Syasuko.eckert@amd.com ctrl_gates = 200000 / 8 * pciep.num_channels; 32410234Syasuko.eckert@amd.com // frontend_gates = 120000/8; 32510234Syasuko.eckert@amd.com SerDer_gates = 200000 / 8 * pciep.num_channels; 32610234Syasuko.eckert@amd.com NMOS_sizing = g_tp.min_w_nmos_; 32710234Syasuko.eckert@amd.com PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; 32810152Satgutier@umich.edu 32910152Satgutier@umich.edu } 33010234Syasuko.eckert@amd.com 33110234Syasuko.eckert@amd.com //covert to energy per clock cycle 33210234Syasuko.eckert@amd.com SerDer_dyn /= pciep.clockRate; 
33310234Syasuko.eckert@amd.com 33410234Syasuko.eckert@amd.com power.readOp.dynamic = (ctrl_dyn + (pciep.withPHY ? SerDer_dyn : 0)) * 33510234Syasuko.eckert@amd.com pciep.num_channels; 33610234Syasuko.eckert@amd.com power.readOp.leakage = (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) * 33710234Syasuko.eckert@amd.com cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * 33810234Syasuko.eckert@amd.com g_tp.peri_global.Vdd;//unit W 33910234Syasuko.eckert@amd.com double long_channel_device_reduction = 34010234Syasuko.eckert@amd.com longer_channel_device_reduction(Uncore_device); 34110234Syasuko.eckert@amd.com power.readOp.longer_channel_leakage = 34210234Syasuko.eckert@amd.com power.readOp.leakage * long_channel_device_reduction; 34310234Syasuko.eckert@amd.com power.readOp.gate_leakage = (ctrl_gates + 34410234Syasuko.eckert@amd.com (pciep.withPHY ? SerDer_gates : 0)) * 34510234Syasuko.eckert@amd.com cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * 34610234Syasuko.eckert@amd.com g_tp.peri_global.Vdd;//unit W 34710234Syasuko.eckert@amd.com 34810234Syasuko.eckert@amd.com // Output power 34910234Syasuko.eckert@amd.com output_data.subthreshold_leakage_power = 35010234Syasuko.eckert@amd.com longer_channel_device ? 
power.readOp.longer_channel_leakage : 35110234Syasuko.eckert@amd.com power.readOp.leakage; 35210234Syasuko.eckert@amd.com output_data.gate_leakage_power = power.readOp.gate_leakage; 35310234Syasuko.eckert@amd.com output_data.peak_dynamic_power = power.readOp.dynamic * pcies.duty_cycle; 35410234Syasuko.eckert@amd.com output_data.runtime_dynamic_energy = 35510234Syasuko.eckert@amd.com power.readOp.dynamic * pcies.perc_load; 35610234Syasuko.eckert@amd.com} 35710234Syasuko.eckert@amd.com 35810234Syasuko.eckert@amd.comvoid PCIeController::set_pcie_param() { 35910234Syasuko.eckert@amd.com int num_children = xml_data->nChildNode("param"); 36010234Syasuko.eckert@amd.com int i; 36110234Syasuko.eckert@amd.com for (i = 0; i < num_children; i++) { 36210234Syasuko.eckert@amd.com XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); 36310234Syasuko.eckert@amd.com XMLCSTR node_name = paramNode->getAttribute("name"); 36410234Syasuko.eckert@amd.com XMLCSTR value = paramNode->getAttribute("value"); 36510234Syasuko.eckert@amd.com 36610234Syasuko.eckert@amd.com if (!node_name) 36710234Syasuko.eckert@amd.com warnMissingParamName(paramNode->getAttribute("id")); 36810234Syasuko.eckert@amd.com 36910234Syasuko.eckert@amd.com ASSIGN_FP_IF("pcie_clockRate", pciep.clockRate); 37010234Syasuko.eckert@amd.com ASSIGN_INT_IF("num_units", pciep.num_units); 37110234Syasuko.eckert@amd.com ASSIGN_INT_IF("num_channels", pciep.num_channels); 37210234Syasuko.eckert@amd.com ASSIGN_INT_IF("type", pciep.type); 37310234Syasuko.eckert@amd.com ASSIGN_ENUM_IF("withPHY", pciep.withPHY, bool); 37410234Syasuko.eckert@amd.com 37510234Syasuko.eckert@amd.com else { 37610234Syasuko.eckert@amd.com warnUnrecognizedParam(node_name); 37710234Syasuko.eckert@amd.com } 37810234Syasuko.eckert@amd.com } 37910234Syasuko.eckert@amd.com 38010234Syasuko.eckert@amd.com // Change from MHz to Hz 38110234Syasuko.eckert@amd.com pciep.clockRate *= 1e6; 38210234Syasuko.eckert@amd.com 38310234Syasuko.eckert@amd.com num_children = 
xml_data->nChildNode("stat"); 38410234Syasuko.eckert@amd.com for (i = 0; i < num_children; i++) { 38510234Syasuko.eckert@amd.com XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); 38610234Syasuko.eckert@amd.com XMLCSTR node_name = statNode->getAttribute("name"); 38710234Syasuko.eckert@amd.com XMLCSTR value = statNode->getAttribute("value"); 38810234Syasuko.eckert@amd.com 38910234Syasuko.eckert@amd.com if (!node_name) 39010234Syasuko.eckert@amd.com warnMissingStatName(statNode->getAttribute("id")); 39110234Syasuko.eckert@amd.com 39210234Syasuko.eckert@amd.com ASSIGN_FP_IF("duty_cycle", pcies.duty_cycle); 39310234Syasuko.eckert@amd.com ASSIGN_FP_IF("perc_load", pcies.perc_load); 39410234Syasuko.eckert@amd.com 39510234Syasuko.eckert@amd.com else { 39610234Syasuko.eckert@amd.com warnUnrecognizedStat(node_name); 39710234Syasuko.eckert@amd.com } 39810152Satgutier@umich.edu } 39910152Satgutier@umich.edu} 40010152Satgutier@umich.edu 40110234Syasuko.eckert@amd.comFlashController::FlashController(XMLNode* _xml_data, 40210234Syasuko.eckert@amd.com InputParameter* interface_ip_) 40310234Syasuko.eckert@amd.com : McPATComponent(_xml_data, interface_ip_) { 40410234Syasuko.eckert@amd.com name = "Flash Controller"; 40510234Syasuko.eckert@amd.com set_fc_param(); 40610152Satgutier@umich.edu} 40710152Satgutier@umich.edu 40810234Syasuko.eckert@amd.comvoid FlashController::computeArea() { 40910234Syasuko.eckert@amd.com double ctrl_area; 41010234Syasuko.eckert@amd.com double SerDer_area; 41110152Satgutier@umich.edu 41210234Syasuko.eckert@amd.com /* Assuming Flash is bit-slice based architecture 41310234Syasuko.eckert@amd.com * This is the reason for /8 in both area and power calculation 41410234Syasuko.eckert@amd.com * to get per lane numbers 41510234Syasuko.eckert@amd.com */ 41610234Syasuko.eckert@amd.com 41710234Syasuko.eckert@amd.com if (fcp.type == 0) { //high performance flash controller 41810234Syasuko.eckert@amd.com cout << "Current McPAT does not support high performance 
flash " 41910234Syasuko.eckert@amd.com << "controller since even low power designs are enough for " 42010234Syasuko.eckert@amd.com << "maintain throughput" <<endl; 42110234Syasuko.eckert@amd.com exit(0); 42210234Syasuko.eckert@amd.com } else { 42310234Syasuko.eckert@amd.com ctrl_area = 0.243 * (interface_ip.F_sz_um / 0.065) * 42410234Syasuko.eckert@amd.com (interface_ip.F_sz_um / 0.065); 42510234Syasuko.eckert@amd.com //Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL 42610234Syasuko.eckert@amd.com //from CAST 42710234Syasuko.eckert@amd.com SerDer_area = 0.36 / 8 * (interface_ip.F_sz_um / 0.065) * 42810234Syasuko.eckert@amd.com (interface_ip.F_sz_um / 0.065); 42910234Syasuko.eckert@amd.com } 43010234Syasuko.eckert@amd.com 43110234Syasuko.eckert@amd.com double number_channel = 1 + (fcp.num_channels - 1) * 0.2; 43210234Syasuko.eckert@amd.com output_data.area = (ctrl_area + (fcp.withPHY ? SerDer_area : 0)) * 43310234Syasuko.eckert@amd.com 1e6 * number_channel; 43410152Satgutier@umich.edu} 43510152Satgutier@umich.edu 43610234Syasuko.eckert@amd.comvoid FlashController::computeEnergy() { 43710234Syasuko.eckert@amd.com double ctrl_dyn; 43810234Syasuko.eckert@amd.com double SerDer_dyn; 43910234Syasuko.eckert@amd.com double ctrl_gates; 44010234Syasuko.eckert@amd.com double SerDer_gates; 44110234Syasuko.eckert@amd.com double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); 44210234Syasuko.eckert@amd.com double NMOS_sizing; 44310234Syasuko.eckert@amd.com double PMOS_sizing; 44410152Satgutier@umich.edu 44510234Syasuko.eckert@amd.com /* Assuming Flash is bit-slice based architecture 44610234Syasuko.eckert@amd.com * This is the reason for /8 in both area and power calculation 44710234Syasuko.eckert@amd.com * to get per lane numbers 44810234Syasuko.eckert@amd.com */ 44910152Satgutier@umich.edu 45010234Syasuko.eckert@amd.com if (fcp.type == 0) { //high performance flash controller 45110234Syasuko.eckert@amd.com cout << "Current McPAT does not support high 
performance flash " 45210234Syasuko.eckert@amd.com << "controller since even low power designs are enough for " 45310234Syasuko.eckert@amd.com << "maintain throughput" <<endl; 45410234Syasuko.eckert@amd.com exit(0); 45510234Syasuko.eckert@amd.com NMOS_sizing = 5 * g_tp.min_w_nmos_; 45610234Syasuko.eckert@amd.com PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; 45710234Syasuko.eckert@amd.com } else { 45810234Syasuko.eckert@amd.com //based On PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it 45910234Syasuko.eckert@amd.com //support 8x lanes with each lane speed up to 250MB/s (PCIe1.1x). 46010234Syasuko.eckert@amd.com //This is already saturate the 200MB/s of the flash controller core 46110234Syasuko.eckert@amd.com //above. 46210234Syasuko.eckert@amd.com ctrl_gates = 129267; 46310234Syasuko.eckert@amd.com SerDer_gates = 200000 / 8; 46410234Syasuko.eckert@amd.com NMOS_sizing = g_tp.min_w_nmos_; 46510234Syasuko.eckert@amd.com PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; 46610152Satgutier@umich.edu 46710234Syasuko.eckert@amd.com //Power 46810234Syasuko.eckert@amd.com //Cadence ChipEstimate using 65nm the controller 125mW for every 46910234Syasuko.eckert@amd.com //200MB/s This is power not energy! 47010234Syasuko.eckert@amd.com ctrl_dyn = 0.125 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / 47110234Syasuko.eckert@amd.com 1.1 * (interface_ip.F_sz_nm / 65.0); 47210234Syasuko.eckert@amd.com //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm 47310234Syasuko.eckert@amd.com SerDer_dyn = 0.01 * 1.6 * (interface_ip.F_sz_um / 0.09) * 47410234Syasuko.eckert@amd.com g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; 47510234Syasuko.eckert@amd.com //max Per controller speed is 1.6Gb/s (200MB/s) 47610234Syasuko.eckert@amd.com } 47710152Satgutier@umich.edu 47810234Syasuko.eckert@amd.com double number_channel = 1 + (fcp.num_channels - 1) * 0.2; 47910234Syasuko.eckert@amd.com power.readOp.dynamic = (ctrl_dyn + (fcp.withPHY ? 
SerDer_dyn : 0)) * 48010234Syasuko.eckert@amd.com number_channel; 48110234Syasuko.eckert@amd.com power.readOp.leakage = ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) * 48210234Syasuko.eckert@amd.com number_channel) * 48310234Syasuko.eckert@amd.com cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * 48410234Syasuko.eckert@amd.com g_tp.peri_global.Vdd;//unit W 48510234Syasuko.eckert@amd.com double long_channel_device_reduction = 48610234Syasuko.eckert@amd.com longer_channel_device_reduction(Uncore_device); 48710234Syasuko.eckert@amd.com power.readOp.longer_channel_leakage = 48810234Syasuko.eckert@amd.com power.readOp.leakage * long_channel_device_reduction; 48910234Syasuko.eckert@amd.com power.readOp.gate_leakage = 49010234Syasuko.eckert@amd.com ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) * number_channel) * 49110234Syasuko.eckert@amd.com cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * 49210234Syasuko.eckert@amd.com g_tp.peri_global.Vdd;//unit W 49310152Satgutier@umich.edu 49410234Syasuko.eckert@amd.com // Output power 49510234Syasuko.eckert@amd.com output_data.subthreshold_leakage_power = 49610234Syasuko.eckert@amd.com longer_channel_device ? 
power.readOp.longer_channel_leakage : 49710234Syasuko.eckert@amd.com power.readOp.leakage; 49810234Syasuko.eckert@amd.com output_data.gate_leakage_power = power.readOp.gate_leakage; 49910234Syasuko.eckert@amd.com output_data.peak_dynamic_power = power.readOp.dynamic * fcs.duty_cycle; 50010234Syasuko.eckert@amd.com output_data.runtime_dynamic_energy = power.readOp.dynamic * fcs.perc_load; 50110152Satgutier@umich.edu} 50210152Satgutier@umich.edu 50310152Satgutier@umich.eduvoid FlashController::set_fc_param() 50410152Satgutier@umich.edu{ 50510234Syasuko.eckert@amd.com int num_children = xml_data->nChildNode("param"); 50610234Syasuko.eckert@amd.com int i; 50710234Syasuko.eckert@amd.com for (i = 0; i < num_children; i++) { 50810234Syasuko.eckert@amd.com XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); 50910234Syasuko.eckert@amd.com XMLCSTR node_name = paramNode->getAttribute("name"); 51010234Syasuko.eckert@amd.com XMLCSTR value = paramNode->getAttribute("value"); 51110152Satgutier@umich.edu 51210234Syasuko.eckert@amd.com if (!node_name) 51310234Syasuko.eckert@amd.com warnMissingParamName(paramNode->getAttribute("id")); 51410234Syasuko.eckert@amd.com 51510234Syasuko.eckert@amd.com ASSIGN_INT_IF("num_channels", fcp.num_channels); 51610234Syasuko.eckert@amd.com ASSIGN_INT_IF("type", fcp.type); 51710234Syasuko.eckert@amd.com ASSIGN_ENUM_IF("withPHY", fcp.withPHY, bool); 51810234Syasuko.eckert@amd.com 51910234Syasuko.eckert@amd.com else { 52010234Syasuko.eckert@amd.com warnUnrecognizedParam(node_name); 52110234Syasuko.eckert@amd.com } 52210234Syasuko.eckert@amd.com } 52310234Syasuko.eckert@amd.com 52410234Syasuko.eckert@amd.com num_children = xml_data->nChildNode("stat"); 52510234Syasuko.eckert@amd.com for (i = 0; i < num_children; i++) { 52610234Syasuko.eckert@amd.com XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); 52710234Syasuko.eckert@amd.com XMLCSTR node_name = statNode->getAttribute("name"); 52810234Syasuko.eckert@amd.com XMLCSTR value = 
statNode->getAttribute("value"); 52910234Syasuko.eckert@amd.com 53010234Syasuko.eckert@amd.com if (!node_name) 53110234Syasuko.eckert@amd.com warnMissingStatName(statNode->getAttribute("id")); 53210234Syasuko.eckert@amd.com 53310234Syasuko.eckert@amd.com ASSIGN_FP_IF("duty_cycle", fcs.duty_cycle); 53410234Syasuko.eckert@amd.com ASSIGN_FP_IF("perc_load", fcs.perc_load); 53510234Syasuko.eckert@amd.com 53610234Syasuko.eckert@amd.com else { 53710234Syasuko.eckert@amd.com warnUnrecognizedStat(node_name); 53810234Syasuko.eckert@amd.com } 53910234Syasuko.eckert@amd.com } 54010152Satgutier@umich.edu} 541