Cross Reference: /gem5/ext/mcpat/iocontrollers.cc

Deleted Added

sdiff udiff text old ( 10152:52c552138ba1 ) new ( 10234:5cb711fa6176 )

full compact

1/*****************************************************************************
2 * McPAT
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.

6 * All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”

29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

30 *
31 ***************************************************************************/
32#include <algorithm>
33#include <cassert>
34#include <cmath>
35#include <iostream>
36#include <string>
37

37#include "XML_Parse.h"

38#include "basic_circuit.h"

39#include "basic_components.h"

39#include "common.h"

40#include "const.h"
41#include "io.h"
42#include "iocontrollers.h"
43#include "logic.h"

44#include "parameter.h"

44
45/*
46SUN Niagara 2 I/O power analysis:
47total signal bits: 711
48Total FBDIMM bits: (14+10)*2*8= 384
49PCIe bits: (8 + 8)*2 = 32
5010Gb NIC: (4*2+4*2)*2 = 32
51Debug I/Os: 168
52Other I/Os: 711- 32-32 - 384 - 168 = 95
53
54According to "Implementation of an 8-Core, 64-Thread, Power-Efficient SPARC Server on a Chip"
5590% of I/Os are SerDers (the calculation is (384+64)/(711-168)=83%, about the same as the 90% reported in the paper)
56--> around 80Pins are common I/Os.
57Common I/Os consume 71mW/Gb/s according to Cadence ChipEstimate @65nm
58Niagara 2 I/O clock is 1/4 of core clock. --> 87pin (<--((711-168)*17%)) * 71mW/Gb/s *0.25*1.4Ghz = 2.17W
59
60Total dynamic power of FBDIMM, NIC, PCIe = 84*0.132 + 84*0.049*0.132 = 11.14 - 2.17 = 8.98
61Further, if assuming I/O logic power is about 50% of I/Os then Total energy of FBDIMM, NIC, PCIe = 11.14 - 2.17*1.5 = 7.89
62 */
63
64/*
65 * A bug in Cadence ChipEstimator: after updating the clock rate in the clock tab, the user
66 * needs to re-select the IP clock (the same clk) and then click Estimate. If it is not re-selected,
67 * the new clock rate may not be propagated into the IPs.
68 *
69 */
70

72NIUController::NIUController(ParseXML *XML_interface,InputParameter* interface_ip_)
73:XML(XML_interface),
74 interface_ip(*interface_ip_)
75 {
76 local_result = init_interface(&interface_ip);

71NIUController::NIUController(XMLNode* _xml_data,InputParameter* interface_ip_)
72 : McPATComponent(_xml_data, interface_ip_) {
73 name = "NIU";
74 set_niu_param();
75}

78 double frontend_area, phy_area, mac_area, SerDer_area;
79 double frontend_dyn, mac_dyn, SerDer_dyn;
80 double frontend_gates, mac_gates, SerDer_gates;
81 double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
82 double NMOS_sizing, PMOS_sizing;

77void NIUController::computeArea() {
78 double mac_area;
79 double frontend_area;
80 double SerDer_area;

84 set_niu_param();

82 if (niup.type == 0) { //high performance NIU
83 //Area estimation based on average of die photo from Niagara 2 and
84 //Cadence ChipEstimate using 65nm.
85 mac_area = (1.53 + 0.3) / 2 * (interface_ip.F_sz_um / 0.065) *
86 (interface_ip.F_sz_um / 0.065);
87 //Area estimation based on average of die photo from Niagara 2, ISSCC
88 //"An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS"
89 //and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface
90 //With Robust VCO Tuning Technique" Frontend is PCS
91 frontend_area = (9.8 + (6 + 18) * 65 / 130 * 65 / 130) / 3 *
92 (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065);
93 //Area estimation based on average of die photo from Niagara 2 and
94 //Cadence ChipEstimate hard IP @65nm.
95 //SerDer is very hard to scale
96 SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um /
97 0.065);//* (interface_ip.F_sz_um/0.065);
98 } else {
99 //Low power implementations are mostly from Cadence ChipEstimator;
100 //Ignore the multiple IP effect
101 // ---When there are multiple IP (same kind or not) selected, Cadence
102 //ChipEstimator results are not a simple summation of all IPs.
103 //Ignore this effect
104 mac_area = 0.24 * (interface_ip.F_sz_um / 0.065) *
105 (interface_ip.F_sz_um / 0.065);
106 frontend_area = 0.1 * (interface_ip.F_sz_um / 0.065) *
107 (interface_ip.F_sz_um / 0.065);//Frontend is the PCS layer
108 SerDer_area = 0.35 * (interface_ip.F_sz_um / 0.065) *
109 (interface_ip.F_sz_um/0.065);
110 //Compare 130nm implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet
111 //Transceiver and XAUI Interface With Robust VCO Tuning Technique"
112 //and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can
113 //scale perfectly with the technology
114 }

115

86 if (niup.type == 0) //high performance NIU
87 {
88 //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate using 65nm.
89 mac_area = (1.53 + 0.3)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
90 //Area estimation based on average of die photo from Niagara 2, ISSCC "An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS"
91 //and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique" Frontend is PCS
92 frontend_area = (9.8 + (6 + 18)*65/130*65/130)/3 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
93 //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm.
94 //SerDer is very hard to scale
95 SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065);
96 phy_area = frontend_area + SerDer_area;
97 //total area
98 area.set_area((mac_area + frontend_area + SerDer_area)*1e6);
99 //Power
~~100~~ //Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
~~101~~ mac_dyn = 2.19e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
~~102~~ //Cadence ChipEstimate using 65nm soft IP;
~~103~~ frontend_dyn = 0.27e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate;
~~104~~ //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006
~~105~~ //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
~~106~~ SerDer_dyn = 0.01*10*sqrt(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
~~107~~ SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU

116 //total area
117 output_data.area = (mac_area + frontend_area + SerDer_area) * 1e6;
118 }

119

~~109~~ //Cadence ChipEstimate using 65nm
~~110~~ mac_gates = 111700;
~~111~~ frontend_gates = 320000;
~~112~~ SerDer_gates = 200000;
~~113~~ NMOS_sizing = 5*g_tp.min_w_nmos_;
~~114~~ PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;

120void NIUController::computeEnergy() {
121 double mac_dyn;
122 double frontend_dyn;
123 double SerDer_dyn;
124 double frontend_gates;
125 double mac_gates;
126 double SerDer_gates;
127 double NMOS_sizing;
128 double PMOS_sizing;
129 double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();

130

131 if (niup.type == 0) { //high performance NIU
132 //Power
133 //Cadence ChipEstimate using 65nm (mac, front_end are all energy.
134 //E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
135 //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
136 mac_dyn = 2.19e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd /
137 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
138 //Cadence ChipEstimate using 65nm soft IP;
139 frontend_dyn = 0.27e-9 * g_tp.peri_global.Vdd / 1.1 *
140 g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
141 //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006
142 //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
143 SerDer_dyn = 0.01 * 10 * sqrt(interface_ip.F_sz_um / 0.09) *
144 g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;

145

~~117~~ }
~~118~~ else
~~119~~ {//Low power implementations are mostly from Cadence ChipEstimator; Ignore the multiple IP effect
~~120~~ // ---When there are multiple IP (same kind or not) selected, Cadence ChipEstimator results are not
~~121~~ // a simple summation of all IPs. Ignore this effect
~~122~~ mac_area = 0.24 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
~~123~~ frontend_area = 0.1 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);//Frontend is the PCS layer
~~124~~ SerDer_area = 0.35 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
~~125~~ //Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique"
~~126~~ //and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can scale perfectly with the technology
~~127~~ //total area
~~128~~ area.set_area((mac_area + frontend_area + SerDer_area)*1e6);
~~129~~ //Power
~~130~~ //Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
~~131~~ mac_dyn = 1.257e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
~~132~~ //Cadence ChipEstimate using 65nm soft IP;
~~133~~ frontend_dyn = 0.6e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate;
~~134~~ //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm
~~135~~ SerDer_dyn = 0.0216*10*(interface_ip.F_sz_um/0.13)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
~~136~~ SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU

146 //Cadence ChipEstimate using 65nm
147 mac_gates = 111700;
148 frontend_gates = 320000;
149 SerDer_gates = 200000;
150 NMOS_sizing = 5 * g_tp.min_w_nmos_;
151 PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
152 } else {
153 //Power
154 //Cadence ChipEstimate using 65nm (mac, front_end are all energy.
155 ///E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
156 //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
157 mac_dyn = 1.257e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd
158 / 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
159 //Cadence ChipEstimate using 65nm soft IP;
160 frontend_dyn = 0.6e-9 * g_tp.peri_global.Vdd / 1.1 *
161 g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
162 //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm
163 SerDer_dyn = 0.0216 * 10 * (interface_ip.F_sz_um / 0.13) *
164 g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;

165

~~138~~ mac_gates = 111700;
~~139~~ frontend_gates = 52000;
~~140~~ SerDer_gates = 199260;

166 mac_gates = 111700;
167 frontend_gates = 52000;
168 SerDer_gates = 199260;
169 NMOS_sizing = g_tp.min_w_nmos_;
170 PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
171 }

172

~~142~~ NMOS_sizing = g_tp.min_w_nmos_;
~~143~~ PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;

173 //convert to energy per clock cycle of whole NIU
174 SerDer_dyn /= niup.clockRate;

175

~~145~~ }

176 power.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn;
177 power.readOp.leakage = (mac_gates + frontend_gates + frontend_gates) *
178 cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
179 g_tp.peri_global.Vdd;//unit W
180 double long_channel_device_reduction =
181 longer_channel_device_reduction(Uncore_device);
182 power.readOp.longer_channel_leakage =
183 power.readOp.leakage * long_channel_device_reduction;
184 power.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates) *
185 cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
186 g_tp.peri_global.Vdd;//unit W

187

~~147~~ power_t.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn;
~~148~~ power_t.readOp.leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
~~149~~ double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
~~150~~ power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
~~151~~ power_t.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
~~152~~ }

188 // Output power
189 output_data.subthreshold_leakage_power =
190 longer_channel_device ? power.readOp.longer_channel_leakage :
191 power.readOp.leakage;
192 output_data.gate_leakage_power = power.readOp.gate_leakage;
193 output_data.peak_dynamic_power = power.readOp.dynamic * nius.duty_cycle;
194 output_data.runtime_dynamic_energy = power.readOp.dynamic * nius.perc_load;
195}

196

~~154~~void NIUController::computeEnergy(bool is_tdp)
~~155~~{
~~156~~ if (is_tdp)
~~157~~ {

197void NIUController::set_niu_param() {
198 int num_children = xml_data->nChildNode("param");
199 int i;
200 for (i = 0; i < num_children; i++) {
201 XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
202 XMLCSTR node_name = paramNode->getAttribute("name");
203 XMLCSTR value = paramNode->getAttribute("value");

204

205 if (!node_name)
206 warnMissingParamName(paramNode->getAttribute("id"));

207

~~160~~ power = power_t;
~~161~~ power.readOp.dynamic *= niup.duty_cycle;

208 ASSIGN_FP_IF("niu_clockRate", niup.clockRate);
209 ASSIGN_INT_IF("num_units", niup.num_units);
210 ASSIGN_INT_IF("type", niup.type);

211

212 else {
213 warnUnrecognizedParam(node_name);
214 }

215 }

~~164~~ else
~~165~~ {
~~166~~ rt_power = power_t;
~~167~~ rt_power.readOp.dynamic *= niup.perc_load;
~~168~~ }
~~169~~}

216

~~171~~void NIUController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
~~172~~{
~~173~~ string indent_str(indent, ' ');
~~174~~ string indent_str_next(indent+2, ' ');
~~175~~ bool long_channel = XML->sys.longer_channel_device;

217 // Change from MHz to Hz
218 niup.clockRate *= 1e6;

219

~~177~~ if (is_tdp)
~~178~~ {
~~179~~ cout << "NIU:" << endl;
~~180~~ cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
~~181~~ cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*niup.clockRate << " W" << endl;
~~182~~ cout << indent_str<< "Subthreshold Leakage = "
~~183~~ << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
~~184~~ //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
~~185~~ cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
~~186~~ cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*niup.clockRate << " W" << endl;
~~187~~ cout<<endl;
~~188~~ }
~~189~~ else
~~190~~ {

220 num_children = xml_data->nChildNode("stat");
221 for (i = 0; i < num_children; i++) {
222 XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
223 XMLCSTR node_name = statNode->getAttribute("name");
224 XMLCSTR value = statNode->getAttribute("value");

225

~~192~~ }

226 if (!node_name)
227 warnMissingStatName(statNode->getAttribute("id"));

228

229 ASSIGN_FP_IF("duty_cycle", nius.duty_cycle);
230 ASSIGN_FP_IF("perc_load", nius.perc_load);
231
232 else {
233 warnUnrecognizedStat(node_name);
234 }
235 }

236}
237

~~196~~void NIUController::set_niu_param()
~~197~~{
~~198~~ niup.clockRate = XML->sys.niu.clockrate;
~~199~~ niup.clockRate *= 1e6;
~~200~~ niup.num_units = XML->sys.niu.number_units;
~~201~~ niup.duty_cycle = XML->sys.niu.duty_cycle;
~~202~~ niup.perc_load = XML->sys.niu.total_load_perc;
~~203~~ niup.type = XML->sys.niu.type;
~~204~~// niup.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);

238PCIeController::PCIeController(XMLNode* _xml_data,
239 InputParameter* interface_ip_)
240 : McPATComponent(_xml_data, interface_ip_) {
241 name = "PCIe";
242 set_pcie_param();

243}
244

~~207~~PCIeController::PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_)
~~208~~:XML(XML_interface),
~~209~~ interface_ip(*interface_ip_)
~~210~~ {
~~211~~ local_result = init_interface(&interface_ip);
~~212~~ double frontend_area, phy_area, ctrl_area, SerDer_area;
~~213~~ double ctrl_dyn, frontend_dyn, SerDer_dyn;
~~214~~ double ctrl_gates,frontend_gates, SerDer_gates;
~~215~~ double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
~~216~~ double NMOS_sizing, PMOS_sizing;

245void PCIeController::computeArea() {
246 double ctrl_area;
247 double SerDer_area;

248

~~218~~ /* Assuming PCIe is bit-slice based architecture
~~219~~ * This is the reason for /8 in both area and power calculation
~~220~~ * to get per lane numbers
~~221~~ */

249 /* Assuming PCIe is bit-slice based architecture
250 * This is the reason for /8 in both area and power calculation
251 * to get per lane numbers
252 */

253

~~223~~ set_pcie_param();
~~224~~ if (pciep.type == 0) //high performance NIU
~~225~~ {
~~226~~ //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate @ 65nm.
~~227~~ ctrl_area = (5.2 + 0.5)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
~~228~~ //Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm.
~~229~~ frontend_area = (5.2 + 0.1)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
~~230~~ //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm.
~~231~~ //SerDer is very hard to scale
~~232~~ SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065);
~~233~~ phy_area = frontend_area + SerDer_area;
~~234~~ //total area
~~235~~ //Power
~~236~~ //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
~~237~~ ctrl_dyn = 3.75e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
~~238~~ // //Cadence ChipEstimate using 65nm soft IP;
~~239~~ // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
~~240~~ //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
~~241~~ SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s
~~242~~ SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle

254 if (pciep.type == 0) { //high performance PCIe
255 //Area estimation based on average of die photo from Niagara 2 and
256 //Cadence ChipEstimate @ 65nm.
257 ctrl_area = (5.2 + 0.5) / 2 * (interface_ip.F_sz_um / 0.065) *
258 (interface_ip.F_sz_um / 0.065);
259 //Area estimation based on average of die photo from Niagara 2 and
260 //Cadence ChipEstimate hard IP @65nm.
261 //SerDer is very hard to scale
262 SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um /
263 0.065);//* (interface_ip.F_sz_um/0.065);
264 } else {
265 ctrl_area = 0.412 * (interface_ip.F_sz_um / 0.065) *
266 (interface_ip.F_sz_um / 0.065);
267 //Area estimation based on average of die photo from Niagara 2, and
268 //Cadence ChipEstimate @ 65nm.
269 SerDer_area = 0.36 * (interface_ip.F_sz_um / 0.065) *
270 (interface_ip.F_sz_um / 0.065);
271 }

272

~~244~~ //power_t.readOp.dynamic = (ctrl_dyn)*pciep.num_channels;
~~245~~ //Cadence ChipEstimate using 65nm
~~246~~ ctrl_gates = 900000/8*pciep.num_channels;
~~247~~ // frontend_gates = 120000/8;
~~248~~ // SerDer_gates = 200000/8;
~~249~~ NMOS_sizing = 5*g_tp.min_w_nmos_;
~~250~~ PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
~~251~~ }
~~252~~ else
~~253~~ {
~~254~~ ctrl_area = 0.412 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
~~255~~ //Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm.
~~256~~ SerDer_area = 0.36 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
~~257~~ //total area
~~258~~ //Power
~~259~~ //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
~~260~~ ctrl_dyn = 2.21e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
~~261~~ // //Cadence ChipEstimate using 65nm soft IP;
~~262~~ // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
~~263~~ //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
~~264~~ SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s
~~265~~ SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle

273 // Total area
274 output_data.area = ((ctrl_area + (pciep.withPHY ? SerDer_area : 0)) / 8 *
275 pciep.num_channels) * 1e6;
276}

277

~~267~~ //Cadence ChipEstimate using 65nm
~~268~~ ctrl_gates = 200000/8*pciep.num_channels;
~~269~~ // frontend_gates = 120000/8;
~~270~~ SerDer_gates = 200000/8*pciep.num_channels;
~~271~~ NMOS_sizing = g_tp.min_w_nmos_;
~~272~~ PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;

278void PCIeController::computeEnergy() {
279 double ctrl_dyn;
280 double SerDer_dyn;
281 double ctrl_gates;
282 double SerDer_gates = 0;
283 double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
284 double NMOS_sizing;
285 double PMOS_sizing;

286

~~274~~ }
~~275~~ area.set_area(((ctrl_area + (pciep.withPHY? SerDer_area:0))/8*pciep.num_channels)*1e6);
~~276~~ power_t.readOp.dynamic = (ctrl_dyn + (pciep.withPHY? SerDer_dyn:0))*pciep.num_channels;
~~277~~ power_t.readOp.leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
~~278~~ double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
~~279~~ power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
~~280~~ power_t.readOp.gate_leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
~~281~~ }

287 /* Assuming PCIe is bit-slice based architecture
288 * This is the reason for /8 in both area and power calculation
289 * to get per lane numbers
290 */

291

~~283~~void PCIeController::computeEnergy(bool is_tdp)
~~284~~{
~~285~~ if (is_tdp)
~~286~~ {

292 if (pciep.type == 0) { //high performance PCIe
293 //Power
294 //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
295 ctrl_dyn = 3.75e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
296 g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
297 // //Cadence ChipEstimate using 65nm soft IP;
298 // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
299 //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
300 //PCIe 2.0 max per lane speed is 4Gb/s
301 SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um /0.09) *
302 g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;

303

304 //Cadence ChipEstimate using 65nm
305 ctrl_gates = 900000 / 8 * pciep.num_channels;
306 // frontend_gates = 120000/8;
307 // SerDer_gates = 200000/8;
308 NMOS_sizing = 5 * g_tp.min_w_nmos_;
309 PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
310 } else {
311 //Power
312 //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
313 ctrl_dyn = 2.21e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
314 g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
315 // //Cadence ChipEstimate using 65nm soft IP;
316 // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
317 //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
318 //PCIe 2.0 max per lane speed is 4Gb/s
319 SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) *
320 g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;

321

~~289~~ power = power_t;
~~290~~ power.readOp.dynamic *= pciep.duty_cycle;

322 //Cadence ChipEstimate using 65nm
323 ctrl_gates = 200000 / 8 * pciep.num_channels;
324 // frontend_gates = 120000/8;
325 SerDer_gates = 200000 / 8 * pciep.num_channels;
326 NMOS_sizing = g_tp.min_w_nmos_;
327 PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;

328
329 }

~~293~~ else
~~294~~ {
~~295~~ rt_power = power_t;
~~296~~ rt_power.readOp.dynamic *= pciep.perc_load;
~~297~~ }

330
331 //convert to energy per clock cycle
332 SerDer_dyn /= pciep.clockRate;
333
334 power.readOp.dynamic = (ctrl_dyn + (pciep.withPHY ? SerDer_dyn : 0)) *
335 pciep.num_channels;
336 power.readOp.leakage = (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) *
337 cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
338 g_tp.peri_global.Vdd;//unit W
339 double long_channel_device_reduction =
340 longer_channel_device_reduction(Uncore_device);
341 power.readOp.longer_channel_leakage =
342 power.readOp.leakage * long_channel_device_reduction;
343 power.readOp.gate_leakage = (ctrl_gates +
344 (pciep.withPHY ? SerDer_gates : 0)) *
345 cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
346 g_tp.peri_global.Vdd;//unit W
347
348 // Output power
349 output_data.subthreshold_leakage_power =
350 longer_channel_device ? power.readOp.longer_channel_leakage :
351 power.readOp.leakage;
352 output_data.gate_leakage_power = power.readOp.gate_leakage;
353 output_data.peak_dynamic_power = power.readOp.dynamic * pcies.duty_cycle;
354 output_data.runtime_dynamic_energy =
355 power.readOp.dynamic * pcies.perc_load;

356}
357

~~300~~void PCIeController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
~~301~~{
~~302~~ string indent_str(indent, ' ');
~~303~~ string indent_str_next(indent+2, ' ');
~~304~~ bool long_channel = XML->sys.longer_channel_device;

358void PCIeController::set_pcie_param() {
359 int num_children = xml_data->nChildNode("param");
360 int i;
361 for (i = 0; i < num_children; i++) {
362 XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
363 XMLCSTR node_name = paramNode->getAttribute("name");
364 XMLCSTR value = paramNode->getAttribute("value");

365

~~306~~ if (is_tdp)
~~307~~ {
~~308~~ cout << "PCIe:" << endl;
~~309~~ cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
~~310~~ cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*pciep.clockRate << " W" << endl;
~~311~~ cout << indent_str<< "Subthreshold Leakage = "
~~312~~ << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
~~313~~ //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
~~314~~ cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
~~315~~ cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*pciep.clockRate << " W" << endl;
~~316~~ cout<<endl;
~~317~~ }
~~318~~ else
~~319~~ {

366 if (!node_name)
367 warnMissingParamName(paramNode->getAttribute("id"));

368

369 ASSIGN_FP_IF("pcie_clockRate", pciep.clockRate);
370 ASSIGN_INT_IF("num_units", pciep.num_units);
371 ASSIGN_INT_IF("num_channels", pciep.num_channels);
372 ASSIGN_INT_IF("type", pciep.type);
373 ASSIGN_ENUM_IF("withPHY", pciep.withPHY, bool);
374
375 else {
376 warnUnrecognizedParam(node_name);

377 }

378 }

379

~~323~~}

380 // Change from MHz to Hz
381 pciep.clockRate *= 1e6;

382

~~325~~void PCIeController::set_pcie_param()
~~326~~{
~~327~~ pciep.clockRate = XML->sys.pcie.clockrate;
~~328~~ pciep.clockRate *= 1e6;
~~329~~ pciep.num_units = XML->sys.pcie.number_units;
~~330~~ pciep.num_channels = XML->sys.pcie.num_channels;
~~331~~ pciep.duty_cycle = XML->sys.pcie.duty_cycle;
~~332~~ pciep.perc_load = XML->sys.pcie.total_load_perc;
~~333~~ pciep.type = XML->sys.pcie.type;
~~334~~ pciep.withPHY = XML->sys.pcie.withPHY;
~~335~~// pciep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);

383 num_children = xml_data->nChildNode("stat");
384 for (i = 0; i < num_children; i++) {
385 XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
386 XMLCSTR node_name = statNode->getAttribute("name");
387 XMLCSTR value = statNode->getAttribute("value");

388

389 if (!node_name)
390 warnMissingStatName(statNode->getAttribute("id"));
391
392 ASSIGN_FP_IF("duty_cycle", pcies.duty_cycle);
393 ASSIGN_FP_IF("perc_load", pcies.perc_load);
394
395 else {
396 warnUnrecognizedStat(node_name);
397 }
398 }

399}
400

~~339~~FlashController::FlashController(ParseXML *XML_interface,InputParameter* interface_ip_)
~~340~~:XML(XML_interface),
~~341~~ interface_ip(*interface_ip_)
~~342~~ {
~~343~~ local_result = init_interface(&interface_ip);
~~344~~ double frontend_area, phy_area, ctrl_area, SerDer_area;
~~345~~ double ctrl_dyn, frontend_dyn, SerDer_dyn;
~~346~~ double ctrl_gates,frontend_gates, SerDer_gates;
~~347~~ double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
~~348~~ double NMOS_sizing, PMOS_sizing;

401FlashController::FlashController(XMLNode* _xml_data,
402 InputParameter* interface_ip_)
403 : McPATComponent(_xml_data, interface_ip_) {
404 name = "Flash Controller";
405 set_fc_param();
406}

407

~~350~~ /* Assuming PCIe is bit-slice based architecture
~~351~~ * This is the reason for /8 in both area and power calculation
~~352~~ * to get per lane numbers
~~353~~ */

408void FlashController::computeArea() {
409 double ctrl_area;
410 double SerDer_area;

411

~~355~~ set_fc_param();
~~356~~ if (fcp.type == 0) //high performance NIU
~~357~~ {
~~358~~ cout<<"Current McPAT does not support high performance flash contorller since even low power designs are enough for maintain throughput"<<endl;
~~359~~ exit(0);
~~360~~ NMOS_sizing = 5*g_tp.min_w_nmos_;
~~361~~ PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
~~362~~ }
~~363~~ else
~~364~~ {
~~365~~ ctrl_area = 0.243 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
~~366~~ //Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL from CAST
~~367~~ SerDer_area = 0.36/8 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
~~368~~ //based On PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it support 8x lanes with each lane
~~369~~ //speed up to 250MB/s (PCIe1.1x) This is already saturate the 200MB/s of the flash controller core above.
~~370~~ ctrl_gates = 129267;
~~371~~ SerDer_gates = 200000/8;
~~372~~ NMOS_sizing = g_tp.min_w_nmos_;
~~373~~ PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;

412 /* Assuming Flash is bit-slice based architecture
413 * This is the reason for /8 in both area and power calculation
414 * to get per lane numbers
415 */

416

~~375~~ //Power
~~376~~ //Cadence ChipEstimate using 65nm the controller 125mW for every 200MB/s This is power not energy!
~~377~~ ctrl_dyn = 0.125*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
~~378~~ //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
~~379~~ SerDer_dyn = 0.01*1.6*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
~~380~~ //max Per controller speed is 1.6Gb/s (200MB/s)
~~381~~ }
~~382~~ double number_channel = 1+(fcp.num_channels-1)*0.2;
~~383~~ area.set_area((ctrl_area + (fcp.withPHY? SerDer_area:0))*1e6*number_channel);
~~384~~ power_t.readOp.dynamic = (ctrl_dyn + (fcp.withPHY? SerDer_dyn:0))*number_channel;
~~385~~ power_t.readOp.leakage = ((ctrl_gates + (fcp.withPHY? SerDer_gates:0))*number_channel)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
~~386~~ double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
~~387~~ power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
~~388~~ power_t.readOp.gate_leakage = ((ctrl_gates + (fcp.withPHY? SerDer_gates:0))*number_channel)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
~~389~~ }

417 if (fcp.type == 0) { //high performance flash controller
418 cout << "Current McPAT does not support high performance flash "
419 << "controller since even low power designs are enough for "
420 << "maintain throughput" <<endl;
421 exit(0);
422 } else {
423 ctrl_area = 0.243 * (interface_ip.F_sz_um / 0.065) *
424 (interface_ip.F_sz_um / 0.065);
425 //Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL
426 //from CAST
427 SerDer_area = 0.36 / 8 * (interface_ip.F_sz_um / 0.065) *
428 (interface_ip.F_sz_um / 0.065);
429 }

430

~~391~~void FlashController::computeEnergy(bool is_tdp)
~~392~~{
~~393~~ if (is_tdp)
~~394~~ {

431 double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
432 output_data.area = (ctrl_area + (fcp.withPHY ? SerDer_area : 0)) *
433 1e6 * number_channel;
434}

435

436void FlashController::computeEnergy() {
437 double ctrl_dyn;
438 double SerDer_dyn;
439 double ctrl_gates;
440 double SerDer_gates;
441 double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
442 double NMOS_sizing;
443 double PMOS_sizing;

444

~~397~~ power = power_t;
~~398~~ power.readOp.dynamic *= fcp.duty_cycle;

445 /* Assuming Flash is bit-slice based architecture
446 * This is the reason for /8 in both area and power calculation
447 * to get per lane numbers
448 */

449

450 if (fcp.type == 0) { //high performance flash controller
451 cout << "Current McPAT does not support high performance flash "
452 << "controller since even low power designs are enough for "
453 << "maintain throughput" <<endl;
454 exit(0);
455 NMOS_sizing = 5 * g_tp.min_w_nmos_;
456 PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
457 } else {
458 //based On PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it
459 //support 8x lanes with each lane speed up to 250MB/s (PCIe1.1x).
460 //This is already saturate the 200MB/s of the flash controller core
461 //above.
462 ctrl_gates = 129267;
463 SerDer_gates = 200000 / 8;
464 NMOS_sizing = g_tp.min_w_nmos_;
465 PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
466
467 //Power
468 //Cadence ChipEstimate using 65nm the controller 125mW for every
469 //200MB/s This is power not energy!
470 ctrl_dyn = 0.125 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd /
471 1.1 * (interface_ip.F_sz_nm / 65.0);
472 //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
473 SerDer_dyn = 0.01 * 1.6 * (interface_ip.F_sz_um / 0.09) *
474 g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
475 //max Per controller speed is 1.6Gb/s (200MB/s)

476 }

~~401~~ else
~~402~~ {
~~403~~ rt_power = power_t;
~~404~~ rt_power.readOp.dynamic *= fcp.perc_load;
~~405~~ }

477
478 double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
479 power.readOp.dynamic = (ctrl_dyn + (fcp.withPHY ? SerDer_dyn : 0)) *
480 number_channel;
481 power.readOp.leakage = ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) *
482 number_channel) *
483 cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
484 g_tp.peri_global.Vdd;//unit W
485 double long_channel_device_reduction =
486 longer_channel_device_reduction(Uncore_device);
487 power.readOp.longer_channel_leakage =
488 power.readOp.leakage * long_channel_device_reduction;
489 power.readOp.gate_leakage =
490 ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) * number_channel) *
491 cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
492 g_tp.peri_global.Vdd;//unit W
493
494 // Output power
495 output_data.subthreshold_leakage_power =
496 longer_channel_device ? power.readOp.longer_channel_leakage :
497 power.readOp.leakage;
498 output_data.gate_leakage_power = power.readOp.gate_leakage;
499 output_data.peak_dynamic_power = power.readOp.dynamic * fcs.duty_cycle;
500 output_data.runtime_dynamic_energy = power.readOp.dynamic * fcs.perc_load;

501}
502

~~408~~void FlashController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)

503void FlashController::set_fc_param()

504{

~~410~~ string indent_str(indent, ' ');
~~411~~ string indent_str_next(indent+2, ' ');
~~412~~ bool long_channel = XML->sys.longer_channel_device;

505 int num_children = xml_data->nChildNode("param");
506 int i;
507 for (i = 0; i < num_children; i++) {
508 XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
509 XMLCSTR node_name = paramNode->getAttribute("name");
510 XMLCSTR value = paramNode->getAttribute("value");

511

~~414~~ if (is_tdp)
~~415~~ {
~~416~~ cout << "Flash Controller:" << endl;
~~417~~ cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
~~418~~ cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" << endl;//no multiply of clock since this is power already
~~419~~ cout << indent_str<< "Subthreshold Leakage = "
~~420~~ << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
~~421~~ //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
~~422~~ cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
~~423~~ cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic << " W" << endl;
~~424~~ cout<<endl;
~~425~~ }
~~426~~ else
~~427~~ {

512 if (!node_name)
513 warnMissingParamName(paramNode->getAttribute("id"));

514

515 ASSIGN_INT_IF("num_channels", fcp.num_channels);
516 ASSIGN_INT_IF("type", fcp.type);
517 ASSIGN_ENUM_IF("withPHY", fcp.withPHY, bool);
518
519 else {
520 warnUnrecognizedParam(node_name);

521 }

522 }

523

~~431~~}

524 num_children = xml_data->nChildNode("stat");
525 for (i = 0; i < num_children; i++) {
526 XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
527 XMLCSTR node_name = statNode->getAttribute("name");
528 XMLCSTR value = statNode->getAttribute("value");

529

~~433~~void FlashController::set_fc_param()
~~434~~{
~~435~~// fcp.clockRate = XML->sys.flashc.mc_clock;
~~436~~// fcp.clockRate *= 1e6;
~~437~~ fcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate;
~~438~~ fcp.num_channels = ceil(fcp.peakDataTransferRate/200);
~~439~~ fcp.num_mcs = XML->sys.flashc.number_mcs;
~~440~~ fcp.duty_cycle = XML->sys.flashc.duty_cycle;
~~441~~ fcp.perc_load = XML->sys.flashc.total_load_perc;
~~442~~ fcp.type = XML->sys.flashc.type;
~~443~~ fcp.withPHY = XML->sys.flashc.withPHY;
~~444~~// flashcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);

530 if (!node_name)
531 warnMissingStatName(statNode->getAttribute("id"));

532

533 ASSIGN_FP_IF("duty_cycle", fcs.duty_cycle);
534 ASSIGN_FP_IF("perc_load", fcs.perc_load);
535
536 else {
537 warnUnrecognizedStat(node_name);
538 }
539 }

540}