// memoryctrl.cc revision 10152
1/***************************************************************************** 2 * McPAT 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. 5 * All Rights Reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer; 11 * redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution; 14 * neither the name of the copyright holders nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 * 30 ***************************************************************************/ 31#include <algorithm> 32#include <cassert> 33#include <cmath> 34#include <iostream> 35#include <string> 36 37#include "XML_Parse.h" 38#include "basic_circuit.h" 39#include "basic_components.h" 40#include "const.h" 41#include "io.h" 42#include "logic.h" 43#include "memoryctrl.h" 44#include "parameter.h" 45 46/* overview of MC models: 47 * McPAT memory controllers are modeled according to large number of industrial data points. 48 * The Basic memory controller architecture is base on the Synopsis designs 49 * (DesignWare DDR2/DDR3-Lite memory controllers and DDR2/DDR3-Lite protocol controllers) 50 * as in Cadence ChipEstimator Tool 51 * 52 * An MC has 3 parts as shown in this design. McPAT models both high performance MC 53 * based on Niagara processor designs and curving and low power MC based on data points in 54 * Cadence ChipEstimator Tool. 
55 * 56 * The frontend is modeled analytically, the backend is modeled empirically according to 57 * DDR2/DDR3-Lite protocol controllers in Cadence ChipEstimator Tool 58 * The PHY is modeled based on 59 * "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces ," ISSCC 2006, 60 * and A 14mW 6.25Gb/s Transceiver in 90nm CMOS for Serial Chip-to-Chip Communication," ISSCC 2007 61 * 62 * In Cadence ChipEstimator Tool there are two types of memory controllers: the full memory controllers 63 * that includes the frontend as the DesignWare DDR2/DDR3-Lite memory controllers and the backend only 64 * memory controllers as the DDR2/DDR3-Lite protocol controllers (except DesignWare DDR2/DDR3-Lite memory 65 * controllers, all memory controller IP in Cadence ChipEstimator Tool are backend memory controllers such as 66 * DDRC 1600A and DDRC 800A). Thus,to some extend the area and power difference between DesignWare 67 * DDR2/DDR3-Lite memory controllers and DDR2/DDR3-Lite protocol controllers can be an estimation to the 68 * frontend power and area, which is very close the analitically modeled results of the frontend for Niagara2@65nm 69 * 70 */ 71 72MCBackend::MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_) 73:l_ip(*interface_ip_), 74 mc_type(mc_type_), 75 mcp(mcp_) 76{ 77 78 local_result = init_interface(&l_ip); 79 compute(); 80 81} 82 83 84void MCBackend::compute() 85{ 86 //double max_row_addr_width = 20.0;//Current address 12~18bits 87 double C_MCB, mc_power, backend_dyn, backend_gates;//, refresh_period,refresh_freq;//Equivalent per bit Cap for backend, 88 double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); 89 double NMOS_sizing, PMOS_sizing; 90 91 if (mc_type == MC) 92 { 93 if (mcp.type == 0) 94 { 95 //area = (2.2927*log(peakDataTransferRate)-14.504)*memDataWidth/144.0*(l_ip.F_sz_um/0.09); 96 
area.set_area((2.7927*log(mcp.peakDataTransferRate*2)-19.862)/2.0*mcp.dataBusWidth/128.0*(l_ip.F_sz_um/0.09)*mcp.num_channels*1e6);//um^2 97 //assuming the approximately same scaling factor as seen in processors. 98 //C_MCB=0.2/1.3/1.3/266/64/0.09*g_ip.F_sz_um;//based on AMD Geode processor which has a very basic mc on chip. 99 //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process. 100 //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et 101 mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend 102 C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065; 103 power_t.readOp.dynamic = C_MCB*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(mcp.dataBusWidth/*+mcp.addressBusWidth*/);//per access energy in memory controller 104 power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W 105 power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W 106 107 } 108 else 109 { NMOS_sizing = g_tp.min_w_nmos_; 110 PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; 111 area.set_area(0.15*mcp.dataBusWidth/72.0*(l_ip.F_sz_um/0.065)* (l_ip.F_sz_um/0.065)*mcp.num_channels*1e6);//um^2 112 backend_dyn = 0.9e-9/800e6*mcp.clockRate/12800*mcp.peakDataTransferRate*mcp.dataBusWidth/72.0*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(l_ip.F_sz_nm/65.0);//Average on DDR2/3 protocol controller and DDRC 1600/800A in Cadence ChipEstimate 113 //Scaling to technology and DIMM feature. 
The base IP support DDR3-1600(PC3 12800) 114 backend_gates = 50000*mcp.dataBusWidth/64.0;//5000 is from Cadence ChipEstimator 115 116 power_t.readOp.dynamic = backend_dyn; 117 power_t.readOp.leakage = (backend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W 118 power_t.readOp.gate_leakage = (backend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W 119 120 } 121 } 122 else 123 {//skip old model 124 cout<<"Unknown memory controllers"<<endl;exit(0); 125 area.set_area(0.243*mcp.dataBusWidth/8);//area based on Cadence ChipEstimator for 8bit bus 126 //mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend 127 C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065; 128 power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W 129 power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W 130 power_t.readOp.dynamic *= 1.2; 131 power_t.readOp.leakage *= 1.2; 132 power_t.readOp.gate_leakage *= 1.2; 133 //flash controller has about 20% more backend power since BCH ECC in flash is complex and power hungry 134 } 135 double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); 136 power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction; 137} 138 139void MCBackend::computeEnergy(bool is_tdp) 140{ 141 //backend uses internal data buswidth 142 if (is_tdp) 143 { 144 //init stats for Peak 145 stats_t.readAc.access = 0.5*mcp.num_channels; 146 stats_t.writeAc.access = 0.5*mcp.num_channels; 147 tdp_stats = stats_t; 148 } 149 else 150 { 151 //init stats for runtime power (RTP) 152 stats_t.readAc.access = mcp.reads; 153 stats_t.writeAc.access = 
mcp.writes; 154 tdp_stats = stats_t; 155 } 156 if (is_tdp) 157 { 158 power = power_t; 159 power.readOp.dynamic = (stats_t.readAc.access + stats_t.writeAc.access)*power_t.readOp.dynamic; 160 161 } 162 else 163 { 164 rt_power.readOp.dynamic = (stats_t.readAc.access + stats_t.writeAc.access)*mcp.llcBlockSize*8.0/mcp.dataBusWidth*power_t.readOp.dynamic; 165 rt_power = rt_power + power_t*pppm_lkg; 166 rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime; 167 //Assume 10% of peak power is consumed by routine job including memory refreshing and scrubbing 168 } 169} 170 171 172MCPHY::MCPHY(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_) 173:l_ip(*interface_ip_), 174 mc_type(mc_type_), 175 mcp(mcp_) 176{ 177 178 local_result = init_interface(&l_ip); 179 compute(); 180} 181 182void MCPHY::compute() 183{ 184 //PHY uses internal data buswidth but the actuall off-chip datawidth is 64bits + ecc 185 double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio() ; 186 /* 187 * according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces ," ISSCC 2006; 188 * From Cadence ChipEstimator for normal I/O around 0.4~0.8 mW/Gb/s 189 */ 190 double power_per_gb_per_s, phy_dyn,phy_gates, NMOS_sizing, PMOS_sizing; 191 192 if (mc_type == MC) 193 { 194 if (mcp.type == 0) 195 { 196 power_per_gb_per_s = mcp.LVDS? 0.01:0.04; 197 //Based on die photos from Niagara 1 and 2. 198 //TODO merge this into undifferentiated core.PHY only achieves square root of the ideal scaling. 199 //area = (6.4323*log(peakDataTransferRate)-34.76)*memDataWidth/128.0*(l_ip.F_sz_um/0.09); 200 area.set_area((6.4323*log(mcp.peakDataTransferRate*2)-48.134)*mcp.dataBusWidth/128.0*(l_ip.F_sz_um/0.09)*mcp.num_channels*1e6/2);//TODO:/2 201 //This is from curve fitting based on Niagara 1 and 2's PHY die photo. 
202 //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down 203 //power.readOp.dynamic = 0.02*memAccesses*llcBlocksize*8;//change from Bytes to bits. 204 power_t.readOp.dynamic = power_per_gb_per_s*sqrt(l_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2; 205 power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W 206 power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W 207 208 } 209 else 210 { 211 NMOS_sizing = g_tp.min_w_nmos_; 212 PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; 213 //Designware/synopsis 16bit DDR3 PHY is 1.3mm (WITH IOs) at 40nm for upto DDR3 2133 (PC3 17066) 214 double non_IO_percentage = 0.2; 215 area.set_area(1.3*non_IO_percentage/2133.0e6*mcp.clockRate/17066*mcp.peakDataTransferRate*mcp.dataBusWidth/16.0*(l_ip.F_sz_um/0.040)* (l_ip.F_sz_um/0.040)*mcp.num_channels*1e6);//um^2 216 phy_gates = 200000*mcp.dataBusWidth/64.0; 217 power_per_gb_per_s = 0.01; 218 //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down 219 power_t.readOp.dynamic = power_per_gb_per_s*(l_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2; 220 power_t.readOp.leakage = (mcp.withPHY? phy_gates:0)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W 221 power_t.readOp.gate_leakage = (mcp.withPHY? 
phy_gates:0)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W 222 } 223 224 } 225 else 226 { 227 area.set_area(0.4e6/2*mcp.dataBusWidth/8);//area based on Cadence ChipEstimator for 8bit bus 228 } 229 230// double phy_factor = (int)ceil(mcp.dataBusWidth/72.0);//Previous phy power numbers are based on 72 bit DIMM interface 231// power_t.readOp.dynamic *= phy_factor; 232// power_t.readOp.leakage *= phy_factor; 233// power_t.readOp.gate_leakage *= phy_factor; 234 235 double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); 236 power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction; 237} 238 239 240void MCPHY::computeEnergy(bool is_tdp) 241{ 242 if (is_tdp) 243 { 244 //init stats for Peak 245 stats_t.readAc.access = 0.5*mcp.num_channels; //time share on buses 246 stats_t.writeAc.access = 0.5*mcp.num_channels; 247 tdp_stats = stats_t; 248 } 249 else 250 { 251 //init stats for runtime power (RTP) 252 stats_t.readAc.access = mcp.reads; 253 stats_t.writeAc.access = mcp.writes; 254 tdp_stats = stats_t; 255 } 256 257 if (is_tdp) 258 { 259 double data_transfer_unit = (mc_type == MC)? 
72:16;/*DIMM data width*/ 260 power = power_t; 261 power.readOp.dynamic = power.readOp.dynamic * (mcp.peakDataTransferRate*8*1e6/1e9/*change to Gbs*/)*mcp.dataBusWidth/data_transfer_unit*mcp.num_channels/mcp.clockRate; 262 // divide by clock rate is for match the final computation where *clock is used 263 //(stats_t.readAc.access*power_t.readOp.dynamic+ 264// stats_t.writeAc.access*power_t.readOp.dynamic); 265 266 } 267 else 268 { 269 rt_power = power_t; 270// rt_power.readOp.dynamic = (stats_t.readAc.access*power_t.readOp.dynamic+ 271// stats_t.writeAc.access*power_t.readOp.dynamic); 272 273 rt_power.readOp.dynamic=power_t.readOp.dynamic*(stats_t.readAc.access + stats_t.writeAc.access)*(mcp.llcBlockSize)*8/1e9/mcp.executionTime*(mcp.executionTime); 274 rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime; 275 } 276} 277 278MCFrontEnd::MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_) 279:XML(XML_interface), 280 interface_ip(*interface_ip_), 281 mc_type(mc_type_), 282 mcp(mcp_), 283 MC_arb(0), 284 frontendBuffer(0), 285 readBuffer(0), 286 writeBuffer(0) 287{ 288 /* All computations are for a single MC 289 * 290 */ 291 292 int tag, data; 293 bool is_default =true;//indication for default setup 294 295 /* MC frontend engine channels share the same engines but logically partitioned 296 * For all hardware inside MC. different channels do not share resources. 297 * TODO: add docodeing/mux stage to steer memory requests to different channels. 
298 */ 299 300 //memory request reorder buffer 301 tag = mcp.addressBusWidth + EXTRA_TAG_BITS + mcp.opcodeW; 302 data = int(ceil((XML->sys.physical_address_width + mcp.opcodeW)/8.0)); 303 interface_ip.cache_sz = data*XML->sys.mc.req_window_size_per_channel; 304 interface_ip.line_sz = data; 305 interface_ip.assoc = 0; 306 interface_ip.nbanks = 1; 307 interface_ip.out_w = interface_ip.line_sz*8; 308 interface_ip.specific_tag = 1; 309 interface_ip.tag_w = tag; 310 interface_ip.access_mode = 0; 311 interface_ip.throughput = 1.0/mcp.clockRate; 312 interface_ip.latency = 1.0/mcp.clockRate; 313 interface_ip.is_cache = true; 314 interface_ip.pure_cam = false; 315 interface_ip.pure_ram = false; 316 interface_ip.obj_func_dyn_energy = 0; 317 interface_ip.obj_func_dyn_power = 0; 318 interface_ip.obj_func_leak_power = 0; 319 interface_ip.obj_func_cycle_t = 1; 320 interface_ip.num_rw_ports = 0; 321 interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; 322 interface_ip.num_wr_ports = interface_ip.num_rd_ports; 323 interface_ip.num_se_rd_ports = 0; 324 interface_ip.num_search_ports = XML->sys.mc.memory_channels_per_mc; 325 frontendBuffer = new ArrayST(&interface_ip, "MC ReorderBuffer", Uncore_device); 326 frontendBuffer->area.set_area(frontendBuffer->area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); 327 area.set_area(area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); 328 329 //selection and arbitration logic 330 MC_arb = new selection_logic(is_default, XML->sys.mc.req_window_size_per_channel,1,&interface_ip, Uncore_device); 331 332 //read buffers. 
333 data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte 334 interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize; 335 interface_ip.line_sz = data; 336 interface_ip.assoc = 1; 337 interface_ip.nbanks = 1; 338 interface_ip.out_w = interface_ip.line_sz*8; 339 interface_ip.access_mode = 1; 340 interface_ip.throughput = 1.0/mcp.clockRate; 341 interface_ip.latency = 1.0/mcp.clockRate; 342 interface_ip.is_cache = false; 343 interface_ip.pure_cam = false; 344 interface_ip.pure_ram = true; 345 interface_ip.obj_func_dyn_energy = 0; 346 interface_ip.obj_func_dyn_power = 0; 347 interface_ip.obj_func_leak_power = 0; 348 interface_ip.obj_func_cycle_t = 1; 349 interface_ip.num_rw_ports = 0;//XML->sys.mc.memory_channels_per_mc*2>2?2:XML->sys.mc.memory_channels_per_mc*2; 350 interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; 351 interface_ip.num_wr_ports = interface_ip.num_rd_ports; 352 interface_ip.num_se_rd_ports = 0; 353 readBuffer = new ArrayST(&interface_ip, "MC ReadBuffer", Uncore_device); 354 readBuffer->area.set_area(readBuffer->area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); 355 area.set_area(area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); 356 357 //write buffer 358 data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte 359 interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize; 360 interface_ip.line_sz = data; 361 interface_ip.assoc = 1; 362 interface_ip.nbanks = 1; 363 interface_ip.out_w = interface_ip.line_sz*8; 364 interface_ip.access_mode = 0; 365 interface_ip.throughput = 1.0/mcp.clockRate; 366 interface_ip.latency = 1.0/mcp.clockRate; 367 interface_ip.obj_func_dyn_energy = 0; 368 interface_ip.obj_func_dyn_power = 0; 369 interface_ip.obj_func_leak_power = 0; 370 interface_ip.obj_func_cycle_t = 1; 371 
interface_ip.num_rw_ports = 0; 372 interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; 373 interface_ip.num_wr_ports = interface_ip.num_rd_ports; 374 interface_ip.num_se_rd_ports = 0; 375 writeBuffer = new ArrayST(&interface_ip, "MC writeBuffer", Uncore_device); 376 writeBuffer->area.set_area(writeBuffer->area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); 377 area.set_area(area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); 378} 379 380void MCFrontEnd::computeEnergy(bool is_tdp) 381{ 382 if (is_tdp) 383 { 384 //init stats for Peak 385 frontendBuffer->stats_t.readAc.access = frontendBuffer->l_ip.num_search_ports; 386 frontendBuffer->stats_t.writeAc.access = frontendBuffer->l_ip.num_wr_ports; 387 frontendBuffer->tdp_stats = frontendBuffer->stats_t; 388 389 readBuffer->stats_t.readAc.access = readBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle; 390 readBuffer->stats_t.writeAc.access = readBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle; 391 readBuffer->tdp_stats = readBuffer->stats_t; 392 393 writeBuffer->stats_t.readAc.access = writeBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle; 394 writeBuffer->stats_t.writeAc.access = writeBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle; 395 writeBuffer->tdp_stats = writeBuffer->stats_t; 396 397 } 398 else 399 { 400 //init stats for runtime power (RTP) 401 frontendBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads *mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72; 402 //For each channel, each memory word need to check the address data to achieve best scheduling results. 
403 //and this need to be done on all physical DIMMs in each logical memory DIMM *mcp.dataBusWidth/72 404 frontendBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72; 405 frontendBuffer->rtp_stats = frontendBuffer->stats_t; 406 407 readBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first 408 readBuffer->stats_t.writeAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first 409 readBuffer->rtp_stats = readBuffer->stats_t; 410 411 writeBuffer->stats_t.readAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth; 412 writeBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth; 413 writeBuffer->rtp_stats = writeBuffer->stats_t; 414 } 415 416 frontendBuffer->power_t.reset(); 417 readBuffer->power_t.reset(); 418 writeBuffer->power_t.reset(); 419 420// frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access* 421// (frontendBuffer->local_result.power.searchOp.dynamic+frontendBuffer->local_result.power.readOp.dynamic)+ 422// frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic); 423 424 frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access + 425 frontendBuffer->stats_t.writeAc.access)*frontendBuffer->local_result.power.searchOp.dynamic 426 + frontendBuffer->stats_t.readAc.access * frontendBuffer->local_result.power.readOp.dynamic 427 + frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic; 428 429 readBuffer->power_t.readOp.dynamic += (readBuffer->stats_t.readAc.access* 430 readBuffer->local_result.power.readOp.dynamic+ 431 readBuffer->stats_t.writeAc.access*readBuffer->local_result.power.writeOp.dynamic); 432 writeBuffer->power_t.readOp.dynamic += (writeBuffer->stats_t.readAc.access* 433 
writeBuffer->local_result.power.readOp.dynamic+ 434 writeBuffer->stats_t.writeAc.access*writeBuffer->local_result.power.writeOp.dynamic); 435 436 if (is_tdp) 437 { 438 power = power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t + 439 (frontendBuffer->local_result.power + 440 readBuffer->local_result.power + 441 writeBuffer->local_result.power)*pppm_lkg; 442 443 } 444 else 445 { 446 rt_power = rt_power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t + 447 (frontendBuffer->local_result.power + 448 readBuffer->local_result.power + 449 writeBuffer->local_result.power)*pppm_lkg; 450 rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime; 451 } 452} 453 454void MCFrontEnd::displayEnergy(uint32_t indent,int plevel,bool is_tdp) 455{ 456 string indent_str(indent, ' '); 457 string indent_str_next(indent+2, ' '); 458 459 if (is_tdp) 460 { 461 cout << indent_str << "Front End ROB:" << endl; 462 cout << indent_str_next << "Area = " << frontendBuffer->area.get_area()*1e-6<< " mm^2" << endl; 463 cout << indent_str_next << "Peak Dynamic = " << frontendBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl; 464 cout << indent_str_next << "Subthreshold Leakage = " << frontendBuffer->power.readOp.leakage <<" W" << endl; 465 cout << indent_str_next << "Gate Leakage = " << frontendBuffer->power.readOp.gate_leakage << " W" << endl; 466 cout << indent_str_next << "Runtime Dynamic = " << frontendBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; 467 468 cout <<endl; 469 cout << indent_str<< "Read Buffer:" << endl; 470 cout << indent_str_next << "Area = " << readBuffer->area.get_area()*1e-6 << " mm^2" << endl; 471 cout << indent_str_next << "Peak Dynamic = " << readBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl; 472 cout << indent_str_next << "Subthreshold Leakage = " << readBuffer->power.readOp.leakage << " W" << endl; 473 cout << 
indent_str_next << "Gate Leakage = " << readBuffer->power.readOp.gate_leakage << " W" << endl; 474 cout << indent_str_next << "Runtime Dynamic = " << readBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; 475 cout <<endl; 476 cout << indent_str << "Write Buffer:" << endl; 477 cout << indent_str_next << "Area = " << writeBuffer->area.get_area() *1e-6 << " mm^2" << endl; 478 cout << indent_str_next << "Peak Dynamic = " << writeBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl; 479 cout << indent_str_next << "Subthreshold Leakage = " << writeBuffer->power.readOp.leakage << " W" << endl; 480 cout << indent_str_next << "Gate Leakage = " << writeBuffer->power.readOp.gate_leakage << " W" << endl; 481 cout << indent_str_next << "Runtime Dynamic = " << writeBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; 482 cout <<endl; 483 } 484 else 485 { 486 cout << indent_str << "Front End ROB:" << endl; 487 cout << indent_str_next << "Area = " << frontendBuffer->area.get_area()*1e-6<< " mm^2" << endl; 488 cout << indent_str_next << "Peak Dynamic = " << frontendBuffer->rt_power.readOp.dynamic*mcp.clockRate << " W" << endl; 489 cout << indent_str_next << "Subthreshold Leakage = " << frontendBuffer->rt_power.readOp.leakage <<" W" << endl; 490 cout << indent_str_next << "Gate Leakage = " << frontendBuffer->rt_power.readOp.gate_leakage << " W" << endl; 491 cout <<endl; 492 cout << indent_str<< "Read Buffer:" << endl; 493 cout << indent_str_next << "Area = " << readBuffer->area.get_area()*1e-6 << " mm^2" << endl; 494 cout << indent_str_next << "Peak Dynamic = " << readBuffer->rt_power.readOp.dynamic*mcp.clockRate << " W" << endl; 495 cout << indent_str_next << "Subthreshold Leakage = " << readBuffer->rt_power.readOp.leakage << " W" << endl; 496 cout << indent_str_next << "Gate Leakage = " << readBuffer->rt_power.readOp.gate_leakage << " W" << endl; 497 cout <<endl; 498 cout << indent_str << "Write Buffer:" << endl; 499 cout << indent_str_next << 
"Area = " << writeBuffer->area.get_area() *1e-6 << " mm^2" << endl; 500 cout << indent_str_next << "Peak Dynamic = " << writeBuffer->rt_power.readOp.dynamic*mcp.clockRate << " W" << endl; 501 cout << indent_str_next << "Subthreshold Leakage = " << writeBuffer->rt_power.readOp.leakage << " W" << endl; 502 cout << indent_str_next << "Gate Leakage = " << writeBuffer->rt_power.readOp.gate_leakage << " W" << endl; 503 } 504 505} 506 507 508MemoryController::MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_) 509:XML(XML_interface), 510 interface_ip(*interface_ip_), 511 mc_type(mc_type_), 512 frontend(0), 513 transecEngine(0), 514 PHY(0), 515 pipeLogic(0) 516{ 517 /* All computations are for a single MC 518 * 519 */ 520 interface_ip.wire_is_mat_type = 2; 521 interface_ip.wire_os_mat_type = 2; 522 interface_ip.wt =Global; 523 set_mc_param(); 524 frontend = new MCFrontEnd(XML, &interface_ip, mcp, mc_type); 525 area.set_area(area.get_area()+ frontend->area.get_area()); 526 transecEngine = new MCBackend(&interface_ip, mcp, mc_type); 527 area.set_area(area.get_area()+ transecEngine->area.get_area()); 528 if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) 529 { 530 PHY = new MCPHY(&interface_ip, mcp, mc_type); 531 area.set_area(area.get_area()+ PHY->area.get_area()); 532 } 533 //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. 
534// transecEngine.initialize(&interface_ip); 535// transecEngine.peakDataTransferRate = XML->sys.mem.peak_transfer_rate; 536// transecEngine.memDataWidth = dataBusWidth; 537// transecEngine.memRank = XML->sys.mem.number_ranks; 538// //transecEngine.memAccesses=XML->sys.mc.memory_accesses; 539// //transecEngine.llcBlocksize=llcBlockSize; 540// transecEngine.compute(); 541// transecEngine.area.set_area(XML->sys.mc.memory_channels_per_mc*transecEngine.area.get_area()) ; 542// area.set_area(area.get_area()+ transecEngine.area.get_area()); 543// ///cout<<"area="<<area<<endl; 544//// 545// //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers 546// PHY.initialize(&interface_ip); 547// PHY.peakDataTransferRate = XML->sys.mem.peak_transfer_rate; 548// PHY.memDataWidth = dataBusWidth; 549// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power 550// //PHY.llcBlocksize=llcBlockSize; 551// PHY.compute(); 552// PHY.area.set_area(XML->sys.mc.memory_channels_per_mc*PHY.area.get_area()) ; 553// area.set_area(area.get_area()+ PHY.area.get_area()); 554 ///cout<<"area="<<area<<endl; 555// 556// interface_ip.pipeline_stages = 5;//normal memory controller has five stages in the pipeline. 557// interface_ip.per_stage_vector = addressBusWidth + XML->sys.core[0].opcode_width + dataBusWidth; 558// pipeLogic = new pipeline(is_default, &interface_ip); 559// //pipeLogic.init_pipeline(is_default, &interface_ip); 560// pipeLogic->compute_pipeline(); 561// area.set_area(area.get_area()+ pipeLogic->area.get_area()*1e-6); 562// area.set_area((area.get_area()+mc_area*1e-6)*1.1);//placement and routing overhead 563// 564// 565//// //clock 566//// clockNetwork.init_wire_external(is_default, &interface_ip); 567//// clockNetwork.clk_area =area*1.1;//10% of placement overhead. 
rule of thumb 568//// clockNetwork.end_wiring_level =5;//toplevel metal 569//// clockNetwork.start_wiring_level =5;//toplevel metal 570//// clockNetwork.num_regs = pipeLogic.tot_stage_vector; 571//// clockNetwork.optimize_wire(); 572 573 574} 575void MemoryController::computeEnergy(bool is_tdp) 576{ 577 578 frontend->computeEnergy(is_tdp); 579 transecEngine->computeEnergy(is_tdp); 580 if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) 581 { 582 PHY->computeEnergy(is_tdp); 583 } 584 if (is_tdp) 585 { 586 power = power + frontend->power + transecEngine->power; 587 if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) 588 { 589 power = power + PHY->power; 590 } 591 } 592 else 593 { 594 rt_power = rt_power + frontend->rt_power + transecEngine->rt_power; 595 if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) 596 { 597 rt_power = rt_power + PHY->rt_power; 598 } 599 } 600} 601 602void MemoryController::displayEnergy(uint32_t indent,int plevel,bool is_tdp) 603{ 604 string indent_str(indent, ' '); 605 string indent_str_next(indent+2, ' '); 606 bool long_channel = XML->sys.longer_channel_device; 607 608 if (is_tdp) 609 { 610 cout << "Memory Controller:" << endl; 611 cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; 612 cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*mcp.clockRate << " W" << endl; 613 cout << indent_str<< "Subthreshold Leakage = " 614 << (long_channel? 
power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; 615 //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; 616 cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; 617 cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; 618 cout<<endl; 619 cout << indent_str << "Front End Engine:" << endl; 620 cout << indent_str_next << "Area = " << frontend->area.get_area()*1e-6<< " mm^2" << endl; 621 cout << indent_str_next << "Peak Dynamic = " << frontend->power.readOp.dynamic*mcp.clockRate << " W" << endl; 622 cout << indent_str_next << "Subthreshold Leakage = " 623 << (long_channel? frontend->power.readOp.longer_channel_leakage:frontend->power.readOp.leakage) <<" W" << endl; 624 cout << indent_str_next << "Gate Leakage = " << frontend->power.readOp.gate_leakage << " W" << endl; 625 cout << indent_str_next << "Runtime Dynamic = " << frontend->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; 626 cout <<endl; 627 if (plevel >2){ 628 frontend->displayEnergy(indent+4,is_tdp); 629 } 630 cout << indent_str << "Transaction Engine:" << endl; 631 cout << indent_str_next << "Area = " << transecEngine->area.get_area()*1e-6<< " mm^2" << endl; 632 cout << indent_str_next << "Peak Dynamic = " << transecEngine->power.readOp.dynamic*mcp.clockRate << " W" << endl; 633 cout << indent_str_next << "Subthreshold Leakage = " 634 << (long_channel? 
transecEngine->power.readOp.longer_channel_leakage:transecEngine->power.readOp.leakage) <<" W" << endl; 635 cout << indent_str_next << "Gate Leakage = " << transecEngine->power.readOp.gate_leakage << " W" << endl; 636 cout << indent_str_next << "Runtime Dynamic = " << transecEngine->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; 637 cout <<endl; 638 if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) 639 { 640 cout << indent_str << "PHY:" << endl; 641 cout << indent_str_next << "Area = " << PHY->area.get_area()*1e-6<< " mm^2" << endl; 642 cout << indent_str_next << "Peak Dynamic = " << PHY->power.readOp.dynamic*mcp.clockRate << " W" << endl; 643 cout << indent_str_next << "Subthreshold Leakage = " 644 << (long_channel? PHY->power.readOp.longer_channel_leakage:PHY->power.readOp.leakage) <<" W" << endl; 645 cout << indent_str_next << "Gate Leakage = " << PHY->power.readOp.gate_leakage << " W" << endl; 646 cout << indent_str_next << "Runtime Dynamic = " << PHY->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; 647 cout <<endl; 648 } 649 } 650 else 651 { 652 cout << "Memory Controller:" << endl; 653 cout << indent_str_next << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; 654 cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*mcp.clockRate << " W" << endl; 655 cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage <<" W" << endl; 656 cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; 657 cout<<endl; 658 } 659 660} 661 662void MemoryController::set_mc_param() 663{ 664 665 if (mc_type==MC) 666 { 667 mcp.clockRate =XML->sys.mc.mc_clock*2;//DDR double pumped 668 mcp.clockRate *= 1e6; 669 mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); 670 671 mcp.llcBlockSize =int(ceil(XML->sys.mc.llc_line_length/8.0))+XML->sys.mc.llc_line_length;//ecc overhead 672 mcp.dataBusWidth =int(ceil(XML->sys.mc.databus_width/8.0)) + XML->sys.mc.databus_width; 673 
mcp.addressBusWidth =int(ceil(XML->sys.mc.addressbus_width));//XML->sys.physical_address_width; 674 mcp.opcodeW =16; 675 mcp.num_mcs = XML->sys.mc.number_mcs; 676 mcp.num_channels = XML->sys.mc.memory_channels_per_mc; 677 mcp.reads = XML->sys.mc.memory_reads; 678 mcp.writes = XML->sys.mc.memory_writes; 679 //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. 680 mcp.peakDataTransferRate = XML->sys.mc.peak_transfer_rate; 681 mcp.memRank = XML->sys.mc.number_ranks; 682 //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers 683 //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power 684 //PHY.llcBlocksize=llcBlockSize; 685 mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared 686 mcp.LVDS = XML->sys.mc.LVDS; 687 mcp.type = XML->sys.mc.type; 688 mcp.withPHY = XML->sys.mc.withPHY; 689 } 690// else if (mc_type==FLASHC) 691// { 692// mcp.clockRate =XML->sys.flashc.mc_clock*2;//DDR double pumped 693// mcp.clockRate *= 1e6; 694// mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); 695// 696// mcp.llcBlockSize =int(ceil(XML->sys.flashc.llc_line_length/8.0))+XML->sys.flashc.llc_line_length;//ecc overhead 697// mcp.dataBusWidth =int(ceil(XML->sys.flashc.databus_width/8.0)) + XML->sys.flashc.databus_width; 698// mcp.addressBusWidth =int(ceil(XML->sys.flashc.addressbus_width));//XML->sys.physical_address_width; 699// mcp.opcodeW =16; 700// mcp.num_mcs = XML->sys.flashc.number_mcs; 701// mcp.num_channels = XML->sys.flashc.memory_channels_per_mc; 702// mcp.reads = XML->sys.flashc.memory_reads; 703// mcp.writes = XML->sys.flashc.memory_writes; 704// //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. 
705// mcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate; 706// mcp.memRank = XML->sys.flashc.number_ranks; 707// //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers 708// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power 709// //PHY.llcBlocksize=llcBlockSize; 710// mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared 711// mcp.LVDS = XML->sys.flashc.LVDS; 712// mcp.type = XML->sys.flashc.type; 713// } 714 else 715 { 716 cout<<"Unknown memory controller type: neither DRAM controller nor Flash controller" <<endl; 717 exit(0); 718 } 719} 720 721MCFrontEnd ::~MCFrontEnd(){ 722 723 if(MC_arb) {delete MC_arb; MC_arb = 0;} 724 if(frontendBuffer) {delete frontendBuffer; frontendBuffer = 0;} 725 if(readBuffer) {delete readBuffer; readBuffer = 0;} 726 if(writeBuffer) {delete writeBuffer; writeBuffer = 0;} 727} 728 729MemoryController ::~MemoryController(){ 730 731 if(frontend) {delete frontend; frontend = 0;} 732 if(transecEngine) {delete transecEngine; transecEngine = 0;} 733 if(PHY) {delete PHY; PHY = 0;} 734 if(pipeLogic) {delete pipeLogic; pipeLogic = 0;} 735} 736 737