1/***************************************************************************** 2 * McPAT 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. 5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 6 * All Rights Reserved 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are 10 * met: redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer; 12 * redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution; 15 * neither the name of the copyright holders nor the names of its 16 * contributors may be used to endorse or promote products derived from 17 * this software without specific prior written permission. 18 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 * 31 ***************************************************************************/ 32 33#include <algorithm> 34#include <cassert> 35#include <cmath> 36#include <iostream> 37#include <string> 38 39#include "basic_circuit.h" 40#include "basic_components.h" 41#include "common.h" 42#include "const.h" 43#include "io.h" 44#include "logic.h" 45#include "memoryctrl.h" 46#include "parameter.h" 47 48/* overview of MC models: 49 * McPAT memory controllers are modeled according to large number of industrial data points. 50 * The Basic memory controller architecture is base on the Synopsis designs 51 * (DesignWare DDR2/DDR3-Lite memory controllers and DDR2/DDR3-Lite protocol controllers) 52 * as in Cadence ChipEstimator Tool 53 * 54 * An MC has 3 parts as shown in this design. McPAT models both high performance MC 55 * based on Niagara processor designs and curving and low power MC based on data points in 56 * Cadence ChipEstimator Tool. 57 * 58 * The frontend is modeled analytically, the backend is modeled empirically according to 59 * DDR2/DDR3-Lite protocol controllers in Cadence ChipEstimator Tool 60 * The PHY is modeled based on 61 * "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces ," ISSCC 2006, 62 * and A 14mW 6.25Gb/s Transceiver in 90nm CMOS for Serial Chip-to-Chip Communication," ISSCC 2007 63 * 64 * In Cadence ChipEstimator Tool there are two types of memory controllers: the full memory controllers 65 * that includes the frontend as the DesignWare DDR2/DDR3-Lite memory controllers and the backend only 66 * memory controllers as the DDR2/DDR3-Lite protocol controllers (except DesignWare DDR2/DDR3-Lite memory 67 * controllers, all memory controller IP in Cadence ChipEstimator Tool are backend memory controllers such as 68 * DDRC 1600A and DDRC 800A). Thus,to some extend the area and power difference between DesignWare 69 * DDR2/DDR3-Lite memory controllers and DDR2/DDR3-Lite protocol controllers can be an estimation to the 70 * frontend power and area, which is very close the analitically modeled results of the frontend for Niagara2@65nm 71 * 72 */ 73 74MCBackend::MCBackend(XMLNode* _xml_data, InputParameter* interface_ip_, 75 const MCParameters & mcp_, const MCStatistics & mcs_) 76 : McPATComponent(_xml_data), l_ip(*interface_ip_), mcp(mcp_), mcs(mcs_) { 77 name = "Transaction Engine"; 78 local_result = init_interface(&l_ip, name); 79 80 // Set up stats for the power calculations 81 tdp_stats.reset(); 82 tdp_stats.readAc.access = 0.5 * mcp.num_channels * mcp.clockRate; 83 tdp_stats.writeAc.access = 0.5 * mcp.num_channels * mcp.clockRate; 84 rtp_stats.reset(); 85 rtp_stats.readAc.access = mcs.reads; 86 rtp_stats.writeAc.access = mcs.writes; 87} 88 89void MCBackend::computeArea() { 90 // The area is in nm^2 91 if (mcp.mc_type == MC) { 92 if (mcp.type == 0) { 93 output_data.area = (2.7927 * log(mcp.peak_transfer_rate * 2) - 94 19.862) / 2.0 * mcp.dataBusWidth / 128.0 * 95 (l_ip.F_sz_um / 0.09) * mcp.num_channels; 96 } else { 97 output_data.area = 0.15 * mcp.dataBusWidth / 72.0 * 98 (l_ip.F_sz_um / 0.065) * (l_ip.F_sz_um / 0.065) * 99 mcp.num_channels; 100 } 101 } else { 102 //skip old model 103 cout << "Unknown memory controllers" << endl; 104 exit(0); 105 //area based on Cadence ChipEstimator for 8bit bus 106 output_data.area = 0.243 * mcp.dataBusWidth / 8; 107 } 108} 109 110 111void MCBackend::computeEnergy() { 112 double C_MCB, mc_power; 113 double backend_dyn; 114 double backend_gates; 115 double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); 116 double NMOS_sizing = g_tp.min_w_nmos_; 117 double PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; 118 double area_um2 = output_data.area * 1e6; 119 120 if (mcp.mc_type == MC) { 121 if (mcp.type == 0) { 122 //assuming the approximately same scaling factor as seen in processors. 123 //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process. 124 //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et 125 mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend 126 C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065; 127 //per access energy in memory controller 128 power.readOp.dynamic = C_MCB * g_tp.peri_global.Vdd * 129 g_tp.peri_global.Vdd * 130 (mcp.dataBusWidth/*+mcp.addressBusWidth*/); 131 power.readOp.leakage = area_um2 / 2 * 132 (g_tp.scaling_factor.core_tx_density) * 133 cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * 134 g_tp.peri_global.Vdd;//unit W 135 power.readOp.gate_leakage = area_um2 / 2 * 136 (g_tp.scaling_factor.core_tx_density) * 137 cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * 138 g_tp.peri_global.Vdd;//unit W 139 } else { 140 //Average on DDR2/3 protocol controller and DDRC 1600/800A in 141 //Cadence ChipEstimate 142 backend_dyn = 0.9e-9 / 800e6 * mcp.clockRate / 12800 * 143 mcp.peak_transfer_rate* mcp.dataBusWidth / 72.0 * 144 g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / 1.1 * 145 (l_ip.F_sz_nm/65.0); 146 //Scaling to technology and DIMM feature. The base IP support 147 //DDR3-1600(PC3 12800) 148 //5000 is from Cadence ChipEstimator 149 backend_gates = 50000 * mcp.dataBusWidth / 64.0; 150 151 power.readOp.dynamic = backend_dyn; 152 power.readOp.leakage = (backend_gates) * 153 cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * 154 g_tp.peri_global.Vdd;//unit W 155 power.readOp.gate_leakage = (backend_gates) * 156 cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * 157 g_tp.peri_global.Vdd;//unit W 158 } 159 } else { 160 //skip old model 161 cout<<"Unknown memory controllers"<<endl;exit(0); 162 //mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend 163 C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065; 164 power.readOp.leakage = area_um2 / 2 * 165 (g_tp.scaling_factor.core_tx_density) * 166 cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * 167 g_tp.peri_global.Vdd;//unit W 168 power.readOp.gate_leakage = area_um2 / 2 * 169 (g_tp.scaling_factor.core_tx_density) * 170 cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * 171 g_tp.peri_global.Vdd;//unit W 172 power.readOp.dynamic *= 1.2; 173 power.readOp.leakage *= 1.2; 174 power.readOp.gate_leakage *= 1.2; 175 //flash controller has about 20% more backend power since BCH ECC in 176 //flash is complex and power hungry 177 } 178 double long_channel_device_reduction = 179 longer_channel_device_reduction(Uncore_device); 180 power.readOp.longer_channel_leakage = power.readOp.leakage * 181 long_channel_device_reduction; 182 183 // Output leakage power calculations 184 output_data.subthreshold_leakage_power = 185 longer_channel_device ? power.readOp.longer_channel_leakage : 186 power.readOp.leakage; 187 output_data.gate_leakage_power = power.readOp.gate_leakage; 188 189 // Peak dynamic power calculation 190 output_data.peak_dynamic_power = power.readOp.dynamic * 191 (tdp_stats.readAc.access + tdp_stats.writeAc.access); 192 193 // Runtime dynamic energy calculation 194 output_data.runtime_dynamic_energy = 195 power.readOp.dynamic * 196 (rtp_stats.readAc.access + rtp_stats.writeAc.access) * 197 mcp.llcBlockSize * BITS_PER_BYTE / mcp.dataBusWidth + 198 // Original McPAT code: Assume 10% of peak power is consumed by routine 199 // job including memory refreshing and scrubbing 200 power.readOp.dynamic * 0.1 * execution_time; 201} 202 203MCPHY::MCPHY(XMLNode* _xml_data, InputParameter* interface_ip_, 204 const MCParameters & mcp_, const MCStatistics & mcs_) 205 : McPATComponent(_xml_data), l_ip(*interface_ip_), mcp(mcp_), mcs(mcs_) { 206 name = "Physical Interface (PHY)"; 207 local_result = init_interface(&l_ip, name); 208 209 // Set up stats for the power calculations 210 // TODO: Figure out why TDP stats aren't used 211 tdp_stats.reset(); 212 tdp_stats.readAc.access = 0.5 * mcp.num_channels; 213 tdp_stats.writeAc.access = 0.5 * mcp.num_channels; 214 rtp_stats.reset(); 215 rtp_stats.readAc.access = mcs.reads; 216 rtp_stats.writeAc.access = mcs.writes; 217} 218 219void MCPHY::computeArea() { 220 if (mcp.mc_type == MC) { 221 if (mcp.type == 0) { 222 //Based on die photos from Niagara 1 and 2. 223 //TODO merge this into undifferentiated core.PHY only achieves 224 //square root of the ideal scaling. 225 output_data.area = (6.4323 * log(mcp.peak_transfer_rate * 2) - 226 48.134) * mcp.dataBusWidth / 128.0 * 227 (l_ip.F_sz_um / 0.09) * mcp.num_channels / 2;//TODO:/2 228 } else { 229 //Designware/synopsis 16bit DDR3 PHY is 1.3mm (WITH IOs) at 40nm 230 //for upto DDR3 2133 (PC3 17066) 231 double non_IO_percentage = 0.2; 232 output_data.area = 1.3 * non_IO_percentage / 2133.0e6 * 233 mcp.clockRate / 17066 * mcp.peak_transfer_rate * 234 mcp.dataBusWidth / 16.0 * (l_ip.F_sz_um / 0.040)* 235 (l_ip.F_sz_um / 0.040) * mcp.num_channels;//um^2 236 } 237 } else { 238 //area based on Cadence ChipEstimator for 8bit bus 239 output_data.area = 0.4e6 / 2 * mcp.dataBusWidth / 8 / 1e6; 240 } 241} 242 243void MCPHY::computeEnergy() { 244 //PHY uses internal data buswidth but the actuall off-chip datawidth is 64bits + ecc 245 double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); 246 /* 247 * according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces ," ISSCC 2006; 248 * From Cadence ChipEstimator for normal I/O around 0.4~0.8 mW/Gb/s 249 */ 250 double power_per_gb_per_s, phy_dyn,phy_gates; 251 double NMOS_sizing = g_tp.min_w_nmos_; 252 double PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; 253 double area_um2 = output_data.area * 1e6; 254 255 if (mcp.mc_type == MC) { 256 if (mcp.type == 0) { 257 power_per_gb_per_s = mcp.LVDS ? 0.01 : 0.04; 258 //This is from curve fitting based on Niagara 1 and 2's PHY die photo. 259 //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down 260 //power.readOp.dynamic = 0.02*memAccesses*llcBlocksize*8;//change from Bytes to bits. 261 power.readOp.dynamic = power_per_gb_per_s * 262 sqrt(l_ip.F_sz_um / 0.09) * g_tp.peri_global.Vdd / 1.2 * 263 g_tp.peri_global.Vdd / 1.2; 264 power.readOp.leakage = area_um2 / 2 * 265 (g_tp.scaling_factor.core_tx_density) * 266 cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * 267 g_tp.peri_global.Vdd;//unit W 268 power.readOp.gate_leakage = area_um2 / 2 * 269 (g_tp.scaling_factor.core_tx_density) * 270 cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * 271 g_tp.peri_global.Vdd;//unit W 272 } else { 273 phy_gates = 200000 * mcp.dataBusWidth / 64.0; 274 power_per_gb_per_s = 0.01; 275 //This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down 276 power.readOp.dynamic = power_per_gb_per_s * (l_ip.F_sz_um / 0.09) * 277 g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; 278 power.readOp.leakage = (mcp.withPHY ? phy_gates : 0) * 279 cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * 280 g_tp.peri_global.Vdd;//unit W 281 power.readOp.gate_leakage = (mcp.withPHY ? phy_gates : 0) * 282 cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * 283 g_tp.peri_global.Vdd;//unit W 284 } 285 } 286 287// double phy_factor = (int)ceil(mcp.dataBusWidth/72.0);//Previous phy power numbers are based on 72 bit DIMM interface 288// power_t.readOp.dynamic *= phy_factor; 289// power_t.readOp.leakage *= phy_factor; 290// power_t.readOp.gate_leakage *= phy_factor; 291 292 double long_channel_device_reduction = 293 longer_channel_device_reduction(Uncore_device); 294 power.readOp.longer_channel_leakage = 295 power.readOp.leakage * long_channel_device_reduction; 296 297 // Leakage power calculations 298 output_data.subthreshold_leakage_power = 299 longer_channel_device ? power.readOp.longer_channel_leakage : 300 power.readOp.leakage; 301 output_data.gate_leakage_power = power.readOp.gate_leakage; 302 303 // Peak dynamic power calculation 304 double data_transfer_unit = (mcp.mc_type == MC)? 72:16;/*DIMM data width*/ 305 output_data.peak_dynamic_power = power.readOp.dynamic * 306 (mcp.peak_transfer_rate * BITS_PER_BYTE / 1e3) * mcp.dataBusWidth / 307 data_transfer_unit * mcp.num_channels / mcp.clockRate; 308 309 // Runtime dynamic energy calculation 310 output_data.runtime_dynamic_energy = 311 power.readOp.dynamic * 312 (rtp_stats.readAc.access + rtp_stats.writeAc.access) * 313 mcp.llcBlockSize * BITS_PER_BYTE / 1e9 + 314 // Original McPAT code: Assume 10% of peak power is consumed by routine 315 // job including memory refreshing and scrubbing 316 power.readOp.dynamic * 0.1 * execution_time; 317} 318 319MCFrontEnd::MCFrontEnd(XMLNode* _xml_data, InputParameter* interface_ip_, 320 const MCParameters & mcp_, const MCStatistics & mcs_) 321 : McPATComponent(_xml_data), frontendBuffer(NULL), readBuffer(NULL), 322 writeBuffer(NULL), MC_arb(NULL), interface_ip(*interface_ip_), 323 mcp(mcp_), mcs(mcs_) { 324 int tag, data; 325 bool is_default = true;//indication for default setup 326 327 /* MC frontend engine channels share the same engines but logically partitioned 328 * For all hardware inside MC. different channels do not share resources. 329 * TODO: add docodeing/mux stage to steer memory requests to different channels. 330 */ 331 332 name = "Front End"; 333 334 // Memory Request Reorder Buffer 335 tag = mcp.addressbus_width + EXTRA_TAG_BITS + mcp.opcodeW; 336 data = int(ceil((physical_address_width + mcp.opcodeW) / BITS_PER_BYTE)); 337 338 interface_ip.cache_sz = data * mcp.req_window_size_per_channel; 339 interface_ip.line_sz = data; 340 interface_ip.assoc = mcp.reorder_buffer_assoc; 341 interface_ip.nbanks = mcp.reorder_buffer_nbanks; 342 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 343 interface_ip.specific_tag = tag > 0; 344 interface_ip.tag_w = tag; 345 interface_ip.access_mode = Normal; 346 interface_ip.obj_func_dyn_energy = 0; 347 interface_ip.obj_func_dyn_power = 0; 348 interface_ip.obj_func_leak_power = 0; 349 interface_ip.obj_func_cycle_t = 1; 350 interface_ip.num_rw_ports = 0; 351 interface_ip.num_rd_ports = mcp.num_channels; 352 interface_ip.num_wr_ports = interface_ip.num_rd_ports; 353 interface_ip.num_se_rd_ports = 0; 354 interface_ip.num_search_ports = mcp.num_channels; 355 interface_ip.is_cache = true; 356 interface_ip.pure_cam = false; 357 interface_ip.pure_ram = false; 358 interface_ip.throughput = 1.0 / mcp.clockRate; 359 interface_ip.latency = 1.0 / mcp.clockRate; 360 frontendBuffer = new CacheArray(xml_data, &interface_ip, "Reorder Buffer", 361 Uncore_device, mcp.clockRate); 362 children.push_back(frontendBuffer); 363 364 frontendBuffer->tdp_stats.reset(); 365 frontendBuffer->tdp_stats.readAc.access = 366 frontendBuffer->l_ip.num_search_ports + 367 frontendBuffer->l_ip.num_wr_ports; 368 frontendBuffer->tdp_stats.writeAc.access = 369 frontendBuffer->l_ip.num_search_ports; 370 frontendBuffer->tdp_stats.searchAc.access = 371 frontendBuffer->l_ip.num_wr_ports; 372 frontendBuffer->rtp_stats.reset(); 373 // TODO: These stats assume that access power is calculated per buffer 374 // bit, which requires the stats to take into account the number of 375 // bits for each buffer slot. This should be revised... 376 //For each channel, each memory word need to check the address data to 377 //achieve best scheduling results. 378 //and this need to be done on all physical DIMMs in each logical memory 379 //DIMM *mcp.dataBusWidth/72 380 frontendBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize * 381 BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72; 382 frontendBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize * 383 BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72; 384 frontendBuffer->rtp_stats.searchAc.access = 385 frontendBuffer->rtp_stats.readAc.access + 386 frontendBuffer->rtp_stats.writeAc.access; 387 388 // Read Buffers 389 //Support key words first operation 390 data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE); 391 392 interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel; 393 interface_ip.line_sz = data; 394 interface_ip.assoc = mcp.read_buffer_assoc; 395 interface_ip.nbanks = mcp.read_buffer_nbanks; 396 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 397 interface_ip.specific_tag = mcp.read_buffer_tag_width > 0; 398 interface_ip.tag_w = mcp.read_buffer_tag_width; 399 interface_ip.access_mode = Sequential; 400 interface_ip.obj_func_dyn_energy = 0; 401 interface_ip.obj_func_dyn_power = 0; 402 interface_ip.obj_func_leak_power = 0; 403 interface_ip.obj_func_cycle_t = 1; 404 interface_ip.num_rw_ports = 0; 405 interface_ip.num_rd_ports = mcp.num_channels; 406 interface_ip.num_wr_ports = interface_ip.num_rd_ports; 407 interface_ip.num_se_rd_ports = 0; 408 interface_ip.num_search_ports = 0; 409 interface_ip.is_cache = false; 410 interface_ip.pure_cam = false; 411 interface_ip.pure_ram = true; 412 interface_ip.throughput = 1.0 / mcp.clockRate; 413 interface_ip.latency = 1.0 / mcp.clockRate; 414 readBuffer = new CacheArray(xml_data, &interface_ip, "Read Buffer", 415 Uncore_device, mcp.clockRate); 416 children.push_back(readBuffer); 417 418 readBuffer->tdp_stats.reset(); 419 readBuffer->tdp_stats.readAc.access = readBuffer->l_ip.num_rd_ports * 420 mcs.duty_cycle; 421 readBuffer->tdp_stats.writeAc.access = readBuffer->l_ip.num_wr_ports * 422 mcs.duty_cycle; 423 readBuffer->rtp_stats.reset(); 424 readBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize * 425 BITS_PER_BYTE / mcp.dataBusWidth; 426 readBuffer->rtp_stats.writeAc.access = mcs.reads * mcp.llcBlockSize * 427 BITS_PER_BYTE / mcp.dataBusWidth; 428 429 // Write Buffer 430 //Support key words first operation 431 data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE); 432 433 interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel; 434 interface_ip.line_sz = data; 435 interface_ip.assoc = mcp.write_buffer_assoc; 436 interface_ip.nbanks = mcp.write_buffer_nbanks; 437 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 438 interface_ip.specific_tag = mcp.write_buffer_tag_width > 0; 439 interface_ip.tag_w = mcp.write_buffer_tag_width; 440 interface_ip.access_mode = Normal; 441 interface_ip.obj_func_dyn_energy = 0; 442 interface_ip.obj_func_dyn_power = 0; 443 interface_ip.obj_func_leak_power = 0; 444 interface_ip.obj_func_cycle_t = 1; 445 interface_ip.num_rw_ports = 0; 446 interface_ip.num_rd_ports = mcp.num_channels; 447 interface_ip.num_wr_ports = interface_ip.num_rd_ports; 448 interface_ip.num_se_rd_ports = 0; 449 interface_ip.num_search_ports = 0; 450 interface_ip.is_cache = false; 451 interface_ip.pure_cam = false; 452 interface_ip.pure_ram = true; 453 interface_ip.throughput = 1.0 / mcp.clockRate; 454 interface_ip.latency = 1.0 / mcp.clockRate; 455 writeBuffer = new CacheArray(xml_data, &interface_ip, "Write Buffer", 456 Uncore_device, mcp.clockRate); 457 children.push_back(writeBuffer); 458 459 writeBuffer->tdp_stats.reset(); 460 writeBuffer->tdp_stats.readAc.access = writeBuffer->l_ip.num_rd_ports * 461 mcs.duty_cycle; 462 writeBuffer->tdp_stats.writeAc.access = writeBuffer->l_ip.num_wr_ports * 463 mcs.duty_cycle; 464 writeBuffer->rtp_stats.reset(); 465 writeBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize * 466 BITS_PER_BYTE / mcp.dataBusWidth; 467 writeBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize * 468 BITS_PER_BYTE / mcp.dataBusWidth; 469 470 // TODO: Set up selection logic as a leaf node in tree 471 //selection and arbitration logic 472 MC_arb = 473 new selection_logic(xml_data, is_default, 474 mcp.req_window_size_per_channel, 1, &interface_ip, 475 "Arbitration Logic", (mcs.reads + mcs.writes), 476 mcp.clockRate, Uncore_device); 477 // MC_arb is not included in the roll-up due to the uninitialized area 478 //children.push_back(MC_arb); 479} 480 481MemoryController::MemoryController(XMLNode* _xml_data, 482 InputParameter* interface_ip_) 483 : McPATComponent(_xml_data), interface_ip(*interface_ip_) { 484 name = "Memory Controller"; 485 set_mc_param(); 486 // TODO: Pass params and stats as pointers 487 children.push_back(new MCFrontEnd(xml_data, &interface_ip, mcp, mcs)); 488 children.push_back(new MCBackend(xml_data, &interface_ip, mcp, mcs)); 489 490 if (mcp.type==0 || (mcp.type == 1 && mcp.withPHY)) { 491 children.push_back(new MCPHY(xml_data, &interface_ip, mcp, mcs)); 492 } 493} 494 495void MemoryController::initialize_params() { 496 memset(&mcp, 0, sizeof(MCParameters)); 497} 498 499void MemoryController::set_mc_param() { 500 initialize_params(); 501 502 int num_children = xml_data->nChildNode("param"); 503 int tech_type; 504 int mat_type; 505 int i; 506 for (i = 0; i < num_children; i++) { 507 XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); 508 XMLCSTR node_name = paramNode->getAttribute("name"); 509 XMLCSTR value = paramNode->getAttribute("value"); 510 511 if (!node_name) 512 warnMissingParamName(paramNode->getAttribute("id")); 513 514 ASSIGN_FP_IF("mc_clock", mcp.clockRate); 515 ASSIGN_INT_IF("tech_type", tech_type); 516 ASSIGN_ENUM_IF("mc_type", mcp.mc_type, MemoryCtrl_type); 517 ASSIGN_FP_IF("num_mcs", mcp.num_mcs); 518 ASSIGN_INT_IF("llc_line_length", mcp.llc_line_length); 519 ASSIGN_INT_IF("databus_width", mcp.databus_width); 520 ASSIGN_INT_IF("memory_channels_per_mc", mcp.num_channels); 521 ASSIGN_INT_IF("req_window_size_per_channel", 522 mcp.req_window_size_per_channel); 523 ASSIGN_INT_IF("IO_buffer_size_per_channel", 524 mcp.IO_buffer_size_per_channel); 525 ASSIGN_INT_IF("addressbus_width", mcp.addressbus_width); 526 ASSIGN_INT_IF("opcode_width", mcp.opcodeW); 527 ASSIGN_INT_IF("type", mcp.type); 528 ASSIGN_ENUM_IF("LVDS", mcp.LVDS, bool); 529 ASSIGN_ENUM_IF("withPHY", mcp.withPHY, bool); 530 ASSIGN_INT_IF("peak_transfer_rate", mcp.peak_transfer_rate); 531 ASSIGN_INT_IF("number_ranks", mcp.number_ranks); 532 ASSIGN_INT_IF("reorder_buffer_assoc", mcp.reorder_buffer_assoc); 533 ASSIGN_INT_IF("reorder_buffer_nbanks", mcp.reorder_buffer_nbanks); 534 ASSIGN_INT_IF("read_buffer_assoc", mcp.read_buffer_assoc); 535 ASSIGN_INT_IF("read_buffer_nbanks", mcp.read_buffer_nbanks); 536 ASSIGN_INT_IF("read_buffer_tag_width", mcp.read_buffer_tag_width); 537 ASSIGN_INT_IF("write_buffer_assoc", mcp.write_buffer_assoc); 538 ASSIGN_INT_IF("write_buffer_nbanks", mcp.write_buffer_nbanks); 539 ASSIGN_INT_IF("write_buffer_tag_width", mcp.write_buffer_tag_width); 540 ASSIGN_INT_IF("wire_mat_type", mat_type); 541 ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type); 542 543 else { 544 warnUnrecognizedParam(node_name); 545 } 546 } 547 548 if (mcp.mc_type != MC) { 549 cout << "Unknown memory controller type: Only DRAM controller is " 550 << "supported for now" << endl; 551 exit(0); 552 } 553 554 // Change from MHz to Hz 555 mcp.clockRate *= 1e6; 556 557 interface_ip.data_arr_ram_cell_tech_type = tech_type; 558 interface_ip.data_arr_peri_global_tech_type = tech_type; 559 interface_ip.tag_arr_ram_cell_tech_type = tech_type; 560 interface_ip.tag_arr_peri_global_tech_type = tech_type; 561 interface_ip.wire_is_mat_type = mat_type; 562 interface_ip.wire_os_mat_type = mat_type; 563 564 num_children = xml_data->nChildNode("stat"); 565 for (i = 0; i < num_children; i++) { 566 XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); 567 XMLCSTR node_name = statNode->getAttribute("name"); 568 XMLCSTR value = statNode->getAttribute("value"); 569 570 if (!node_name) 571 warnMissingStatName(statNode->getAttribute("id")); 572 573 ASSIGN_FP_IF("duty_cycle", mcs.duty_cycle); 574 ASSIGN_FP_IF("perc_load", mcs.perc_load); 575 ASSIGN_FP_IF("memory_reads", mcs.reads); 576 ASSIGN_INT_IF("memory_writes", mcs.writes); 577 578 else { 579 warnUnrecognizedStat(node_name); 580 } 581 } 582 583 // Add ECC overhead 584 mcp.llcBlockSize = int(ceil(mcp.llc_line_length / BITS_PER_BYTE)) + 585 mcp.llc_line_length; 586 mcp.dataBusWidth = int(ceil(mcp.databus_width / BITS_PER_BYTE)) + 587 mcp.databus_width; 588} 589 590MCFrontEnd ::~MCFrontEnd() { 591 592 if (MC_arb) { 593 delete MC_arb; 594 MC_arb = NULL; 595 } 596 if (frontendBuffer) { 597 delete frontendBuffer; 598 frontendBuffer = NULL; 599 } 600 if (readBuffer) { 601 delete readBuffer; 602 readBuffer = NULL; 603 } 604 if (writeBuffer) { 605 delete writeBuffer; 606 writeBuffer = NULL; 607 } 608} 609 610MemoryController::~MemoryController() { 611 // TODO: use default constructor to delete children 612} 613 614