1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. 5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 6 * All Rights Reserved 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are 10 * met: redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer; 12 * redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution; 15 * neither the name of the copyright holders nor the names of its 16 * contributors may be used to endorse or promote products derived from 17 * this software without specific prior written permission. 18 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 * 31 ***************************************************************************/ 32 33 34 35#include <cmath> 36#include <iostream> 37 38#include "uca.h" 39 40UCA::UCA(const DynamicParameter & dyn_p) 41 : dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) { 42 int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks) 43 / 2 : (_log2(nbanks) - _log2(nbanks) / 2)); 44 int num_banks_hor_dir = nbanks / num_banks_ver_dir; 45 46 if (dp.use_inp_params) { 47 RWP = dp.num_rw_ports; 48 ERP = dp.num_rd_ports; 49 EWP = dp.num_wr_ports; 50 SCHP = dp.num_search_ports; 51 } else { 52 RWP = g_ip->num_rw_ports; 53 ERP = g_ip->num_rd_ports; 54 EWP = g_ip->num_wr_ports; 55 SCHP = g_ip->num_search_ports; 56 } 57 58 num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode) * 59 (RWP + ERP + EWP); 60 num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP); 61 num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP); 62 num_si_b_bank = dp.num_si_b_bank_per_port * SCHP; 63 num_so_b_bank = dp.num_so_b_bank_per_port * SCHP; 64 65 if (!dp.fully_assoc && !dp.pure_cam) { 66 67 if (g_ip->fast_access && dp.is_tag == false) { 68 num_do_b_bank *= g_ip->data_assoc; 69 } 70 71 htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 72 num_addr_b_bank, num_di_b_bank, 0, 73 num_do_b_bank, 0, num_banks_ver_dir * 2, 74 num_banks_hor_dir * 2, Add_htree, true); 75 htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 76 num_addr_b_bank, num_di_b_bank, 0, 77 num_do_b_bank, 0, num_banks_ver_dir * 2, 78 num_banks_hor_dir * 2, Data_in_htree, true); 79 htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 80 num_addr_b_bank, num_di_b_bank, 0, 81 num_do_b_bank, 0, num_banks_ver_dir * 2, 82 num_banks_hor_dir * 2, Data_out_htree, true); 83 } 84 85 else { 86 87 htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 88 num_addr_b_bank, num_di_b_bank, 89 num_si_b_bank, num_do_b_bank, num_so_b_bank, 90 num_banks_ver_dir * 2, num_banks_hor_dir * 2, 91 Add_htree, true); 92 htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 93 num_addr_b_bank, num_di_b_bank, 94 num_si_b_bank, num_do_b_bank, num_so_b_bank, 95 num_banks_ver_dir * 2, num_banks_hor_dir * 2, 96 Data_in_htree, true); 97 htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 98 num_addr_b_bank, num_di_b_bank, 99 num_si_b_bank, num_do_b_bank, 100 num_so_b_bank, num_banks_ver_dir * 2, 101 num_banks_hor_dir * 2, Data_out_htree, true); 102 htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 103 num_addr_b_bank, num_di_b_bank, 104 num_si_b_bank, num_do_b_bank, 105 num_so_b_bank, num_banks_ver_dir * 2, 106 num_banks_hor_dir * 2, Data_in_htree, true); 107 htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 108 num_addr_b_bank, num_di_b_bank, 109 num_si_b_bank, num_do_b_bank, 110 num_so_b_bank, num_banks_ver_dir * 2, 111 num_banks_hor_dir * 2, Data_out_htree, 112 true); 113 } 114 115 area.w = htree_in_data->area.w; 116 area.h = htree_in_data->area.h; 117 118 area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks; 119// cout<<"area cell"<<area_all_dataramcells<<endl; 120// cout<<area.get_area()<<endl; 121 // delay calculation 122 double inrisetime = 0.0; 123 compute_delays(inrisetime); 124 compute_power_energy(); 125} 126 127 128 129UCA::~UCA() { 130 delete htree_in_add; 131 delete htree_in_data; 132 delete htree_out_data; 133} 134 135 136 137double UCA::compute_delays(double inrisetime) { 138 double outrisetime = bank.compute_delays(inrisetime); 139 140 double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay; 141 double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay; 142 delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat + 143 bank.mat.sa_mux_lev_1_predec->delay + 144 bank.mat.sa_mux_lev_1_dec->delay; 145 delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat + 146 bank.mat.sa_mux_lev_2_predec->delay + 147 bank.mat.sa_mux_lev_2_dec->delay; 148 double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa; 149 150 delay_before_subarray_output_driver = 151 MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path 152 delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path 153 MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path 154 delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path 155 delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree + 156 bank.htree_out_data->delay + htree_out_data->delay; 157 access_time = bank.mat.delay_comparator; 158 159 double ram_delay_inside_mat; 160 if (dp.fully_assoc) { 161 //delay of FA contains both CAM tag and RAM data 162 { //delay of CAM 163 ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; 164 access_time = htree_in_add->delay + bank.htree_in_add->delay; 165 //delay of fully-associative data array 166 access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out; 167 } 168 } else { 169 access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path 170 } 171 172 if (dp.is_main_mem) { 173 double t_rcd = max_delay_before_row_decoder + delay_inside_mat; 174 double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) + 175 delay_from_subarray_out_drv_to_out; 176 access_time = t_rcd + cas_latency; 177 } 178 179 double temp; 180 181 if (!dp.fully_assoc) { 182 temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit 183 if (dp.is_dram) { 184 temp += bank.mat.delay_writeback; // temp stores random cycle time 185 } 186 187 188 temp = MAX(temp, bank.mat.r_predec->delay); 189 temp = MAX(temp, bank.mat.b_mux_predec->delay); 190 temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); 191 temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); 192 } else { 193 ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; 194 temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore 195 + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset; 196 197 temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc. 198 temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); 199 temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); 200 } 201 202 // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav 203 if (g_ip->rpters_in_htree == false) { 204 temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay); 205 } 206 cycle_time = temp; 207 208 double delay_req_network = max_delay_before_row_decoder; 209 double delay_rep_network = delay_from_subarray_out_drv_to_out; 210 multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network); 211 212 if (dp.is_main_mem) { 213 multisubbank_interleave_cycle_time = htree_in_add->delay; 214 precharge_delay = htree_in_add->delay + 215 bank.htree_in_add->delay + bank.mat.delay_writeback + 216 bank.mat.delay_wl_reset + bank.mat.delay_bl_restore; 217 cycle_time = access_time + precharge_delay; 218 } else { 219 precharge_delay = 0; 220 } 221 222 double dram_array_availability = 0; 223 if (dp.is_dram) { 224 dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100; 225 } 226 227 return outrisetime; 228} 229 230 231 232// note: currently, power numbers are for a bank of an array 233void UCA::compute_power_energy() { 234 bank.compute_power_energy(); 235 power = bank.power; 236 237 power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic; 238 power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic; 239 if (dp.fully_assoc || dp.pure_cam) 240 power_routing_to_bank.searchOp.dynamic = 241 htree_in_search->power.searchOp.dynamic + 242 htree_out_search->power.searchOp.dynamic; 243 244 power_routing_to_bank.readOp.leakage += 245 htree_in_add->power.readOp.leakage + 246 htree_in_data->power.readOp.leakage + 247 htree_out_data->power.readOp.leakage; 248 249 power_routing_to_bank.readOp.gate_leakage += 250 htree_in_add->power.readOp.gate_leakage + 251 htree_in_data->power.readOp.gate_leakage + 252 htree_out_data->power.readOp.gate_leakage; 253 if (dp.fully_assoc || dp.pure_cam) { 254 power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; 255 power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; 256 } 257 258 power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic; 259 power.readOp.dynamic += power_routing_to_bank.readOp.dynamic; 260 power.readOp.leakage += power_routing_to_bank.readOp.leakage; 261 power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage; 262 263 // calculate total write energy per access 264 power.writeOp.dynamic = power.readOp.dynamic 265 - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir 266 + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir 267 - power_routing_to_bank.readOp.dynamic 268 + power_routing_to_bank.writeOp.dynamic 269 + bank.htree_in_data->power.readOp.dynamic 270 - bank.htree_out_data->power.readOp.dynamic; 271 272 if (dp.is_dram == false) { 273 power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; 274 } 275 276 dyn_read_energy_from_closed_page = power.readOp.dynamic; 277 dyn_read_energy_from_open_page = power.readOp.dynamic - 278 (bank.mat.r_predec->power.readOp.dynamic + 279 bank.mat.power_row_decoders.readOp.dynamic + 280 bank.mat.power_bl_precharge_eq_drv.readOp.dynamic + 281 bank.mat.power_sa.readOp.dynamic + 282 bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir; 283 284 dyn_read_energy_remaining_words_in_burst = 285 (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) * 286 ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + 287 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + 288 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + 289 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + 290 bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + 291 bank.htree_out_data->power.readOp.dynamic + 292 power_routing_to_bank.readOp.dynamic); 293 dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst; 294 dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst; 295 296 activate_energy = htree_in_add->power.readOp.dynamic + 297 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act + 298 (bank.mat.r_predec->power.readOp.dynamic + 299 bank.mat.power_row_decoders.readOp.dynamic + 300 bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir; 301 read_energy = (htree_in_add->power.readOp.dynamic + 302 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + 303 (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + 304 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + 305 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + 306 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + 307 bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + 308 bank.htree_out_data->power.readOp.dynamic + 309 htree_in_data->power.readOp.dynamic) * g_ip->burst_len; 310 write_energy = (htree_in_add->power.readOp.dynamic + 311 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + 312 htree_in_data->power.readOp.dynamic + 313 bank.htree_in_data->power.readOp.dynamic + 314 (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + 315 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + 316 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + 317 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len; 318 precharge_energy = (bank.mat.power_bitline.readOp.dynamic + 319 bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir; 320 321 leak_power_subbank_closed_page = 322 (bank.mat.r_predec->power.readOp.leakage + 323 bank.mat.b_mux_predec->power.readOp.leakage + 324 bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + 325 bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + 326 bank.mat.power_row_decoders.readOp.leakage + 327 bank.mat.power_bit_mux_decoders.readOp.leakage + 328 bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + 329 bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + 330 bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; 331 332 leak_power_subbank_closed_page += 333 (bank.mat.r_predec->power.readOp.gate_leakage + 334 bank.mat.b_mux_predec->power.readOp.gate_leakage + 335 bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + 336 bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + 337 bank.mat.power_row_decoders.readOp.gate_leakage + 338 bank.mat.power_bit_mux_decoders.readOp.gate_leakage + 339 bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + 340 bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+ 341 //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; 342 343 leak_power_subbank_open_page = 344 (bank.mat.r_predec->power.readOp.leakage + 345 bank.mat.b_mux_predec->power.readOp.leakage + 346 bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + 347 bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + 348 bank.mat.power_row_decoders.readOp.leakage + 349 bank.mat.power_bit_mux_decoders.readOp.leakage + 350 bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + 351 bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + 352 bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; 353 354 leak_power_subbank_open_page += 355 (bank.mat.r_predec->power.readOp.gate_leakage + 356 bank.mat.b_mux_predec->power.readOp.gate_leakage + 357 bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + 358 bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + 359 bank.mat.power_row_decoders.readOp.gate_leakage + 360 bank.mat.power_bit_mux_decoders.readOp.gate_leakage + 361 bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + 362 bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir; 363 //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; 364 365 leak_power_request_and_reply_networks = 366 power_routing_to_bank.readOp.leakage + 367 bank.htree_in_add->power.readOp.leakage + 368 bank.htree_in_data->power.readOp.leakage + 369 bank.htree_out_data->power.readOp.leakage; 370 371 leak_power_request_and_reply_networks += 372 power_routing_to_bank.readOp.gate_leakage + 373 bank.htree_in_add->power.readOp.gate_leakage + 374 bank.htree_in_data->power.readOp.gate_leakage + 375 bank.htree_out_data->power.readOp.gate_leakage; 376 377 if (dp.fully_assoc || dp.pure_cam) { 378 leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; 379 leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; 380 } 381 382 383 // if DRAM, add contribution of power spent in row predecoder drivers, 384 // blocks and decoders to refresh power 385 if (dp.is_dram) { 386 refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir + 387 bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays; 388 refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays; 389 refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir; 390 refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; 391 refresh_power /= dp.dram_refresh_period; 392 } 393 394 395 if (dp.is_tag == false) { 396 power.readOp.dynamic = dyn_read_energy_from_closed_page; 397 power.writeOp.dynamic = dyn_read_energy_from_closed_page 398 - dyn_read_energy_remaining_words_in_burst 399 - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir 400 + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir 401 + (power_routing_to_bank.writeOp.dynamic - 402 power_routing_to_bank.readOp.dynamic - 403 bank.htree_out_data->power.readOp.dynamic + 404 bank.htree_in_data->power.readOp.dynamic) * 405 (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME 406 407 if (dp.is_dram == false) { 408 power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; 409 } 410 } 411 412 // if DRAM, add refresh power to total leakage 413 if (dp.is_dram) { 414 power.readOp.leakage += refresh_power; 415 } 416 417 // TODO: below should be avoided. 418 /*if (dp.is_main_mem) 419 { 420 power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks; 421 }*/ 422 423 assert(power.readOp.dynamic > 0); 424 assert(power.writeOp.dynamic > 0); 425 assert(power.readOp.leakage > 0); 426} 427 428