// uca.cc revision 10152
1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. 5 * All Rights Reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer; 11 * redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution; 14 * neither the name of the copyright holders nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 * 30 ***************************************************************************/ 31 32 33 34#include <cmath> 35#include <iostream> 36 37#include "uca.h" 38 39UCA::UCA(const DynamicParameter & dyn_p) 40 :dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) 41{ 42 int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)/2 : (_log2(nbanks) - _log2(nbanks)/2)); 43 int num_banks_hor_dir = nbanks/num_banks_ver_dir; 44 45 if (dp.use_inp_params) 46 { 47 RWP = dp.num_rw_ports; 48 ERP = dp.num_rd_ports; 49 EWP = dp.num_wr_ports; 50 SCHP = dp.num_search_ports; 51 } 52 else 53 { 54 RWP = g_ip->num_rw_ports; 55 ERP = g_ip->num_rd_ports; 56 EWP = g_ip->num_wr_ports; 57 SCHP = g_ip->num_search_ports; 58 } 59 60 num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP); 61 num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP); 62 num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP); 63 num_si_b_bank = dp.num_si_b_bank_per_port * SCHP; 64 num_so_b_bank = dp.num_so_b_bank_per_port * SCHP; 65 66 if (!dp.fully_assoc && !dp.pure_cam) 67 { 68 69 if (g_ip->fast_access && dp.is_tag == false) 70 { 71 num_do_b_bank *= g_ip->data_assoc; 72 } 73 74 htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 75 num_addr_b_bank, num_di_b_bank,0, num_do_b_bank,0,num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true); 76 htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 77 
num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); 78 htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 79 num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); 80 } 81 82 else 83 { 84 85 htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 86 num_addr_b_bank, num_di_b_bank, num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true); 87 htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 88 num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); 89 htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 90 num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); 91 htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 92 num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); 93 htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 94 num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); 95 } 96 97 area.w = htree_in_data->area.w; 98 area.h = htree_in_data->area.h; 99 100 area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks; 101// cout<<"area cell"<<area_all_dataramcells<<endl; 102// cout<<area.get_area()<<endl; 103 // delay calculation 104 double inrisetime = 0.0; 105 compute_delays(inrisetime); 106 compute_power_energy(); 107} 108 109 110 111UCA::~UCA() 112{ 113 delete htree_in_add; 114 delete htree_in_data; 115 delete htree_out_data; 116} 117 118 119 120double UCA::compute_delays(double inrisetime) 121{ 122 double outrisetime = 
bank.compute_delays(inrisetime); 123 124 double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay; 125 double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay; 126 delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat + 127 bank.mat.sa_mux_lev_1_predec->delay + 128 bank.mat.sa_mux_lev_1_dec->delay; 129 delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat + 130 bank.mat.sa_mux_lev_2_predec->delay + 131 bank.mat.sa_mux_lev_2_dec->delay; 132 double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa; 133 134 delay_before_subarray_output_driver = 135 MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path 136 delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path 137 MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path 138 delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path 139 delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree + 140 bank.htree_out_data->delay + htree_out_data->delay; 141 access_time = bank.mat.delay_comparator; 142 143 double ram_delay_inside_mat; 144 if (dp.fully_assoc) 145 { 146 //delay of FA contains both CAM tag and RAM data 147 { //delay of CAM 148 ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; 149 access_time = htree_in_add->delay + bank.htree_in_add->delay; 150 //delay of fully-associative data array 151 access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out; 152 } 153 } 154 else 155 { 156 access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path 157 } 158 159 if (dp.is_main_mem) 160 { 161 double t_rcd = max_delay_before_row_decoder + delay_inside_mat; 162 double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) + 163 delay_from_subarray_out_drv_to_out; 164 access_time = t_rcd + cas_latency; 165 } 166 167 double 
temp; 168 169 if (!dp.fully_assoc) 170 { 171 temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit 172 if (dp.is_dram) 173 { 174 temp += bank.mat.delay_writeback; // temp stores random cycle time 175 } 176 177 178 temp = MAX(temp, bank.mat.r_predec->delay); 179 temp = MAX(temp, bank.mat.b_mux_predec->delay); 180 temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); 181 temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); 182 } 183 else 184 { 185 ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; 186 temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore 187 + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset; 188 189 temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc. 190 temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); 191 temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); 192 } 193 194 // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav 195 if (g_ip->rpters_in_htree == false) 196 { 197 temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay); 198 } 199 cycle_time = temp; 200 201 double delay_req_network = max_delay_before_row_decoder; 202 double delay_rep_network = delay_from_subarray_out_drv_to_out; 203 multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network); 204 205 if (dp.is_main_mem) 206 { 207 multisubbank_interleave_cycle_time = htree_in_add->delay; 208 precharge_delay = htree_in_add->delay + 209 bank.htree_in_add->delay + bank.mat.delay_writeback + 210 bank.mat.delay_wl_reset + bank.mat.delay_bl_restore; 211 cycle_time = access_time + precharge_delay; 212 } 213 else 214 { 215 precharge_delay = 0; 216 } 217 218 double dram_array_availability = 0; 219 if (dp.is_dram) 220 { 221 dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100; 222 
} 223 224 return outrisetime; 225} 226 227 228 229// note: currently, power numbers are for a bank of an array 230void UCA::compute_power_energy() 231{ 232 bank.compute_power_energy(); 233 power = bank.power; 234 235 power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic; 236 power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic; 237 if (dp.fully_assoc || dp.pure_cam) 238 power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic; 239 240 power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage + 241 htree_in_data->power.readOp.leakage + 242 htree_out_data->power.readOp.leakage; 243 244 power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage + 245 htree_in_data->power.readOp.gate_leakage + 246 htree_out_data->power.readOp.gate_leakage; 247 if (dp.fully_assoc || dp.pure_cam) 248 { 249 power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; 250 power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; 251 } 252 253 power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic; 254 power.readOp.dynamic += power_routing_to_bank.readOp.dynamic; 255 power.readOp.leakage += power_routing_to_bank.readOp.leakage; 256 power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage; 257 258 // calculate total write energy per access 259 power.writeOp.dynamic = power.readOp.dynamic 260 - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir 261 + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir 262 - power_routing_to_bank.readOp.dynamic 263 + power_routing_to_bank.writeOp.dynamic 264 + bank.htree_in_data->power.readOp.dynamic 265 - bank.htree_out_data->power.readOp.dynamic; 266 
267 if (dp.is_dram == false) 268 { 269 power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; 270 } 271 272 dyn_read_energy_from_closed_page = power.readOp.dynamic; 273 dyn_read_energy_from_open_page = power.readOp.dynamic - 274 (bank.mat.r_predec->power.readOp.dynamic + 275 bank.mat.power_row_decoders.readOp.dynamic + 276 bank.mat.power_bl_precharge_eq_drv.readOp.dynamic + 277 bank.mat.power_sa.readOp.dynamic + 278 bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir; 279 280 dyn_read_energy_remaining_words_in_burst = 281 (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) * 282 ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + 283 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + 284 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + 285 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + 286 bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + 287 bank.htree_out_data->power.readOp.dynamic + 288 power_routing_to_bank.readOp.dynamic); 289 dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst; 290 dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst; 291 292 activate_energy = htree_in_add->power.readOp.dynamic + 293 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act + 294 (bank.mat.r_predec->power.readOp.dynamic + 295 bank.mat.power_row_decoders.readOp.dynamic + 296 bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir; 297 read_energy = (htree_in_add->power.readOp.dynamic + 298 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + 299 (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + 300 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + 301 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + 302 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + 303 bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + 304 
bank.htree_out_data->power.readOp.dynamic + 305 htree_in_data->power.readOp.dynamic) * g_ip->burst_len; 306 write_energy = (htree_in_add->power.readOp.dynamic + 307 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + 308 htree_in_data->power.readOp.dynamic + 309 bank.htree_in_data->power.readOp.dynamic + 310 (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + 311 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + 312 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + 313 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len; 314 precharge_energy = (bank.mat.power_bitline.readOp.dynamic + 315 bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir; 316 317 leak_power_subbank_closed_page = 318 (bank.mat.r_predec->power.readOp.leakage + 319 bank.mat.b_mux_predec->power.readOp.leakage + 320 bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + 321 bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + 322 bank.mat.power_row_decoders.readOp.leakage + 323 bank.mat.power_bit_mux_decoders.readOp.leakage + 324 bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + 325 bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + 326 bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; 327 328 leak_power_subbank_closed_page += 329 (bank.mat.r_predec->power.readOp.gate_leakage + 330 bank.mat.b_mux_predec->power.readOp.gate_leakage + 331 bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + 332 bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + 333 bank.mat.power_row_decoders.readOp.gate_leakage + 334 bank.mat.power_bit_mux_decoders.readOp.gate_leakage + 335 bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + 336 bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+ 337 //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; 338 339 leak_power_subbank_open_page = 340 
(bank.mat.r_predec->power.readOp.leakage + 341 bank.mat.b_mux_predec->power.readOp.leakage + 342 bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + 343 bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + 344 bank.mat.power_row_decoders.readOp.leakage + 345 bank.mat.power_bit_mux_decoders.readOp.leakage + 346 bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + 347 bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + 348 bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; 349 350 leak_power_subbank_open_page += 351 (bank.mat.r_predec->power.readOp.gate_leakage + 352 bank.mat.b_mux_predec->power.readOp.gate_leakage + 353 bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + 354 bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + 355 bank.mat.power_row_decoders.readOp.gate_leakage + 356 bank.mat.power_bit_mux_decoders.readOp.gate_leakage + 357 bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + 358 bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir; 359 //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; 360 361 leak_power_request_and_reply_networks = 362 power_routing_to_bank.readOp.leakage + 363 bank.htree_in_add->power.readOp.leakage + 364 bank.htree_in_data->power.readOp.leakage + 365 bank.htree_out_data->power.readOp.leakage; 366 367 leak_power_request_and_reply_networks += 368 power_routing_to_bank.readOp.gate_leakage + 369 bank.htree_in_add->power.readOp.gate_leakage + 370 bank.htree_in_data->power.readOp.gate_leakage + 371 bank.htree_out_data->power.readOp.gate_leakage; 372 373 if (dp.fully_assoc || dp.pure_cam) 374 { 375 leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; 376 leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; 377 } 378 379 380 if (dp.is_dram) 381 { // if DRAM, add contribution of power spent in 
row predecoder drivers, blocks and decoders to refresh power 382 refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir + 383 bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays; 384 refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays; 385 refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir; 386 refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; 387 refresh_power /= dp.dram_refresh_period; 388 } 389 390 391 if (dp.is_tag == false) 392 { 393 power.readOp.dynamic = dyn_read_energy_from_closed_page; 394 power.writeOp.dynamic = dyn_read_energy_from_closed_page 395 - dyn_read_energy_remaining_words_in_burst 396 - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir 397 + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir 398 + (power_routing_to_bank.writeOp.dynamic - 399 power_routing_to_bank.readOp.dynamic - 400 bank.htree_out_data->power.readOp.dynamic + 401 bank.htree_in_data->power.readOp.dynamic) * 402 (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME 403 404 if (dp.is_dram == false) 405 { 406 power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; 407 } 408 } 409 410 // if DRAM, add refresh power to total leakage 411 if (dp.is_dram) 412 { 413 power.readOp.leakage += refresh_power; 414 } 415 416 // TODO: below should be avoided. 417 /*if (dp.is_main_mem) 418 { 419 power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks; 420 }*/ 421 422 assert(power.readOp.dynamic > 0); 423 assert(power.writeOp.dynamic > 0); 424 assert(power.readOp.leakage > 0); 425} 426 427