mat.cc revision 10152:52c552138ba1
1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. 5 * All Rights Reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer; 11 * redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution; 14 * neither the name of the copyright holders nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 * 30 ***************************************************************************/ 31 32 33 34#include <cassert> 35 36#include "mat.h" 37 38Mat::Mat(const DynamicParameter & dyn_p) 39 :dp(dyn_p), 40 power_subarray_out_drv(), 41 delay_fa_tag(0), delay_cam(0), 42 delay_before_decoder(0), delay_bitline(0), 43 delay_wl_reset(0), delay_bl_restore(0), 44 delay_searchline(0), delay_matchchline(0), 45 delay_cam_sl_restore(0), delay_cam_ml_reset(0), 46 delay_fa_ram_wl(0),delay_hit_miss_reset(0), 47 delay_hit_miss(0), 48 subarray(dp, dp.fully_assoc), 49 power_bitline(), per_bitline_read_energy(0), 50 deg_bl_muxing(dp.deg_bl_muxing), 51 num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir), 52 delay_writeback(0), 53 cell(subarray.cell), cam_cell(subarray.cam_cell), 54 is_dram(dyn_p.is_dram), 55 pure_cam(dyn_p.pure_cam), 56 num_mats(dp.num_mats), 57 power_sa(), delay_sa(0), 58 leak_power_sense_amps_closed_page_state(0), 59 leak_power_sense_amps_open_page_state(0), 60 delay_subarray_out_drv(0), 61 delay_comparator(0), power_comparator(), 62 num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat), 63 num_subarrays_per_mat(dp.num_subarrays/dp.num_mats), 64 num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir) 65{ 66 assert(num_subarrays_per_mat <= 4); 67 assert(num_subarrays_per_row <= 2); 68 is_fa = (dp.fully_assoc) ? true : false; 69 camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them. 70 71 if (is_fa || pure_cam) 72 num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat; 73 74 if (dp.use_inp_params == 1) { 75 RWP = dp.num_rw_ports; 76 ERP = dp.num_rd_ports; 77 EWP = dp.num_wr_ports; 78 SCHP = dp.num_search_ports; 79 } 80 else { 81 RWP = g_ip->num_rw_ports; 82 ERP = g_ip->num_rd_ports; 83 EWP = g_ip->num_wr_ports; 84 SCHP = g_ip->num_search_ports; 85 86 } 87 88 double number_sa_subarray; 89 90 if (!is_fa && !pure_cam) 91 { 92 number_sa_subarray = subarray.num_cols / deg_bl_muxing; 93 } 94 else if (is_fa && !pure_cam) 95 { 96 number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing; 97 } 98 99 else 100 { 101 number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing; 102 } 103 104 int num_dec_signals = subarray.num_rows; 105 double C_ld_bit_mux_dec_out = 0; 106 double C_ld_sa_mux_lev_1_dec_out = 0; 107 double C_ld_sa_mux_lev_2_dec_out = 0; 108 double R_wire_wl_drv_out; 109 110 if (!is_fa && !pure_cam) 111 { 112 R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um; 113 } 114 else if (is_fa && !pure_cam) 115 { 116 R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ; 117 } 118 else 119 { 120 R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um; 121 } 122 123 double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA 124 double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w; 125 126 if (deg_bl_muxing > 1) 127 { 128 C_ld_bit_mux_dec_out = 129 (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell 130 num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); 131 } 132 133 if (dp.Ndsam_lev_1 > 1) 134 { 135 C_ld_sa_mux_lev_1_dec_out = 136 (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + 137 num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); 138 } 139 if (dp.Ndsam_lev_2 > 1) 140 { 141 C_ld_sa_mux_lev_2_dec_out = 142 (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + 143 num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); 144 } 145 146 if (num_subarrays_per_row >= 2) 147 { 148 // wire heads for both right and left side of a mat, so half the resistance 149 R_wire_bit_mux_dec_out /= 2.0; 150 R_wire_sa_mux_dec_out /= 2.0; 151 } 152 153 154 row_dec = new Decoder( 155 num_dec_signals, 156 false, 157 subarray.C_wl, 158 R_wire_wl_drv_out, 159 false/*is_fa*/, 160 is_dram, 161 true, 162 camFlag? cam_cell:cell); 163// if (is_fa && (!dp.is_tag)) 164// { 165// row_dec->exist = true; 166// } 167 bit_mux_dec = new Decoder( 168 deg_bl_muxing,// This number is 1 for FA or CAM 169 false, 170 C_ld_bit_mux_dec_out, 171 R_wire_bit_mux_dec_out, 172 false/*is_fa*/, 173 is_dram, 174 false, 175 camFlag? cam_cell:cell); 176 sa_mux_lev_1_dec = new Decoder( 177 dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM 178 dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal 179 C_ld_sa_mux_lev_1_dec_out, 180 R_wire_sa_mux_dec_out, 181 false/*is_fa*/, 182 is_dram, 183 false, 184 camFlag? cam_cell:cell); 185 sa_mux_lev_2_dec = new Decoder( 186 dp.Ndsam_lev_2, // This number is 1 for FA or CAM 187 false, 188 C_ld_sa_mux_lev_2_dec_out, 189 R_wire_sa_mux_dec_out, 190 false/*is_fa*/, 191 is_dram, 192 false, 193 camFlag? cam_cell:cell); 194 195 double C_wire_predec_blk_out; 196 double R_wire_predec_blk_out; 197 198 if (!is_fa && !pure_cam) 199 { 200 201 C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h; 202 R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h; 203 204 } 205 else //for pre-decode block's load is same for both FA and CAM 206 { 207 C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h; 208 R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h; 209 } 210 211 212 if (is_fa||pure_cam) 213 num_dec_signals += _log2(num_subarrays_per_mat); 214 215 PredecBlk * r_predec_blk1 = new PredecBlk( 216 num_dec_signals, 217 row_dec, 218 C_wire_predec_blk_out, 219 R_wire_predec_blk_out, 220 num_subarrays_per_mat, 221 is_dram, 222 true); 223 PredecBlk * r_predec_blk2 = new PredecBlk( 224 num_dec_signals, 225 row_dec, 226 C_wire_predec_blk_out, 227 R_wire_predec_blk_out, 228 num_subarrays_per_mat, 229 is_dram, 230 false); 231 PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true); 232 PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false); 233 PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true); 234 PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false); 235 PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true); 236 PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false); 237 dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true); 238 dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false); 239 240 PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram); 241 PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram); 242 PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram); 243 PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram); 244 PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram); 245 PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram); 246 PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram); 247 PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram); 248 way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram); 249 dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram); 250 251 r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2); 252 b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2); 253 sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2); 254 sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2); 255 256 subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng 257 258 double driver_c_gate_load; 259 double driver_c_wire_load; 260 double driver_r_wire_load; 261 262 if (is_fa || pure_cam) 263 264 { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same 265 driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); 266 driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; 267 driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; 268 cam_bl_precharge_eq_drv = new Driver( 269 driver_c_gate_load, 270 driver_c_wire_load, 271 driver_r_wire_load, 272 is_dram); 273 274 if (!pure_cam) 275 { 276 //This is only used for fully asso not pure CAM 277 driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); 278 driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um; 279 driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um; 280 bl_precharge_eq_drv = new Driver( 281 driver_c_gate_load, 282 driver_c_wire_load, 283 driver_r_wire_load, 284 is_dram); 285 } 286 } 287 288 else 289 { 290 driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); 291 driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um; 292 driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um; 293 bl_precharge_eq_drv = new Driver( 294 driver_c_gate_load, 295 driver_c_wire_load, 296 driver_r_wire_load, 297 is_dram); 298 } 299 double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP); 300 double w_row_decoder = area_row_decoder / subarray.area.get_h(); 301 302 double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux = 303 compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); 304 305 double h_subarray_out_drv = subarray_out_wire->area.get_area() * 306 (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w(); 307 308 309 h_subarray_out_drv *= (RWP + ERP + SCHP); 310 311 double h_comparators = 0.0; 312 double w_row_predecode_output_wires = 0.0; 313 double h_bit_mux_dec_out_wires = 0.0; 314 double h_senseamp_mux_dec_out_wires = 0.0; 315 316 if ((!is_fa)&&(dp.is_tag)) 317 { 318 //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat; 319 h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w()); 320 h_comparators *= (RWP + ERP); 321 } 322 323 324 int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits); 325 int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits); 326 w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) * 327 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); 328 329 330 double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) * 331 (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + 332 h_subarray_out_drv + h_comparators); 333 334 double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder); 335 336 if (deg_bl_muxing > 1) 337 { 338 h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); 339 } 340 if (dp.Ndsam_lev_1 > 1) 341 { 342 h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP); 343 } 344 if (dp.Ndsam_lev_2 > 1) 345 { 346 h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP); 347 } 348 349 double h_addr_datain_wires; 350 if (!g_ip->ver_htree_wires_over_array) 351 { 352 h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + 353 (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) * 354 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); 355 356 if (is_fa || pure_cam) 357 { 358 h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit 359 (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) * 360 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) + 361 (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP; 362 } 363 //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux + 364 //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv); 365 h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators + 366 h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) + 367 h_addr_datain_wires + 368 h_bit_mux_dec_out_wires + 369 h_senseamp_mux_dec_out_wires; 370 371 } 372 373 // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area; 374 double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() + 375 b_mux_predec_blk_drv1->area.get_area() + 376 sa_mux_lev_1_predec_blk_drv1->area.get_area() + 377 sa_mux_lev_2_predec_blk_drv1->area.get_area() + 378 way_sel_drv1->area.get_area() + 379 r_predec_blk_drv2->area.get_area() + 380 b_mux_predec_blk_drv2->area.get_area() + 381 sa_mux_lev_1_predec_blk_drv2->area.get_area() + 382 sa_mux_lev_2_predec_blk_drv2->area.get_area() + 383 r_predec_blk1->area.get_area() + 384 b_mux_predec_blk1->area.get_area() + 385 sa_mux_lev_1_predec_blk1->area.get_area() + 386 sa_mux_lev_2_predec_blk1->area.get_area() + 387 r_predec_blk2->area.get_area() + 388 b_mux_predec_blk2->area.get_area() + 389 sa_mux_lev_1_predec_blk2->area.get_area() + 390 sa_mux_lev_2_predec_blk2->area.get_area() + 391 bit_mux_dec->area.get_area() + 392 sa_mux_lev_1_dec->area.get_area() + 393 sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP); 394 395 double area_efficiency_mat; 396 397// if (!is_fa) 398// { 399 assert(num_subarrays_per_mat/num_subarrays_per_row>0); 400 area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area; 401 area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area; 402 area.w = (area.h*area.w + area_mat_center_circuitry) / area.h; 403 area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area(); 404 405// cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl; 406// cout<<"h_comparators"<<h_comparators<<endl; 407// cout<<"h_subarray_out_drv"<<h_subarray_out_drv<<endl; 408// cout<<"h_addr_datain_wires"<<h_addr_datain_wires<<endl; 409// cout<<"h_bit_mux_dec_out_wires"<<h_bit_mux_dec_out_wires<<endl; 410// cout<<"h_senseamp_mux_dec_out_wires"<<h_senseamp_mux_dec_out_wires<<endl; 411// cout<<"h_non_cell_area"<<h_non_cell_area<<endl; 412// cout<<"area.h =" << (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h<<endl; 413// cout<<"w_non_cell_area"<<w_non_cell_area<<endl; 414// cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl; 415 416 assert(area.h>0); 417 assert(area.w>0); 418// } 419// else 420// { 421// area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.get_h() + h_non_cell_area; 422// area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area; 423// area.w = (area.h*area.w + area_mat_center_circuitry) / area.h; 424// area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area(); 425// } 426 } 427 428 429 430Mat::~Mat() 431{ 432 delete row_dec; 433 delete bit_mux_dec; 434 delete sa_mux_lev_1_dec; 435 delete sa_mux_lev_2_dec; 436 437 delete r_predec->blk1; 438 delete r_predec->blk2; 439 delete b_mux_predec->blk1; 440 delete b_mux_predec->blk2; 441 delete sa_mux_lev_1_predec->blk1; 442 delete sa_mux_lev_1_predec->blk2; 443 delete sa_mux_lev_2_predec->blk1; 444 delete sa_mux_lev_2_predec->blk2; 445 delete dummy_way_sel_predec_blk1; 446 delete dummy_way_sel_predec_blk2; 447 448 delete r_predec->drv1; 449 delete r_predec->drv2; 450 delete b_mux_predec->drv1; 451 delete b_mux_predec->drv2; 452 delete sa_mux_lev_1_predec->drv1; 453 delete sa_mux_lev_1_predec->drv2; 454 delete sa_mux_lev_2_predec->drv1; 455 delete sa_mux_lev_2_predec->drv2; 456 delete way_sel_drv1; 457 delete dummy_way_sel_predec_blk_drv2; 458 459 delete r_predec; 460 delete b_mux_predec; 461 delete sa_mux_lev_1_predec; 462 delete sa_mux_lev_2_predec; 463 464 delete subarray_out_wire; 465 if (!pure_cam) 466 delete bl_precharge_eq_drv; 467 468 if (is_fa || pure_cam) 469 { 470 delete sl_precharge_eq_drv ; 471 delete sl_data_drv ; 472 delete cam_bl_precharge_eq_drv; 473 delete ml_precharge_drv; 474 delete ml_to_ram_wl_drv; 475 } 476} 477 478 479 480double Mat::compute_delays(double inrisetime) 481{ 482 int k; 483 double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl; 484 double outrisetime_search, outrisetime, row_dec_outrisetime; 485 // delay calculation for tags of fully associative cache 486 if (is_fa || pure_cam) 487 { 488 //Compute search access time 489 outrisetime_search = compute_cam_delay(inrisetime); 490 if (is_fa) 491 { 492 bl_precharge_eq_drv->compute_delay(0); 493 k = ml_to_ram_wl_drv->number_gates - 1; 494 rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true); 495 C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) + 496 drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true); 497 C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load; 498 tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2; 499 delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); 500 501 R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); 502 r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in 503 R_bl = subarray.num_rows * r_b_metal; 504 C_bl = subarray.C_bl; 505 delay_bl_restore = bl_precharge_eq_drv->delay + 506 log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))* 507 (R_bl_precharge * C_bl + R_bl * C_bl / 2); 508 509 510 outrisetime_search = compute_bitline_delay(outrisetime_search); 511 outrisetime_search = compute_sa_delay(outrisetime_search); 512 } 513 outrisetime_search = compute_subarray_out_drv(outrisetime_search); 514 subarray_out_wire->set_in_rise_time(outrisetime_search); 515 outrisetime_search = subarray_out_wire->signal_rise_time(); 516 delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; 517 518 519 //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited. 520 outrisetime = r_predec->compute_delays(inrisetime); 521 row_dec_outrisetime = row_dec->compute_delays(outrisetime); 522 523 outrisetime = b_mux_predec->compute_delays(inrisetime); 524 bit_mux_dec->compute_delays(outrisetime); 525 526 outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); 527 sa_mux_lev_1_dec->compute_delays(outrisetime); 528 529 outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); 530 sa_mux_lev_2_dec->compute_delays(outrisetime); 531 532 if (pure_cam) 533 { 534 outrisetime = compute_bitline_delay(row_dec_outrisetime); 535 outrisetime = compute_sa_delay(outrisetime); 536 } 537 return outrisetime_search; 538 } 539 else 540 { 541 bl_precharge_eq_drv->compute_delay(0); 542 if (row_dec->exist == true) 543 { 544 int k = row_dec->num_gates - 1; 545 double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true); 546 // TODO: this 4*cell.h number must be revisited 547 double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) + 548 drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true); 549 double C_ld = row_dec->C_ld_dec_out; 550 double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2; 551 delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); 552 } 553 double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); 554 double r_b_metal = cell.h * g_tp.wire_local.R_per_um; 555 double R_bl = subarray.num_rows * r_b_metal; 556 double C_bl = subarray.C_bl; 557 558 if (is_dram) 559 { 560 delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2); 561 } 562 else 563 { 564 delay_bl_restore = bl_precharge_eq_drv->delay + 565 log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))* 566 (R_bl_precharge * C_bl + R_bl * C_bl / 2); 567 } 568 } 569 570 571 572 outrisetime = r_predec->compute_delays(inrisetime); 573 row_dec_outrisetime = row_dec->compute_delays(outrisetime); 574 575 outrisetime = b_mux_predec->compute_delays(inrisetime); 576 bit_mux_dec->compute_delays(outrisetime); 577 578 outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); 579 sa_mux_lev_1_dec->compute_delays(outrisetime); 580 581 outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); 582 sa_mux_lev_2_dec->compute_delays(outrisetime); 583 584 outrisetime = compute_bitline_delay(row_dec_outrisetime); 585 outrisetime = compute_sa_delay(outrisetime); 586 outrisetime = compute_subarray_out_drv(outrisetime); 587 subarray_out_wire->set_in_rise_time(outrisetime); 588 outrisetime = subarray_out_wire->signal_rise_time(); 589 590 delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; 591 592 if (dp.is_tag == true && dp.fully_assoc == false) 593 { 594 compute_comparator_delay(0); 595 } 596 597 if (row_dec->exist == false) 598 { 599 delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay); 600 } 601 return outrisetime; 602} 603 604 605 606double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() 607{ 608 609 double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) + 610 compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP)); // precharge circuitry 611 612 if (deg_bl_muxing > 1) 613 { 614 height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height 615 // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height 616 } 617 618 height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height 619 620 if (dp.Ndsam_lev_1 > 1) 621 { 622 height += compute_tr_width_after_folding( 623 g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height 624 //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); 625 } 626 627 if (dp.Ndsam_lev_2 > 1) 628 { 629 height += compute_tr_width_after_folding( 630 g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height 631 //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); 632 633 // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux 634 height += 2 * compute_tr_width_after_folding( 635 pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); 636 height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); 637 } 638 639 // TODO: this should be uncommented... 640 /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1) 641 { 642 //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP); 643 double width_write_driver_write_mux = width_write_driver_or_write_mux(); 644 double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux, 645 cell.w * 646 // deg_bl_muxing * 647 dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP)); 648 height += height_write_driver_write_mux; 649 }*/ 650 651 return height; 652} 653 654 655 656double Mat::compute_cam_delay(double inrisetime) 657{ 658 659 double out_time_ramp, this_delay; 660 double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load; 661 662 663 double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p, 664 Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp, 665 Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp, 666 Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p; 667 668 double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng; 669 int Htagbits; 670 671 double driver_c_gate_load; 672 double driver_c_wire_load; 673 double driver_r_wire_load; 674 //double searchline_precharge_time; 675 676 double leak_power_cc_inverters_sram_cell = 0; 677 double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0; 678 double leak_power_RD_port_sram_cell = 0; 679 double leak_power_SCHP_port_sram_cell = 0; 680 double leak_comparator_cam_cell =0; 681 682 double gate_leak_comparator_cam_cell = 0; 683 double gate_leak_power_cc_inverters_sram_cell = 0; 684 double gate_leak_power_RD_port_sram_cell = 0; 685 double gate_leak_power_SCHP_port_sram_cell = 0; 686 687 c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um; 688 c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um; 689 r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um; 690 r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um; 691 692 dynSearchEng = 0.0; 693 delay_matchchline = 0.0; 694 double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram); 695 bool linear_scaling = false; 696 697 if (linear_scaling) 698 { 699 Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process 700 Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process 701 Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 702 Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process 703 Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process 704 Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process 705 Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process 706 Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 707 Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process 708 Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process 709 Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 710 Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process 711 Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process 712 Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 713 Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process 714 Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process 715 Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process 716 Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 717 Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process 718 719 Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 720 Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 721 Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process 722 Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process 723 Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 724 Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 725 Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process 726 Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 727 W_hit_miss_n = Wdummyn; 728 W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; 729 //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort 730 } 731 else 732 { 733 Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process 734 Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process 735 Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 736 Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process 737 Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process 738 Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process 739 Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process 740 Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 741 Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process 742 Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process 743 Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 744 Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process 745 Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process 746 Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 747 Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process 748 Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process 749 Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process 750 Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 751 Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process 752 753 Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process 754 Wdummyn = g_tp.cam.cell_nmos_w; 755 Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process 756 Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process 757 Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 758 Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 759 Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process 760 Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 761 W_hit_miss_n = Wdummyn; 762 W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; 763 } 764 765 Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0)); 766 767 /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators. 768 search_line_delay, search_line_power, search_line_restore_delay for cycle time computation. 769 From the driver(am and an) to the comparators in all the rows including the dummy row, 770 Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */ 771 772 //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports 773 //Searchline precharge routes horizontally 774 driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); 775 driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; 776 driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; 777 778 sl_precharge_eq_drv = new Driver( 779 driver_c_gate_load, 780 driver_c_wire_load, 781 driver_r_wire_load, 782 is_dram); 783 784 //searchline data driver ; subarray.num_rows + 1 is because of the dummy row 785 //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines 786 driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false); 787 driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; 788 driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; 789 sl_data_drv = new Driver( 790 driver_c_gate_load, 791 driver_c_wire_load, 792 driver_r_wire_load, 793 is_dram); 794 795 sl_precharge_eq_drv->compute_delay(0); 796 double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr 797 double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um; 798 double R_bl = (subarray.num_rows + 1) * r_b_metal; 799 double C_bl = subarray.C_bl_cam; 800 delay_cam_sl_restore = sl_precharge_eq_drv->delay 801 + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2); 802 803 out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside) 804 805 //matchline ops delay 806 delay_matchchline += sl_data_drv->delay; 807 808 /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/ 809 //matchline delay, matchline power, matchline_reset for cycle time computation, 810 811 ////matchline precharge circuitry routes vertically 812 //There are two matchline precharge driver chains per subarray. 813 driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram); 814 driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; 815 driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; 816 817 ml_precharge_drv = new Driver( 818 driver_c_gate_load, 819 driver_c_wire_load, 820 driver_r_wire_load, 821 is_dram); 822 823 ml_precharge_drv->compute_delay(0); 824 825 826 rd = tr_R_on(Wdummyn, NCH, 2, is_dram); 827 c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit 828 + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline 829 830 Cwire = c_matchline_metal * Htagbits; 831 Rwire = r_matchline_metal * Htagbits; 832 c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram); 833 834 double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram); 835 //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; 836 double R_ml = Rwire; 837 double C_ml = Cwire + c_intrinsic; 838 delay_cam_ml_reset = ml_precharge_drv->delay 839 + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too 840 841 //matchline ops delay 842 tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); 843 this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL); 844 delay_matchchline += this_delay; 845 out_time_ramp = this_delay / VTHFA3; 846 847 dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise 848 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves 849 850 /* third stage, from the NAND2 gates to the drivers in the dummy row */ 851 rd = tr_R_on(Waddrnandn, NCH, 2, is_dram); 852 c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + 853 drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2; 854 c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram); 855 tf = rd * (c_intrinsic + c_gate_load); 856 this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE); 857 out_time_ramp = this_delay / (1 - VTHFA4); 858 delay_matchchline += this_delay; 859 860 //only the dummy row has the extra inverter between NAND and NOR gates 861 dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl; 862 863 /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */ 864 rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram); 865 c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram); 866 Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2; 867 Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2; 868 c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram); 869 tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); 870 this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL); 871 out_time_ramp = this_delay / VTHFA5; 872 delay_matchchline += this_delay; 873 874 dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; 875 876 /*final statge from the NOR gate to drive the wordline of the data portion */ 877 878 //searchline data driver There are two matchline precharge driver chains per subarray. 879 driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic 880 driver_c_wire_load = subarray.C_wl_ram; 881 driver_r_wire_load = subarray.R_wl_ram; 882 883 ml_to_ram_wl_drv = new Driver( 884 driver_c_gate_load, 885 driver_c_wire_load, 886 driver_r_wire_load, 887 is_dram); 888 889 890 891 rd = tr_R_on(Wfanorn, NCH, 1, is_dram); 892 c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram); 893 c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram); 894 tf = rd * (c_intrinsic + c_gate_load); 895 this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE); 896 out_time_ramp = this_delay / (1-0.5); 897 delay_matchchline += this_delay; 898 899 out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp); 900 901 //c_gate_load energy is computed in ml_to_ram_wl_drv 902 dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; 903 904 905 /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/ 906 /*Precharge the hitting logic */ 907 c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram); 908 Cwire = c_searchline_metal * subarray.num_rows; 909 Rwire = r_searchline_metal * subarray.num_rows; 910 c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows; 911 912 rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false); 913 //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; 914 double R_hit_miss = Rwire; 915 double C_hit_miss = Cwire + c_intrinsic; 916 delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2); 917 dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 918 919 /*hitting logic evaluation */ 920 c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram); 921 Cwire = c_searchline_metal * subarray.num_rows; 922 Rwire = r_searchline_metal * subarray.num_rows; 923 c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows; 924 925 rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false); 926 tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); 927 928 delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL); 929 930 if (is_fa) 931 delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss); 932 933 dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 934 935 /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/ 936 937 power_matchline.searchOp.dynamic = dynSearchEng; 938 939 //leakage in one subarray 940 double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2? 941 double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true); 942 double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2; 943 double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv 944 945 leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd; 946 leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd; 947 leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd; 948 leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd; 949 leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports 950 951 power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell + 952 leak_comparator_cam_cell + 953 leak_power_acc_tr_RW_or_WR_port_sram_cell + 954 leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + 955 leak_power_RD_port_sram_cell * ERP + 956 leak_power_SCHP_port_sram_cell*SCHP; 957// power_matchline.searchOp.leakage += leak_comparator_cam_cell; 958 power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise 959 power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd; 960 power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd; 961 power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd; 962 //In idle states, the hit/miss txs are closed (on) therefore no Isub 963 power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+ 964 // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd; 965 966 //in idle state, Ig_on only possibly exist in access transistors of read only ports 967 double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true); 968 double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2; 969 double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2; 970 971 gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd; 972 gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd; 973 gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd; 974 gate_leak_power_SCHP_port_sram_cell = 0; 975 976 //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl; 977 978 power_matchline.searchOp.gate_leakage += gate_leak_power_cc_inverters_sram_cell; 979 power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell; 980 power_matchline.searchOp.gate_leakage += gate_leak_power_SCHP_port_sram_cell*SCHP + gate_leak_power_RD_port_sram_cell * ERP; 981 power_matchline.searchOp.gate_leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise 982 power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(0, Wfaprechp,1, pmos) * g_tp.cam_cell.Vdd; 983 power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd; 984 power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd; 985 power_matchline.searchOp.gate_leakage += subarray.num_rows * cmos_Ig_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+ 986 + cmos_Ig_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd; 987 988 989 return out_time_ramp; 990} 991 992 993double Mat::width_write_driver_or_write_mux() 994{ 995 // calculate resistance of SRAM cell pull-up PMOS transistor 996 // cam and sram have same cell trasistor properties 997 double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true); 998 double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true); 999 double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2; 1000 double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram); 1001 1002 return width_write_driver_nmos; 1003} 1004 1005 1006 1007double Mat::compute_comparators_height( 1008 int tagbits, 1009 int number_ways_in_mat, 1010 double subarray_mem_cell_area_width) 1011{ 1012 double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def); 1013 double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4; 1014 return cumulative_area / subarray_mem_cell_area_width; 1015} 1016 1017 1018 1019double Mat::compute_bitline_delay(double inrisetime) 1020{ 1021 double V_b_pre, v_th_mem_cell, V_wl; 1022 double tstep; 1023 double dynRdEnergy = 0.0, dynWriteEnergy = 0.0; 1024 double R_cell_pull_down=0.0, R_cell_acc =0.0, r_dev=0.0; 1025 int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2; 1026 1027 double R_b_metal = camFlag? cam_cell.h:cell.h * g_tp.wire_local.R_per_um; 1028 double R_bl = subarray.num_rows * R_b_metal; 1029 double C_bl = subarray.C_bl; 1030 1031 // TODO: no leakage for DRAMs? 1032 double leak_power_cc_inverters_sram_cell = 0; 1033 double gate_leak_power_cc_inverters_sram_cell = 0; 1034 double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0; 1035 double leak_power_RD_port_sram_cell = 0; 1036 double gate_leak_power_RD_port_sram_cell = 0; 1037 1038 if (is_dram == true) 1039 { 1040 V_b_pre = g_tp.dram.Vbitpre; 1041 v_th_mem_cell = g_tp.dram_acc.Vth; 1042 V_wl = g_tp.vpp; 1043 //The access transistor is not folded. So we just need to specify a threshold value for the 1044 //folding width that is equal to or greater than Wmemcella. 1045 R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true); 1046 r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2; 1047 } 1048 else 1049 { //SRAM 1050 V_b_pre = g_tp.sram.Vbitpre; 1051 v_th_mem_cell = g_tp.sram_cell.Vth; 1052 V_wl = g_tp.sram_cell.Vdd; 1053 R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true); 1054 R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true); 1055 1056 //Leakage current of an SRAM cell 1057 double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);//TODO: how much is the idle time? just by *2? 1058 double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,false, true); 1059 double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true)*2;//two invs per cell 1060 1061 leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd; 1062 leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd; 1063 leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd; 1064 1065 1066 //in idle state, Ig_on only possibly exist in access transistors of read only ports 1067 double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true); 1068 double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true); 1069 1070 gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd; 1071 gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd; 1072 } 1073 1074 1075 double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram); 1076 double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram); 1077 double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); 1078 double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram); 1079 double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) + 1080 drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + 1081 drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); 1082 double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); 1083 1084 if (is_dram) 1085 { 1086 double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl)); 1087 tstep = 2.3 * fraction * r_dev * 1088 (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) / 1089 (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)); 1090 delay_writeback = tstep; 1091 dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * 1092 (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/; 1093 dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) * 1094 (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100; 1095 per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * 1096 (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd; 1097 } 1098 else 1099 { 1100 double tau; 1101 1102 if (deg_bl_muxing > 1) 1103 { 1104 tau = (R_cell_pull_down + R_cell_acc) * 1105 (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + 1106 R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + 1107 R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + 1108 R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); 1109 dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* 1110 subarray.num_cols * num_subarrays_per_mat*/; 1111 dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * 1112 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing); 1113 dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) * 1114 num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2; 1115 //Write Ops are differential for SRAM 1116 } 1117 else 1118 { 1119 tau = (R_cell_pull_down + R_cell_acc) * 1120 (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 + 1121 R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); 1122 dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * 1123 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/; 1124 dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) * 1125 num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2; 1126 1127 } 1128 tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense)); 1129 power_bitline.readOp.leakage = 1130 leak_power_cc_inverters_sram_cell + 1131 leak_power_acc_tr_RW_or_WR_port_sram_cell + 1132 leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + 1133 leak_power_RD_port_sram_cell * ERP; 1134 power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell + 1135 gate_leak_power_RD_port_sram_cell * ERP; 1136 1137 } 1138 1139// cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl; 1140// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl; 1141// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl; 1142// cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl; 1143 1144 1145 /* take input rise time into account */ 1146 double m = V_wl / inrisetime; 1147 if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m)) 1148 { 1149 delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell)/ m); 1150 } 1151 else 1152 { 1153 delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m); 1154 } 1155 1156 bool is_fa = (dp.fully_assoc) ? true : false; 1157 1158 if (dp.is_tag == false || is_fa == false) 1159 { 1160 power_bitline.readOp.dynamic = dynRdEnergy; 1161 power_bitline.writeOp.dynamic = dynWriteEnergy; 1162 } 1163 1164 double outrisetime = 0; 1165 return outrisetime; 1166} 1167 1168 1169 1170double Mat::compute_sa_delay(double inrisetime) 1171{ 1172 //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray 1173 1174 //Bitline circuitry leakage. 1175 double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram); 1176 double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram); 1177 double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram); 1178 double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram); 1179 1180 double lkgIdlePh = IsenseEn;//+ 2*IoBufP; 1181 //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch; 1182 double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ; 1183 //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir + 1184 // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir); 1185 double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/; 1186 leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/; 1187 leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/; 1188 1189 // sense amplifier has to drive logic in "data out driver" and sense precharge load. 1190 // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time 1191 //constant as well as the magnitude of input differential voltage. 1192 double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) + 1193 drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + 1194 drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + 1195 drain_C_(g_tp.w_iso,PCH,1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + 1196 drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); 1197 double tau = C_ld / g_tp.gm_sense_amp_latch; 1198 delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense); 1199 power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray 1200 num_subarrays_per_mat * num_act_mats_hor_dir*/; 1201 power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd; 1202 1203 double outrisetime = 0; 1204 return outrisetime; 1205} 1206 1207 1208 1209double Mat::compute_subarray_out_drv(double inrisetime) 1210{ 1211 double C_ld, rd, tf, this_delay; 1212 double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram); 1213 1214 // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer. 1215 rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); 1216 C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + 1217 gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram); 1218 tf = rd * C_ld; 1219 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1220 delay_subarray_out_drv += this_delay; 1221 inrisetime = this_delay/(1.0 - 0.5); 1222 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 1223 power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 1224 power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd; 1225 // delay of signal through inverter-buffer to second level of sense-amp mux. 1226 // internal delay of buffer 1227 rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram); 1228 C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) + 1229 drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) + 1230 gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram); 1231 tf = rd * C_ld; 1232 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1233 delay_subarray_out_drv += this_delay; 1234 inrisetime = this_delay/(1.0 - 0.5); 1235 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 1236 power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram)* g_tp.peri_global.Vdd; 1237 power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd; 1238 1239 // inverter driving drain of pass transistor of second level of sense-amp mux. 1240 rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram); 1241 C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) + 1242 drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) + 1243 drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram); 1244 tf = rd * C_ld; 1245 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1246 delay_subarray_out_drv += this_delay; 1247 inrisetime = this_delay/(1.0 - 0.5); 1248 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 1249 power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd; 1250 power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd; 1251 1252 1253 // delay of signal through pass-transistor to input of subarray output driver. 1254 rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); 1255 C_ld = dp.Ndsam_lev_2 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram) + 1256 //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); 1257 gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); 1258 tf = rd * C_ld; 1259 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1260 delay_subarray_out_drv += this_delay; 1261 inrisetime = this_delay/(1.0 - 0.5); 1262 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 1263 power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 1264 power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd; 1265 1266 1267 return inrisetime; 1268} 1269 1270 1271 1272double Mat::compute_comparator_delay(double inrisetime) 1273{ 1274 int A = g_ip->tag_assoc; 1275 1276 int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already 1277 // a multiple of 4. 1278 1279 /* First Inverter */ 1280 double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) + 1281 drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) + 1282 drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram); 1283 double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram); 1284 double tf = Req*Ceq; 1285 double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL); 1286 double nextinputtime = st1del/VTHCOMPINV; 1287 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; 1288 1289 //For each degree of associativity 1290 //there are 4 such quarter comparators 1291 double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A; 1292 double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A; 1293 /* Second Inverter */ 1294 Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) + 1295 drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) + 1296 drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram); 1297 Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram); 1298 tf = Req*Ceq; 1299 double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE); 1300 nextinputtime = st2del/(1.0-VTHCOMPINV); 1301 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; 1302 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A; 1303 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A; 1304 1305 /* Third Inverter */ 1306 Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) + 1307 drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) + 1308 drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram); 1309 Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram); 1310 tf = Req*Ceq; 1311 double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL); 1312 nextinputtime = st3del/(VTHEVALINV); 1313 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; 1314 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A; 1315 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A; 1316 1317 /* Final Inverter (virtual ground driver) discharging compare part */ 1318 double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram); 1319 double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */ 1320 double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) + 1321 drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) + 1322 drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) + 1323 drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram); 1324 double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) + 1325 drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) + 1326 drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) + 1327 gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram); 1328 power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; 1329 power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1); 1330 lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A; 1331 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2 1332 1333 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A; 1334 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter 1335 1336 /* time to go to threshold of mux driver */ 1337 double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND); 1338 /* take into account non-zero input rise time */ 1339 double m = g_tp.peri_global.Vdd/nextinputtime; 1340 double Tcomparatorni; 1341 1342 if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m)) 1343 { 1344 double a = m; 1345 double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth); 1346 double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth); 1347 Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a); 1348 } 1349 else 1350 { 1351 Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m; 1352 } 1353 delay_comparator = Tcomparatorni+st1del+st2del+st3del; 1354 power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd; 1355 power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd; 1356 1357 return Tcomparatorni / (1.0 - VTHMUXNAND);; 1358} 1359 1360 1361 1362void Mat::compute_power_energy() 1363{ 1364 //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power 1365 //when search all subarrays and all mats are fully active 1366 //when plain read/write only one subarray in a single mat is active. 1367 1368 // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat. 1369 power.readOp.dynamic += r_predec->power.readOp.dynamic + 1370 b_mux_predec->power.readOp.dynamic + 1371 sa_mux_lev_1_predec->power.readOp.dynamic + 1372 sa_mux_lev_2_predec->power.readOp.dynamic; 1373 1374 // add energy consumed in decoders 1375 power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic; 1376 if (!(is_fa||pure_cam)) 1377 power_row_decoders.readOp.dynamic *= num_subarrays_per_mat; 1378 1379 // add energy consumed in bitline prechagers, SAs, and bitlines 1380 if (!(is_fa||pure_cam)) 1381 { 1382 // add energy consumed in bitline prechagers 1383 power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; 1384 power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; 1385 1386 //Add sense amps energy 1387 num_sa_subarray = subarray.num_cols / deg_bl_muxing; 1388 power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ; 1389 1390 // add energy consumed in bitlines 1391 //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl; 1392 power_bitline.readOp.dynamic *= num_subarrays_per_mat*subarray.num_cols; 1393 power_bitline.writeOp.dynamic *= num_subarrays_per_mat*subarray.num_cols; 1394 //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl; 1395 //Add subarray output energy 1396 power_subarray_out_drv.readOp.dynamic = 1397 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; 1398 1399 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + 1400 power_sa.readOp.dynamic + 1401 power_bitline.readOp.dynamic + 1402 power_subarray_out_drv.readOp.dynamic; 1403 1404 power.readOp.dynamic += power_row_decoders.readOp.dynamic + 1405 bit_mux_dec->power.readOp.dynamic + 1406 sa_mux_lev_1_dec->power.readOp.dynamic + 1407 sa_mux_lev_2_dec->power.readOp.dynamic + 1408 power_comparator.readOp.dynamic; 1409 } 1410 1411 else if (is_fa) 1412 { 1413 //for plain read/write only one subarray in a mat is active 1414 // add energy consumed in bitline prechagers 1415 power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic 1416 + cam_bl_precharge_eq_drv->power.readOp.dynamic; 1417 power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; 1418 1419 //Add sense amps energy 1420 num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing; 1421 num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing; 1422 power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search; 1423 power_sa.readOp.dynamic *= num_sa_subarray; 1424 1425 1426 // add energy consumed in bitlines 1427 power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic; 1428 power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram); 1429 power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram); 1430 power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram; 1431 1432 //Add subarray output energy 1433 power_subarray_out_drv.searchOp.dynamic = 1434 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; 1435 power_subarray_out_drv.readOp.dynamic = 1436 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; 1437 1438 1439 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + 1440 power_sa.readOp.dynamic + 1441 power_bitline.readOp.dynamic + 1442 power_subarray_out_drv.readOp.dynamic; 1443 1444 power.readOp.dynamic += power_row_decoders.readOp.dynamic + 1445 bit_mux_dec->power.readOp.dynamic + 1446 sa_mux_lev_1_dec->power.readOp.dynamic + 1447 sa_mux_lev_2_dec->power.readOp.dynamic + 1448 power_comparator.readOp.dynamic; 1449 1450 //add energy consumed inside cam 1451 power_matchline.searchOp.dynamic *= num_subarrays_per_mat; 1452 power_searchline_precharge = sl_precharge_eq_drv->power; 1453 power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; 1454 power_searchline = sl_data_drv->power; 1455 power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;; 1456 power_matchline_precharge = ml_precharge_drv->power; 1457 power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat; 1458 power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power; 1459 power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic; 1460 1461 power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; 1462 power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic; 1463 power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic; 1464 power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic; 1465 1466 power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; 1467 //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; 1468 1469 } 1470 else 1471 { 1472 // add energy consumed in bitline prechagers 1473 power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; 1474 //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; 1475 //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; 1476 //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat; 1477 1478 //Add sense amps energy 1479 num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing; 1480 power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat; 1481 power_sa.searchOp.dynamic = 0; 1482 1483 power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam; 1484 power_bitline.searchOp.dynamic = 0; 1485 power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam; 1486 1487 power_subarray_out_drv.searchOp.dynamic = 1488 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; 1489 power_subarray_out_drv.readOp.dynamic = 1490 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; 1491 1492 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + 1493 power_sa.readOp.dynamic + 1494 power_bitline.readOp.dynamic + 1495 power_subarray_out_drv.readOp.dynamic; 1496 1497 power.readOp.dynamic += power_row_decoders.readOp.dynamic + 1498 bit_mux_dec->power.readOp.dynamic + 1499 sa_mux_lev_1_dec->power.readOp.dynamic + 1500 sa_mux_lev_2_dec->power.readOp.dynamic + 1501 power_comparator.readOp.dynamic; 1502 1503 1504 ////add energy consumed inside cam 1505 power_matchline.searchOp.dynamic *= num_subarrays_per_mat; 1506 power_searchline_precharge = sl_precharge_eq_drv->power; 1507 power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; 1508 power_searchline = sl_data_drv->power; 1509 power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;; 1510 power_matchline_precharge = ml_precharge_drv->power; 1511 power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat; 1512 power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power; 1513 power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic; 1514 1515 power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; 1516 power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic; 1517 power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic; 1518 power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic; 1519 1520 power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; 1521 //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; 1522 1523 } 1524 1525 1526 1527 // calculate leakage power 1528 if (!(is_fa || pure_cam)) 1529 { 1530 int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); 1531 1532 power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; 1533 power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; 1534 power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP); 1535 1536 //num_sa_subarray = subarray.num_cols / deg_bl_muxing; 1537 power_subarray_out_drv.readOp.leakage = 1538 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * 1539 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); 1540 1541 power.readOp.leakage += power_bitline.readOp.leakage + 1542 power_bl_precharge_eq_drv.readOp.leakage + 1543 power_sa.readOp.leakage + 1544 power_subarray_out_drv.readOp.leakage; 1545 //cout<<"leakage"<<power.readOp.leakage<<endl; 1546 1547 power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP); 1548 power.readOp.leakage += power_comparator.readOp.leakage; 1549 1550 //cout<<"leakage1"<<power.readOp.leakage<<endl; 1551 1552 // leakage power 1553 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; 1554 power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing; 1555 power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1; 1556 power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2; 1557 1558 power.readOp.leakage += r_predec->power.readOp.leakage + 1559 b_mux_predec->power.readOp.leakage + 1560 sa_mux_lev_1_predec->power.readOp.leakage + 1561 sa_mux_lev_2_predec->power.readOp.leakage + 1562 power_row_decoders.readOp.leakage + 1563 power_bit_mux_decoders.readOp.leakage + 1564 power_sa_mux_lev_1_decoders.readOp.leakage + 1565 power_sa_mux_lev_2_decoders.readOp.leakage; 1566 //cout<<"leakage2"<<power.readOp.leakage<<endl; 1567 1568 //++++Below is gate leakage 1569 power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; 1570 power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; 1571 power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP); 1572 1573 //num_sa_subarray = subarray.num_cols / deg_bl_muxing; 1574 power_subarray_out_drv.readOp.gate_leakage = 1575 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * 1576 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); 1577 1578 power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + 1579 power_bl_precharge_eq_drv.readOp.gate_leakage + 1580 power_sa.readOp.gate_leakage + 1581 power_subarray_out_drv.readOp.gate_leakage; 1582 //cout<<"leakage"<<power.readOp.leakage<<endl; 1583 1584 power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP); 1585 power.readOp.gate_leakage += power_comparator.readOp.gate_leakage; 1586 1587 //cout<<"leakage1"<<power.readOp.gate_leakage<<endl; 1588 1589 // gate_leakage power 1590 power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; 1591 power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing; 1592 power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1; 1593 power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2; 1594 1595 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + 1596 b_mux_predec->power.readOp.gate_leakage + 1597 sa_mux_lev_1_predec->power.readOp.gate_leakage + 1598 sa_mux_lev_2_predec->power.readOp.gate_leakage + 1599 power_row_decoders.readOp.gate_leakage + 1600 power_bit_mux_decoders.readOp.gate_leakage + 1601 power_sa_mux_lev_1_decoders.readOp.gate_leakage + 1602 power_sa_mux_lev_2_decoders.readOp.gate_leakage; 1603 } 1604 else if (is_fa) 1605 { 1606 int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); 1607 1608 power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; 1609 power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; 1610 power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; 1611 power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); 1612 1613 //cout<<"leakage3"<<power.readOp.leakage<<endl; 1614 1615 1616 power_subarray_out_drv.readOp.leakage = 1617 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * 1618 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); 1619 1620 power.readOp.leakage += power_bitline.readOp.leakage + 1621 power_bl_precharge_eq_drv.readOp.leakage + 1622 power_bl_precharge_eq_drv.searchOp.leakage + 1623 power_sa.readOp.leakage + 1624 power_subarray_out_drv.readOp.leakage; 1625 1626 //cout<<"leakage4"<<power.readOp.leakage<<endl; 1627 1628 // leakage power 1629 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; 1630 power.readOp.leakage += r_predec->power.readOp.leakage + 1631 power_row_decoders.readOp.leakage; 1632 1633 //cout<<"leakage5"<<power.readOp.leakage<<endl; 1634 1635 //inside cam 1636 power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; 1637 power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage; 1638 power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam; 1639 power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic; 1640 power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; 1641 1642 power.readOp.leakage += power_cam_all_active.searchOp.leakage; 1643 1644// cout<<"leakage6"<<power.readOp.leakage<<endl; 1645 1646 //+++Below is gate leakage 1647 power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; 1648 power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; 1649 power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; 1650 power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); 1651 1652 //cout<<"leakage3"<<power.readOp.gate_leakage<<endl; 1653 1654 1655 power_subarray_out_drv.readOp.gate_leakage = 1656 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * 1657 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); 1658 1659 power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + 1660 power_bl_precharge_eq_drv.readOp.gate_leakage + 1661 power_bl_precharge_eq_drv.searchOp.gate_leakage + 1662 power_sa.readOp.gate_leakage + 1663 power_subarray_out_drv.readOp.gate_leakage; 1664 1665 //cout<<"leakage4"<<power.readOp.gate_leakage<<endl; 1666 1667 // gate_leakage power 1668 power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; 1669 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + 1670 power_row_decoders.readOp.gate_leakage; 1671 1672 //cout<<"leakage5"<<power.readOp.gate_leakage<<endl; 1673 1674 //inside cam 1675 power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage; 1676 power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage; 1677 power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam; 1678 power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic; 1679 power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; 1680 1681 power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; 1682 1683 } 1684 else 1685 { 1686 int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); 1687 1688 //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; 1689 //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; 1690 power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; 1691 power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); 1692 1693 1694 power_subarray_out_drv.readOp.leakage = 1695 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * 1696 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); 1697 1698 power.readOp.leakage += //power_bitline.readOp.leakage + 1699 //power_bl_precharge_eq_drv.readOp.leakage + 1700 power_bl_precharge_eq_drv.searchOp.leakage + 1701 power_sa.readOp.leakage + 1702 power_subarray_out_drv.readOp.leakage; 1703 1704 // leakage power 1705 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP); 1706 power.readOp.leakage += r_predec->power.readOp.leakage + 1707 power_row_decoders.readOp.leakage; 1708 1709 //inside cam 1710 power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; 1711 power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage; 1712 power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam; 1713 power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic; 1714 power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; 1715 1716 power.readOp.leakage += power_cam_all_active.searchOp.leakage; 1717 1718 //+++Below is gate leakage 1719 power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; 1720 power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); 1721 1722 1723 power_subarray_out_drv.readOp.gate_leakage = 1724 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * 1725 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); 1726 1727 power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage + 1728 //power_bl_precharge_eq_drv.readOp.gate_leakage + 1729 power_bl_precharge_eq_drv.searchOp.gate_leakage + 1730 power_sa.readOp.gate_leakage + 1731 power_subarray_out_drv.readOp.gate_leakage; 1732 1733 // gate_leakage power 1734 power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP); 1735 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + 1736 power_row_decoders.readOp.gate_leakage; 1737 1738 //inside cam 1739 power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage; 1740 power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage; 1741 power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam; 1742 power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic; 1743 power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; 1744 1745 power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; 1746 } 1747} 1748 1749