1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. 5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 6 * All Rights Reserved 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are 10 * met: redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer; 12 * redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution; 15 * neither the name of the copyright holders nor the names of its 16 * contributors may be used to endorse or promote products derived from 17 * this software without specific prior written permission. 18 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 * 31 ***************************************************************************/ 32 33 34 35#include <cassert> 36 37#include "mat.h" 38 39Mat::Mat(const DynamicParameter & dyn_p) 40 : dp(dyn_p), 41 power_subarray_out_drv(), 42 delay_fa_tag(0), delay_cam(0), 43 delay_before_decoder(0), delay_bitline(0), 44 delay_wl_reset(0), delay_bl_restore(0), 45 delay_searchline(0), delay_matchchline(0), 46 delay_cam_sl_restore(0), delay_cam_ml_reset(0), 47 delay_fa_ram_wl(0), delay_hit_miss_reset(0), 48 delay_hit_miss(0), 49 subarray(dp, dp.fully_assoc), 50 power_bitline(), per_bitline_read_energy(0), 51 deg_bl_muxing(dp.deg_bl_muxing), 52 num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir), 53 delay_writeback(0), 54 cell(subarray.cell), cam_cell(subarray.cam_cell), 55 is_dram(dyn_p.is_dram), 56 pure_cam(dyn_p.pure_cam), 57 num_mats(dp.num_mats), 58 power_sa(), delay_sa(0), 59 leak_power_sense_amps_closed_page_state(0), 60 leak_power_sense_amps_open_page_state(0), 61 delay_subarray_out_drv(0), 62 delay_comparator(0), power_comparator(), 63 num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat), 64 num_subarrays_per_mat(dp.num_subarrays / dp.num_mats), 65 num_subarrays_per_row(dp.Ndwl / dp.num_mats_h_dir) { 66 assert(num_subarrays_per_mat <= 4); 67 assert(num_subarrays_per_row <= 2); 68 is_fa = (dp.fully_assoc) ? true : false; 69 camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them. 70 71 if (is_fa || pure_cam) { 72 num_subarrays_per_row = num_subarrays_per_mat > 2 ? 73 num_subarrays_per_mat / 2 : num_subarrays_per_mat; 74 } 75 76 if (dp.use_inp_params == 1) { 77 RWP = dp.num_rw_ports; 78 ERP = dp.num_rd_ports; 79 EWP = dp.num_wr_ports; 80 SCHP = dp.num_search_ports; 81 } else { 82 RWP = g_ip->num_rw_ports; 83 ERP = g_ip->num_rd_ports; 84 EWP = g_ip->num_wr_ports; 85 SCHP = g_ip->num_search_ports; 86 87 } 88 89 double number_sa_subarray; 90 91 if (!is_fa && !pure_cam) { 92 number_sa_subarray = subarray.num_cols / deg_bl_muxing; 93 } else if (is_fa && !pure_cam) { 94 number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing; 95 } 96 97 else { 98 number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing; 99 } 100 101 int num_dec_signals = subarray.num_rows; 102 double C_ld_bit_mux_dec_out = 0; 103 double C_ld_sa_mux_lev_1_dec_out = 0; 104 double C_ld_sa_mux_lev_2_dec_out = 0; 105 double R_wire_wl_drv_out; 106 107 if (!is_fa && !pure_cam) { 108 R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um; 109 } else if (is_fa && !pure_cam) { 110 R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ; 111 } else { 112 R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um; 113 } 114 115 double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA 116 double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w; 117 118 if (deg_bl_muxing > 1) { 119 C_ld_bit_mux_dec_out = 120 (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing) * 121 gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell 122 num_subarrays_per_row * subarray.num_cols * 123 g_tp.wire_inside_mat.C_per_um * cell.get_w(); 124 } 125 126 if (dp.Ndsam_lev_1 > 1) { 127 C_ld_sa_mux_lev_1_dec_out = 128 (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1) * 129 gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + 130 num_subarrays_per_row * subarray.num_cols * 131 g_tp.wire_inside_mat.C_per_um * cell.get_w(); 132 } 133 if (dp.Ndsam_lev_2 > 1) { 134 C_ld_sa_mux_lev_2_dec_out = 135 (num_subarrays_per_mat * number_sa_subarray / 136 (dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) * 137 gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + 138 num_subarrays_per_row * subarray.num_cols * 139 g_tp.wire_inside_mat.C_per_um * cell.get_w(); 140 } 141 142 if (num_subarrays_per_row >= 2) { 143 // wire heads for both right and left side of a mat, so half the resistance 144 R_wire_bit_mux_dec_out /= 2.0; 145 R_wire_sa_mux_dec_out /= 2.0; 146 } 147 148 149 row_dec = new Decoder( 150 num_dec_signals, 151 false, 152 subarray.C_wl, 153 R_wire_wl_drv_out, 154 false/*is_fa*/, 155 is_dram, 156 true, 157 camFlag ? cam_cell : cell); 158// if (is_fa && (!dp.is_tag)) 159// { 160// row_dec->exist = true; 161// } 162 bit_mux_dec = new Decoder( 163 deg_bl_muxing,// This number is 1 for FA or CAM 164 false, 165 C_ld_bit_mux_dec_out, 166 R_wire_bit_mux_dec_out, 167 false/*is_fa*/, 168 is_dram, 169 false, 170 camFlag ? cam_cell : cell); 171 sa_mux_lev_1_dec = new Decoder( 172 dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM 173 dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal 174 C_ld_sa_mux_lev_1_dec_out, 175 R_wire_sa_mux_dec_out, 176 false/*is_fa*/, 177 is_dram, 178 false, 179 camFlag ? cam_cell : cell); 180 sa_mux_lev_2_dec = new Decoder( 181 dp.Ndsam_lev_2, // This number is 1 for FA or CAM 182 false, 183 C_ld_sa_mux_lev_2_dec_out, 184 R_wire_sa_mux_dec_out, 185 false/*is_fa*/, 186 is_dram, 187 false, 188 camFlag ? cam_cell : cell); 189 190 double C_wire_predec_blk_out; 191 double R_wire_predec_blk_out; 192 193 if (!is_fa && !pure_cam) { 194 195 C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h; 196 R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h; 197 198 } else { //for pre-decode block's load is same for both FA and CAM 199 C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h; 200 R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h; 201 } 202 203 204 if (is_fa || pure_cam) 205 num_dec_signals += _log2(num_subarrays_per_mat); 206 207 PredecBlk * r_predec_blk1 = new PredecBlk( 208 num_dec_signals, 209 row_dec, 210 C_wire_predec_blk_out, 211 R_wire_predec_blk_out, 212 num_subarrays_per_mat, 213 is_dram, 214 true); 215 PredecBlk * r_predec_blk2 = new PredecBlk( 216 num_dec_signals, 217 row_dec, 218 C_wire_predec_blk_out, 219 R_wire_predec_blk_out, 220 num_subarrays_per_mat, 221 is_dram, 222 false); 223 PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true); 224 PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false); 225 PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true); 226 PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false); 227 PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true); 228 PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false); 229 dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true); 230 dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false); 231 232 PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram); 233 PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram); 234 PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram); 235 PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram); 236 PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram); 237 PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram); 238 PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram); 239 PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram); 240 way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram); 241 dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram); 242 243 r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2); 244 b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2); 245 sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2); 246 sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2); 247 248 subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng 249 250 double driver_c_gate_load; 251 double driver_c_wire_load; 252 double driver_r_wire_load; 253 254 if (is_fa || pure_cam) 255 256 { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same 257 driver_c_gate_load = (subarray.num_cols_fa_cam ) * 258 gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, 259 is_dram, false, false); 260 driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * 261 g_tp.wire_outside_mat.C_per_um; 262 driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * 263 g_tp.wire_outside_mat.R_per_um; 264 cam_bl_precharge_eq_drv = new Driver( 265 driver_c_gate_load, 266 driver_c_wire_load, 267 driver_r_wire_load, 268 is_dram); 269 270 if (!pure_cam) { 271 //This is only used for fully asso not pure CAM 272 driver_c_gate_load = (subarray.num_cols_fa_ram ) * 273 gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, 274 is_dram, false, false); 275 driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * 276 g_tp.wire_outside_mat.C_per_um; 277 driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * 278 g_tp.wire_outside_mat.R_per_um; 279 bl_precharge_eq_drv = new Driver( 280 driver_c_gate_load, 281 driver_c_wire_load, 282 driver_r_wire_load, 283 is_dram); 284 } 285 } 286 287 else { 288 driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); 289 driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um; 290 driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um; 291 bl_precharge_eq_drv = new Driver( 292 driver_c_gate_load, 293 driver_c_wire_load, 294 driver_r_wire_load, 295 is_dram); 296 } 297 double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP); 298 double w_row_decoder = area_row_decoder / subarray.area.get_h(); 299 300 double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux = 301 compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); 302 303 double h_subarray_out_drv = subarray_out_wire->area.get_area() * 304 (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w(); 305 306 307 h_subarray_out_drv *= (RWP + ERP + SCHP); 308 309 double h_comparators = 0.0; 310 double w_row_predecode_output_wires = 0.0; 311 double h_bit_mux_dec_out_wires = 0.0; 312 double h_senseamp_mux_dec_out_wires = 0.0; 313 314 if ((!is_fa) && (dp.is_tag)) { 315 //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat; 316 h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w()); 317 h_comparators *= (RWP + ERP); 318 } 319 320 321 int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits); 322 int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits); 323 w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) * 324 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); 325 326 327 double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) * 328 (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + 329 h_subarray_out_drv + h_comparators); 330 331 double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder); 332 333 if (deg_bl_muxing > 1) { 334 h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); 335 } 336 if (dp.Ndsam_lev_1 > 1) { 337 h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP); 338 } 339 if (dp.Ndsam_lev_2 > 1) { 340 h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP); 341 } 342 343 double h_addr_datain_wires; 344 if (!g_ip->ver_htree_wires_over_array) { 345 h_addr_datain_wires = (dp.number_addr_bits_mat + 346 dp.number_way_select_signals_mat + 347 (dp.num_di_b_mat + dp.num_do_b_mat) / 348 num_subarrays_per_row) * 349 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); 350 351 if (is_fa || pure_cam) { 352 h_addr_datain_wires = 353 (dp.number_addr_bits_mat + 354 dp.number_way_select_signals_mat + //TODO: revisit 355 (dp.num_di_b_mat + dp.num_do_b_mat ) / num_subarrays_per_row) * 356 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) + 357 (dp.num_si_b_mat + dp.num_so_b_mat ) / num_subarrays_per_row * 358 g_tp.wire_inside_mat.pitch * SCHP; 359 } 360 //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux + 361 //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv); 362 h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators + 363 h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) + 364 h_addr_datain_wires + 365 h_bit_mux_dec_out_wires + 366 h_senseamp_mux_dec_out_wires; 367 368 } 369 370 // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area; 371 double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() + 372 b_mux_predec_blk_drv1->area.get_area() + 373 sa_mux_lev_1_predec_blk_drv1->area.get_area() + 374 sa_mux_lev_2_predec_blk_drv1->area.get_area() + 375 way_sel_drv1->area.get_area() + 376 r_predec_blk_drv2->area.get_area() + 377 b_mux_predec_blk_drv2->area.get_area() + 378 sa_mux_lev_1_predec_blk_drv2->area.get_area() + 379 sa_mux_lev_2_predec_blk_drv2->area.get_area() + 380 r_predec_blk1->area.get_area() + 381 b_mux_predec_blk1->area.get_area() + 382 sa_mux_lev_1_predec_blk1->area.get_area() + 383 sa_mux_lev_2_predec_blk1->area.get_area() + 384 r_predec_blk2->area.get_area() + 385 b_mux_predec_blk2->area.get_area() + 386 sa_mux_lev_1_predec_blk2->area.get_area() + 387 sa_mux_lev_2_predec_blk2->area.get_area() + 388 bit_mux_dec->area.get_area() + 389 sa_mux_lev_1_dec->area.get_area() + 390 sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP); 391 392 double area_efficiency_mat; 393 394// if (!is_fa) 395// { 396 assert(num_subarrays_per_mat / num_subarrays_per_row > 0); 397 area.h = (num_subarrays_per_mat / num_subarrays_per_row) * 398 subarray.area.h + h_non_cell_area; 399 area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area; 400 area.w = (area.h * area.w + area_mat_center_circuitry) / area.h; 401 area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 402 100.0 / area.get_area(); 403 404// cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl; 405// cout<<"h_comparators"<<h_comparators<<endl; 406// cout<<"h_subarray_out_drv"<<h_subarray_out_drv<<endl; 407// cout<<"h_addr_datain_wires"<<h_addr_datain_wires<<endl; 408// cout<<"h_bit_mux_dec_out_wires"<<h_bit_mux_dec_out_wires<<endl; 409// cout<<"h_senseamp_mux_dec_out_wires"<<h_senseamp_mux_dec_out_wires<<endl; 410// cout<<"h_non_cell_area"<<h_non_cell_area<<endl; 411// cout<<"area.h =" << (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h<<endl; 412// cout<<"w_non_cell_area"<<w_non_cell_area<<endl; 413// cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl; 414 415 assert(area.h > 0); 416 assert(area.w > 0); 417// } 418// else 419// { 420// area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.get_h() + h_non_cell_area; 421// area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area; 422// area.w = (area.h*area.w + area_mat_center_circuitry) / area.h; 423// area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area(); 424// } 425} 426 427 428 429Mat::~Mat() { 430 delete row_dec; 431 delete bit_mux_dec; 432 delete sa_mux_lev_1_dec; 433 delete sa_mux_lev_2_dec; 434 435 delete r_predec->blk1; 436 delete r_predec->blk2; 437 delete b_mux_predec->blk1; 438 delete b_mux_predec->blk2; 439 delete sa_mux_lev_1_predec->blk1; 440 delete sa_mux_lev_1_predec->blk2; 441 delete sa_mux_lev_2_predec->blk1; 442 delete sa_mux_lev_2_predec->blk2; 443 delete dummy_way_sel_predec_blk1; 444 delete dummy_way_sel_predec_blk2; 445 446 delete r_predec->drv1; 447 delete r_predec->drv2; 448 delete b_mux_predec->drv1; 449 delete b_mux_predec->drv2; 450 delete sa_mux_lev_1_predec->drv1; 451 delete sa_mux_lev_1_predec->drv2; 452 delete sa_mux_lev_2_predec->drv1; 453 delete sa_mux_lev_2_predec->drv2; 454 delete way_sel_drv1; 455 delete dummy_way_sel_predec_blk_drv2; 456 457 delete r_predec; 458 delete b_mux_predec; 459 delete sa_mux_lev_1_predec; 460 delete sa_mux_lev_2_predec; 461 462 delete subarray_out_wire; 463 if (!pure_cam) 464 delete bl_precharge_eq_drv; 465 466 if (is_fa || pure_cam) { 467 delete sl_precharge_eq_drv ; 468 delete sl_data_drv ; 469 delete cam_bl_precharge_eq_drv; 470 delete ml_precharge_drv; 471 delete ml_to_ram_wl_drv; 472 } 473} 474 475 476 477double Mat::compute_delays(double inrisetime) { 478 int k; 479 double rd, C_intrinsic, C_ld, tf, R_bl_precharge, r_b_metal, R_bl, C_bl; 480 double outrisetime_search, outrisetime, row_dec_outrisetime; 481 // delay calculation for tags of fully associative cache 482 if (is_fa || pure_cam) { 483 //Compute search access time 484 outrisetime_search = compute_cam_delay(inrisetime); 485 if (is_fa) { 486 bl_precharge_eq_drv->compute_delay(0); 487 k = ml_to_ram_wl_drv->number_gates - 1; 488 rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true); 489 C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4 * 490 cell.h, is_dram, false, true) + 491 drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4 * cell.h, 492 is_dram, false, true); 493 C_ld = ml_to_ram_wl_drv->c_gate_load + 494 ml_to_ram_wl_drv->c_wire_load; 495 tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2; 496 delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); 497 498 R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); 499 r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in 500 R_bl = subarray.num_rows * r_b_metal; 501 C_bl = subarray.C_bl; 502 delay_bl_restore = bl_precharge_eq_drv->delay + 503 log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / 504 (g_tp.sram.Vbitpre - dp.V_b_sense)) * 505 (R_bl_precharge * C_bl + R_bl * C_bl / 2); 506 507 508 outrisetime_search = compute_bitline_delay(outrisetime_search); 509 outrisetime_search = compute_sa_delay(outrisetime_search); 510 } 511 outrisetime_search = compute_subarray_out_drv(outrisetime_search); 512 subarray_out_wire->set_in_rise_time(outrisetime_search); 513 outrisetime_search = subarray_out_wire->signal_rise_time(); 514 delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; 515 516 517 //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited. 518 outrisetime = r_predec->compute_delays(inrisetime); 519 row_dec_outrisetime = row_dec->compute_delays(outrisetime); 520 521 outrisetime = b_mux_predec->compute_delays(inrisetime); 522 bit_mux_dec->compute_delays(outrisetime); 523 524 outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); 525 sa_mux_lev_1_dec->compute_delays(outrisetime); 526 527 outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); 528 sa_mux_lev_2_dec->compute_delays(outrisetime); 529 530 if (pure_cam) { 531 outrisetime = compute_bitline_delay(row_dec_outrisetime); 532 outrisetime = compute_sa_delay(outrisetime); 533 } 534 return outrisetime_search; 535 } else { 536 bl_precharge_eq_drv->compute_delay(0); 537 if (row_dec->exist == true) { 538 int k = row_dec->num_gates - 1; 539 double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true); 540 // TODO: this 4*cell.h number must be revisited 541 double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 * 542 cell.h, is_dram, false, true) + 543 drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, is_dram, 544 false, true); 545 double C_ld = row_dec->C_ld_dec_out; 546 double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2; 547 delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); 548 } 549 double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); 550 double r_b_metal = cell.h * g_tp.wire_local.R_per_um; 551 double R_bl = subarray.num_rows * r_b_metal; 552 double C_bl = subarray.C_bl; 553 554 if (is_dram) { 555 delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2); 556 } else { 557 delay_bl_restore = bl_precharge_eq_drv->delay + 558 log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / 559 (g_tp.sram.Vbitpre - dp.V_b_sense)) * 560 (R_bl_precharge * C_bl + R_bl * C_bl / 2); 561 } 562 } 563 564 565 566 outrisetime = r_predec->compute_delays(inrisetime); 567 row_dec_outrisetime = row_dec->compute_delays(outrisetime); 568 569 outrisetime = b_mux_predec->compute_delays(inrisetime); 570 bit_mux_dec->compute_delays(outrisetime); 571 572 outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); 573 sa_mux_lev_1_dec->compute_delays(outrisetime); 574 575 outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); 576 sa_mux_lev_2_dec->compute_delays(outrisetime); 577 578 outrisetime = compute_bitline_delay(row_dec_outrisetime); 579 outrisetime = compute_sa_delay(outrisetime); 580 outrisetime = compute_subarray_out_drv(outrisetime); 581 subarray_out_wire->set_in_rise_time(outrisetime); 582 outrisetime = subarray_out_wire->signal_rise_time(); 583 584 delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; 585 586 if (dp.is_tag == true && dp.fully_assoc == false) { 587 compute_comparator_delay(0); 588 } 589 590 if (row_dec->exist == false) { 591 delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay); 592 } 593 return outrisetime; 594} 595 596 597 598double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() { 599 600 double height = 601 compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, 602 camFlag ? cam_cell.w : 603 cell.w / (2 * (RWP + ERP + SCHP))) + 604 // precharge circuitry 605 compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, 606 camFlag ? cam_cell.w : 607 cell.w / (RWP + ERP + SCHP)); 608 609 if (deg_bl_muxing > 1) { 610 // col mux tr height 611 height += 612 compute_tr_width_after_folding(g_tp.w_nmos_b_mux, 613 cell.w / (2 * (RWP + ERP))); 614 // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height 615 } 616 617 height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height 618 619 if (dp.Ndsam_lev_1 > 1) { 620 height += compute_tr_width_after_folding( 621 g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height 622 //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); 623 } 624 625 if (dp.Ndsam_lev_2 > 1) { 626 height += compute_tr_width_after_folding( 627 g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height 628 //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); 629 630 // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux 631 height += 2 * compute_tr_width_after_folding( 632 pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); 633 height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); 634 } 635 636 // TODO: this should be uncommented... 637 /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1) 638 { 639 //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP); 640 double width_write_driver_write_mux = width_write_driver_or_write_mux(); 641 double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux, 642 cell.w * 643 // deg_bl_muxing * 644 dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP)); 645 height += height_write_driver_write_mux; 646 }*/ 647 648 return height; 649} 650 651 652 653double Mat::compute_cam_delay(double inrisetime) { 654 655 double out_time_ramp, this_delay; 656 double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load; 657 658 659 double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p, 660 Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp, 661 Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp, 662 Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p; 663 664 double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng; 665 int Htagbits; 666 667 double driver_c_gate_load; 668 double driver_c_wire_load; 669 double driver_r_wire_load; 670 //double searchline_precharge_time; 671 672 double leak_power_cc_inverters_sram_cell = 0; 673 double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0; 674 double leak_power_RD_port_sram_cell = 0; 675 double leak_power_SCHP_port_sram_cell = 0; 676 double leak_comparator_cam_cell =0; 677 678 double gate_leak_comparator_cam_cell = 0; 679 double gate_leak_power_cc_inverters_sram_cell = 0; 680 double gate_leak_power_RD_port_sram_cell = 0; 681 double gate_leak_power_SCHP_port_sram_cell = 0; 682 683 c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um; 684 c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um; 685 r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um; 686 r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um; 687 688 dynSearchEng = 0.0; 689 delay_matchchline = 0.0; 690 double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram); 691 bool linear_scaling = false; 692 693 if (linear_scaling) { 694 Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process 695 Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process 696 Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 697 Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process 698 Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process 699 Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process 700 Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process 701 Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 702 Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process 703 Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process 704 Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 705 Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process 706 Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process 707 Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 708 Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process 709 Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process 710 Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process 711 Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 712 Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process 713 714 Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 715 Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 716 Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process 717 Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process 718 Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 719 Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 720 Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process 721 Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 722 W_hit_miss_n = Wdummyn; 723 W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; 724 //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort 725 } else { 726 Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process 727 Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process 728 Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 729 Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process 730 Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process 731 Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process 732 Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process 733 Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 734 Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process 735 Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process 736 Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 737 Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process 738 Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process 739 Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 740 Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process 741 Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process 742 Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process 743 Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 744 Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process 745 746 Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process 747 Wdummyn = g_tp.cam.cell_nmos_w; 748 Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process 749 Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process 750 Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 751 Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process 752 Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process 753 Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process 754 W_hit_miss_n = Wdummyn; 755 W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; 756 } 757 758 Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0)); 759 760 /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators. 761 search_line_delay, search_line_power, search_line_restore_delay for cycle time computation. 762 From the driver(am and an) to the comparators in all the rows including the dummy row, 763 Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */ 764 765 //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports 766 //Searchline precharge routes horizontally 767 driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); 768 driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; 769 driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; 770 771 sl_precharge_eq_drv = new Driver( 772 driver_c_gate_load, 773 driver_c_wire_load, 774 driver_r_wire_load, 775 is_dram); 776 777 //searchline data driver ; subarray.num_rows + 1 is because of the dummy row 778 //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines 779 driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false); 780 driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; 781 driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; 782 sl_data_drv = new Driver( 783 driver_c_gate_load, 784 driver_c_wire_load, 785 driver_r_wire_load, 786 is_dram); 787 788 sl_precharge_eq_drv->compute_delay(0); 789 double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr 790 double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um; 791 double R_bl = (subarray.num_rows + 1) * r_b_metal; 792 double C_bl = subarray.C_bl_cam; 793 delay_cam_sl_restore = sl_precharge_eq_drv->delay 794 + log(g_tp.cam.Vbitpre) * (R_bl_precharge * C_bl + R_bl * C_bl / 2); 795 796 out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside) 797 798 //matchline ops delay 799 delay_matchchline += sl_data_drv->delay; 800 801 /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/ 802 //matchline delay, matchline power, matchline_reset for cycle time computation, 803 804 ////matchline precharge circuitry routes vertically 805 //There are two matchline precharge driver chains per subarray. 806 driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram); 807 driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; 808 driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; 809 810 ml_precharge_drv = new Driver( 811 driver_c_gate_load, 812 driver_c_wire_load, 813 driver_r_wire_load, 814 is_dram); 815 816 ml_precharge_drv->compute_delay(0); 817 818 819 rd = tr_R_on(Wdummyn, NCH, 2, is_dram); 820 c_intrinsic = Htagbits * 821 (2 * drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, 822 is_dram)//TODO: the cell_h_def should be revisit 823 + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram) / 824 Htagbits);//since each halve only has one precharge tx per matchline 825 826 Cwire = c_matchline_metal * Htagbits; 827 Rwire = r_matchline_metal * Htagbits; 828 c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram); 829 830 double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram); 831 //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; 832 double R_ml = Rwire; 833 double C_ml = Cwire + c_intrinsic; 834 //TODO: latest CAM has sense amps on matchlines too 835 delay_cam_ml_reset = ml_precharge_drv->delay 836 + log(g_tp.cam.Vbitpre) * (R_ml_precharge * C_ml + R_ml * C_ml / 2); 837 838 //matchline ops delay 839 tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); 840 this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL); 841 delay_matchchline += this_delay; 842 out_time_ramp = this_delay / VTHFA3; 843 844 dynSearchEng += ((c_intrinsic + Cwire + c_gate_load) * 845 (subarray.num_rows + 1)) //TODO: need to be precise 846 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 847 2;//each subarry has two halves 848 849 /* third stage, from the NAND2 gates to the drivers in the dummy row */ 850 rd = tr_R_on(Waddrnandn, NCH, 2, is_dram); 851 c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + 852 drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram) * 2; 853 c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram); 854 tf = rd * (c_intrinsic + c_gate_load); 855 this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE); 856 out_time_ramp = this_delay / (1 - VTHFA4); 857 delay_matchchline += this_delay; 858 859 //only the dummy row has the extra inverter between NAND and NOR gates 860 dynSearchEng += (c_intrinsic * (subarray.num_rows + 1) + c_gate_load * 2) * 861 g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl; 862 863 /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */ 864 rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram); 865 c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram); 866 Cwire = c_matchline_metal * Htagbits + c_searchline_metal * 867 (subarray.num_rows + 1) / 2; 868 Rwire = r_matchline_metal * Htagbits + r_searchline_metal * 869 (subarray.num_rows + 1) / 2; 870 c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram); 871 tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); 872 this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL); 873 out_time_ramp = this_delay / VTHFA5; 874 delay_matchchline += this_delay; 875 876 dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows * c_gate_load) * 877 g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; 878 879 /*final statge from the NOR gate to drive the wordline of the data portion */ 880 881 //searchline data driver There are two matchline precharge driver chains per subarray. 882 driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic 883 driver_c_wire_load = subarray.C_wl_ram; 884 driver_r_wire_load = subarray.R_wl_ram; 885 886 ml_to_ram_wl_drv = new Driver( 887 driver_c_gate_load, 888 driver_c_wire_load, 889 driver_r_wire_load, 890 is_dram); 891 892 893 894 rd = tr_R_on(Wfanorn, NCH, 1, is_dram); 895 c_intrinsic = 2 * drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + 896 drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram); 897 c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram); 898 tf = rd * (c_intrinsic + c_gate_load); 899 this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE); 900 out_time_ramp = this_delay / (1 - 0.5); 901 delay_matchchline += this_delay; 902 903 out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp); 904 905 //c_gate_load energy is computed in ml_to_ram_wl_drv 906 dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; 907 908 909 /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/ 910 /*Precharge the hitting logic */ 911 c_intrinsic = 2 * 912 drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram); 913 Cwire = c_searchline_metal * subarray.num_rows; 914 Rwire = r_searchline_metal * subarray.num_rows; 915 c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) * 916 subarray.num_rows; 917 918 rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false); 919 //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; 920 double R_hit_miss = Rwire; 921 double C_hit_miss = Cwire + c_intrinsic; 922 delay_hit_miss_reset = log(g_tp.cam.Vbitpre) * 923 (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2); 924 dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 925 926 /*hitting logic evaluation */ 927 c_intrinsic = 2 * 928 drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram); 929 Cwire = c_searchline_metal * subarray.num_rows; 930 Rwire = r_searchline_metal * subarray.num_rows; 931 c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) * 932 subarray.num_rows; 933 934 rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false); 935 tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); 936 937 delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL); 938 939 if (is_fa) 940 delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss); 941 942 dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 943 944 /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/ 945 946 power_matchline.searchOp.dynamic = dynSearchEng; 947 948 //leakage in one subarray 949 double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2? 950 double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true); 951 double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 952 1, inv, false, true) * 2; 953 //approx XOR with Inv 954 double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, 955 false, true) * 2; 956 957 leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd; 958 leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd; 959 leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd; 960 leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd; 961 leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports 962 963 power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell + 964 leak_comparator_cam_cell + 965 leak_power_acc_tr_RW_or_WR_port_sram_cell + 966 leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + 967 leak_power_RD_port_sram_cell * ERP + 968 leak_power_SCHP_port_sram_cell * SCHP; 969// power_matchline.searchOp.leakage += leak_comparator_cam_cell; 970 power_matchline.searchOp.leakage *= (subarray.num_rows + 1) * 971 subarray.num_cols_fa_cam;//TODO:dumy line precise 972 power_matchline.searchOp.leakage += (subarray.num_rows + 1) * 973 cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd; 974 power_matchline.searchOp.leakage += (subarray.num_rows + 1) * 975 cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd; 976 power_matchline.searchOp.leakage += (subarray.num_rows + 1) * 977 cmos_Isub_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd; 978 //In idle states, the hit/miss txs are closed (on) therefore no Isub 979 power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+ 980 // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd; 981 982 //in idle state, Ig_on only possibly exist in access transistors of read only ports 983 double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true); 984 double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 985 1, inv, false, true) * 2; 986 double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, 987 false, true) * 2; 988 989 gate_leak_comparator_cam_cell = Ig_cell_comparator * g_tp.cam_cell.Vdd; 990 gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.cam_cell.Vdd; 991 gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd; 992 gate_leak_power_SCHP_port_sram_cell = 0; 993 994 //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl; 995 996 power_matchline.searchOp.gate_leakage += 997 gate_leak_power_cc_inverters_sram_cell; 998 power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell; 999 power_matchline.searchOp.gate_leakage += 1000 gate_leak_power_SCHP_port_sram_cell * SCHP + 1001 gate_leak_power_RD_port_sram_cell * ERP; 1002 power_matchline.searchOp.gate_leakage *= (subarray.num_rows + 1) * 1003 subarray.num_cols_fa_cam;//TODO:dumy line precise 1004 power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) * 1005 cmos_Ig_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd; 1006 power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) * 1007 cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd; 1008 power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) * 1009 cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd; 1010 power_matchline.searchOp.gate_leakage += subarray.num_rows * 1011 cmos_Ig_leakage(W_hit_miss_n, 0, 1, nmos) * g_tp.cam_cell.Vdd + 1012 + cmos_Ig_leakage(0, W_hit_miss_p, 1, pmos) * g_tp.cam_cell.Vdd; 1013 1014 1015 return out_time_ramp; 1016} 1017 1018 1019double Mat::width_write_driver_or_write_mux() { 1020 // calculate resistance of SRAM cell pull-up PMOS transistor 1021 // cam and sram have same cell trasistor properties 1022 double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true); 1023 double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true); 1024 double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2; 1025 double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram); 1026 1027 return width_write_driver_nmos; 1028} 1029 1030 1031 1032double Mat::compute_comparators_height( 1033 int tagbits, 1034 int number_ways_in_mat, 1035 double subarray_mem_cell_area_width) { 1036 double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def); 1037 double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4; 1038 return cumulative_area / subarray_mem_cell_area_width; 1039} 1040 1041 1042 1043double Mat::compute_bitline_delay(double inrisetime) { 1044 double V_b_pre, v_th_mem_cell, V_wl; 1045 double tstep; 1046 double dynRdEnergy = 0.0, dynWriteEnergy = 0.0; 1047 double R_cell_pull_down = 0.0, R_cell_acc = 0.0, r_dev = 0.0; 1048 int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2; 1049 1050 double R_b_metal = camFlag ? cam_cell.h : cell.h * g_tp.wire_local.R_per_um; 1051 double R_bl = subarray.num_rows * R_b_metal; 1052 double C_bl = subarray.C_bl; 1053 1054 // TODO: no leakage for DRAMs? 1055 double leak_power_cc_inverters_sram_cell = 0; 1056 double gate_leak_power_cc_inverters_sram_cell = 0; 1057 double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0; 1058 double leak_power_RD_port_sram_cell = 0; 1059 double gate_leak_power_RD_port_sram_cell = 0; 1060 1061 if (is_dram == true) { 1062 V_b_pre = g_tp.dram.Vbitpre; 1063 v_th_mem_cell = g_tp.dram_acc.Vth; 1064 V_wl = g_tp.vpp; 1065 //The access transistor is not folded. So we just need to specify a 1066 // threshold value for the folding width that is equal to or greater 1067 // than Wmemcella. 1068 R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true); 1069 r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2; 1070 } else { //SRAM 1071 V_b_pre = g_tp.sram.Vbitpre; 1072 v_th_mem_cell = g_tp.sram_cell.Vth; 1073 V_wl = g_tp.sram_cell.Vdd; 1074 R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true); 1075 R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true); 1076 1077 //Leakage current of an SRAM cell 1078 //TODO: how much is the idle time? just by *2? 1079 double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos, 1080 false, true); 1081 double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos, 1082 false, true); 1083 double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, 1084 g_tp.sram.cell_pmos_w, 1, inv, false, 1085 true) * 2;//two invs per cell 1086 1087 leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd; 1088 leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd; 1089 leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd; 1090 1091 1092 //in idle state, Ig_on only possibly exist in access transistors of read only ports 1093 double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos, 1094 false, true); 1095 double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, 1096 g_tp.sram.cell_pmos_w, 1, inv, false, 1097 true); 1098 1099 gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.sram_cell.Vdd; 1100 gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd; 1101 } 1102 1103 1104 double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, 1105 camFlag ? cam_cell.w : cell.w / 1106 (2 * (RWP + ERP + SCHP)), is_dram); 1107 double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram); 1108 double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, 1109 camFlag ? cam_cell.w : 1110 cell.w * deg_bl_muxing / 1111 (RWP + ERP + SCHP), is_dram); 1112 double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram); 1113 double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, 1114 is_dram) + 1115 drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag ? cam_cell.w : 1116 cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + 1117 drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? cam_cell.w : 1118 cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); 1119 double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, 1120 camFlag ? cam_cell.w : 1121 cell.w * deg_bl_muxing / 1122 (RWP + ERP + SCHP), is_dram); 1123 1124 if (is_dram) { 1125 double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd / 2) * 1126 g_tp.dram_cell_C / 1127 (g_tp.dram_cell_C + C_bl)); 1128 tstep = 2.3 * fraction * r_dev * 1129 (g_tp.dram_cell_C * (C_bl + 2 * C_drain_sense_amp_iso + 1130 C_sense_amp_latch + C_drain_sense_amp_mux)) / 1131 (g_tp.dram_cell_C + (C_bl + 2 * C_drain_sense_amp_iso + 1132 C_sense_amp_latch + C_drain_sense_amp_mux)); 1133 delay_writeback = tstep; 1134 dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + 1135 C_drain_sense_amp_mux) * 1136 (g_tp.dram_cell_Vdd / 2) * 1137 g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/; 1138 dynWriteEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch) * 1139 (g_tp.dram_cell_Vdd / 2) * 1140 g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * 1141 num_act_mats_hor_dir * 100; 1142 per_bitline_read_energy = (C_bl + 2 * C_drain_sense_amp_iso + 1143 C_sense_amp_latch + C_drain_sense_amp_mux) * 1144 (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd; 1145 } else { 1146 double tau; 1147 1148 if (deg_bl_muxing > 1) { 1149 tau = (R_cell_pull_down + R_cell_acc) * 1150 (C_bl + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso + 1151 C_sense_amp_latch + C_drain_sense_amp_mux) + 1152 R_bl * (C_bl / 2 + 2 * C_drain_bit_mux + 2 * 1153 C_drain_sense_amp_iso + C_sense_amp_latch + 1154 C_drain_sense_amp_mux) + 1155 R_bit_mux * (C_drain_bit_mux + 2 * C_drain_sense_amp_iso + 1156 C_sense_amp_latch + C_drain_sense_amp_mux) + 1157 R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + 1158 C_drain_sense_amp_mux); 1159 dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * 1160 g_tp.sram_cell.Vdd; 1161 dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + 1162 C_drain_sense_amp_mux) * 1163 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * 1164 (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / 1165 deg_bl_muxing); 1166 dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / 1167 deg_bl_muxing) / deg_senseamp_muxing) * 1168 num_act_mats_hor_dir * (C_bl + 2 * C_drain_bit_mux) * 1169 g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2; 1170 //Write Ops are differential for SRAM 1171 } else { 1172 tau = (R_cell_pull_down + R_cell_acc) * 1173 (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 + 1174 R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); 1175 dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * 1176 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/; 1177 dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / 1178 deg_bl_muxing) / deg_senseamp_muxing) * 1179 num_act_mats_hor_dir * C_bl) * 1180 g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2; 1181 1182 } 1183 tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense)); 1184 power_bitline.readOp.leakage = 1185 leak_power_cc_inverters_sram_cell + 1186 leak_power_acc_tr_RW_or_WR_port_sram_cell + 1187 leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + 1188 leak_power_RD_port_sram_cell * ERP; 1189 power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell + 1190 gate_leak_power_RD_port_sram_cell * ERP; 1191 1192 } 1193 1194// cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl; 1195// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl; 1196// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl; 1197// cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl; 1198 1199 1200 /* take input rise time into account */ 1201 double m = V_wl / inrisetime; 1202 if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m)) { 1203 delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell) / m); 1204 } else { 1205 delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m); 1206 } 1207 1208 bool is_fa = (dp.fully_assoc) ? true : false; 1209 1210 if (dp.is_tag == false || is_fa == false) { 1211 power_bitline.readOp.dynamic = dynRdEnergy; 1212 power_bitline.writeOp.dynamic = dynWriteEnergy; 1213 } 1214 1215 double outrisetime = 0; 1216 return outrisetime; 1217} 1218 1219 1220 1221double Mat::compute_sa_delay(double inrisetime) { 1222 //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray 1223 1224 //Bitline circuitry leakage. 1225 double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram); 1226 double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram); 1227 double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram); 1228 double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram); 1229 1230 double lkgIdlePh = IsenseEn;//+ 2*IoBufP; 1231 //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch; 1232 double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ; 1233 //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir + 1234 // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir); 1235 double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/; 1236 leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/; 1237 leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/; 1238 1239 // sense amplifier has to drive logic in "data out driver" and sense precharge load. 1240 // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time 1241 //constant as well as the magnitude of input differential voltage. 1242 double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) + 1243 drain_C_(g_tp.w_sense_n, NCH, 1, 0, 1244 camFlag ? cam_cell.w : cell.w * deg_bl_muxing / 1245 (RWP + ERP + SCHP), is_dram) + 1246 drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? 1247 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), 1248 is_dram) + 1249 drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag ? 1250 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), 1251 is_dram) + 1252 drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? 1253 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), 1254 is_dram); 1255 double tau = C_ld / g_tp.gm_sense_amp_latch; 1256 delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense); 1257 power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray 1258 num_subarrays_per_mat * num_act_mats_hor_dir*/; 1259 power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd; 1260 1261 double outrisetime = 0; 1262 return outrisetime; 1263} 1264 1265 1266 1267double Mat::compute_subarray_out_drv(double inrisetime) { 1268 double C_ld, rd, tf, this_delay; 1269 double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram); 1270 1271 // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer. 1272 rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); 1273 C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, 1274 camFlag ? cam_cell.w : cell.w * 1275 deg_bl_muxing / (RWP + ERP + SCHP), 1276 is_dram) + 1277 gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram); 1278 tf = rd * C_ld; 1279 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1280 delay_subarray_out_drv += this_delay; 1281 inrisetime = this_delay / (1.0 - 0.5); 1282 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 1283 power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 1284 power_subarray_out_drv.readOp.gate_leakage += 1285 cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd; 1286 // delay of signal through inverter-buffer to second level of sense-amp mux. 1287 // internal delay of buffer 1288 rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram); 1289 C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) + 1290 drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) + 1291 gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram); 1292 tf = rd * C_ld; 1293 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1294 delay_subarray_out_drv += this_delay; 1295 inrisetime = this_delay / (1.0 - 0.5); 1296 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 1297 power_subarray_out_drv.readOp.leakage += 1298 cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, 1299 inv, is_dram) * g_tp.peri_global.Vdd; 1300 power_subarray_out_drv.readOp.gate_leakage += 1301 cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, 1302 inv) * g_tp.peri_global.Vdd; 1303 1304 // inverter driving drain of pass transistor of second level of sense-amp mux. 1305 rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram); 1306 C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) + 1307 drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, 1308 is_dram) + 1309 drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? 1310 cam_cell.w : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / 1311 (RWP + ERP + SCHP), is_dram); 1312 tf = rd * C_ld; 1313 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1314 delay_subarray_out_drv += this_delay; 1315 inrisetime = this_delay / (1.0 - 0.5); 1316 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 1317 power_subarray_out_drv.readOp.leakage += 1318 cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, 1319 inv) * g_tp.peri_global.Vdd; 1320 power_subarray_out_drv.readOp.gate_leakage += 1321 cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, 1322 inv) * g_tp.peri_global.Vdd; 1323 1324 1325 // delay of signal through pass-transistor to input of subarray output driver. 1326 rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); 1327 C_ld = dp.Ndsam_lev_2 * 1328 drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? cam_cell.w : 1329 cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), 1330 is_dram) + 1331 //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); 1332 gate_C(subarray_out_wire->repeater_size * 1333 (subarray_out_wire->wire_length / 1334 subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * 1335 (1 + p_to_n_sz_r), 0.0, is_dram); 1336 tf = rd * C_ld; 1337 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1338 delay_subarray_out_drv += this_delay; 1339 inrisetime = this_delay / (1.0 - 0.5); 1340 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 1341 power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 1342 power_subarray_out_drv.readOp.gate_leakage += 1343 cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd; 1344 1345 1346 return inrisetime; 1347} 1348 1349 1350 1351double Mat::compute_comparator_delay(double inrisetime) { 1352 int A = g_ip->tag_assoc; 1353 1354 int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already 1355 // a multiple of 4. 1356 1357 /* First Inverter */ 1358 double Ceq = gate_C(g_tp.w_comp_inv_n2 + g_tp.w_comp_inv_p2, 0, is_dram) + 1359 drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) + 1360 drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram); 1361 double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram); 1362 double tf = Req * Ceq; 1363 double st1del = horowitz(inrisetime, tf, VTHCOMPINV, VTHCOMPINV, FALL); 1364 double nextinputtime = st1del / VTHCOMPINV; 1365 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; 1366 1367 //For each degree of associativity 1368 //there are 4 such quarter comparators 1369 double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, 1370 g_tp.w_comp_inv_p1, 1, inv, 1371 is_dram) * 4 * A; 1372 double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, 1373 g_tp.w_comp_inv_p1, 1, inv, 1374 is_dram) * 4 * A; 1375 /* Second Inverter */ 1376 Ceq = gate_C(g_tp.w_comp_inv_n3 + g_tp.w_comp_inv_p3, 0, is_dram) + 1377 drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) + 1378 drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram); 1379 Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram); 1380 tf = Req * Ceq; 1381 double st2del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHCOMPINV, RISE); 1382 nextinputtime = st2del / (1.0 - VTHCOMPINV); 1383 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; 1384 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, 1385 inv, is_dram) * 4 * A; 1386 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, 1387 inv, is_dram) * 4 * A; 1388 1389 /* Third Inverter */ 1390 Ceq = gate_C(g_tp.w_eval_inv_n + g_tp.w_eval_inv_p, 0, is_dram) + 1391 drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) + 1392 drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram); 1393 Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram); 1394 tf = Req * Ceq; 1395 double st3del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHEVALINV, FALL); 1396 nextinputtime = st3del / (VTHEVALINV); 1397 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; 1398 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, 1399 inv, is_dram) * 4 * A; 1400 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1401 1, inv, is_dram) * 4 * A; 1402 1403 /* Final Inverter (virtual ground driver) discharging compare part */ 1404 double r1 = tr_R_on(g_tp.w_comp_n, NCH, 2, is_dram); 1405 double r2 = tr_R_on(g_tp.w_eval_inv_n, NCH, 1, is_dram); /* was switch */ 1406 double c2 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1, 1407 g_tp.cell_h_def, is_dram) + 1408 drain_C_(g_tp.w_comp_n, NCH, 2, 1, 1409 g_tp.cell_h_def, is_dram)) + 1410 drain_C_(g_tp.w_eval_inv_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) + 1411 drain_C_(g_tp.w_eval_inv_n, NCH, 1, 1, g_tp.cell_h_def, is_dram); 1412 double c1 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1, 1413 g_tp.cell_h_def, is_dram) + 1414 drain_C_(g_tp.w_comp_n, NCH, 2, 1, 1415 g_tp.cell_h_def, is_dram)) + 1416 drain_C_(g_tp.w_comp_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) + 1417 gate_C(WmuxdrvNANDn + WmuxdrvNANDp, 0, is_dram); 1418 power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; 1419 power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1); 1420 lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, 1421 inv, is_dram) * 4 * A; 1422 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, 1423 is_dram) * 4 * A; // stack factor of 0.2 1424 1425 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, 1426 inv, is_dram) * 4 * A; 1427 //for gate leakage this equals to a inverter 1428 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, 1429 is_dram) * 4 * A; 1430 1431 /* time to go to threshold of mux driver */ 1432 double tstep = (r2 * c2 + (r1 + r2) * c1) * log(1.0 / VTHMUXNAND); 1433 /* take into account non-zero input rise time */ 1434 double m = g_tp.peri_global.Vdd / nextinputtime; 1435 double Tcomparatorni; 1436 1437 if ((tstep) <= (0.5*(g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m)) { 1438 double a = m; 1439 double b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) - 1440 g_tp.peri_global.Vth); 1441 double c = -2 * (tstep) * (g_tp.peri_global.Vdd - 1442 g_tp.peri_global.Vth) + 1 / m * 1443 ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) * 1444 ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth); 1445 Tcomparatorni = (-b + sqrt(b * b - 4 * a * c)) / (2 * a); 1446 } else { 1447 Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd + 1448 g_tp.peri_global.Vth) / (2 * m) - 1449 (g_tp.peri_global.Vdd * VTHEVALINV) / m; 1450 } 1451 delay_comparator = Tcomparatorni + st1del + st2del + st3del; 1452 power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd; 1453 power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd; 1454 1455 return Tcomparatorni / (1.0 - VTHMUXNAND);; 1456} 1457 1458 1459 1460void Mat::compute_power_energy() { 1461 //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power 1462 //when search all subarrays and all mats are fully active 1463 //when plain read/write only one subarray in a single mat is active. 1464 1465 // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat. 1466 power.readOp.dynamic += r_predec->power.readOp.dynamic + 1467 b_mux_predec->power.readOp.dynamic + 1468 sa_mux_lev_1_predec->power.readOp.dynamic + 1469 sa_mux_lev_2_predec->power.readOp.dynamic; 1470 1471 // add energy consumed in decoders 1472 power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic; 1473 if (!(is_fa || pure_cam)) 1474 power_row_decoders.readOp.dynamic *= num_subarrays_per_mat; 1475 1476 // add energy consumed in bitline prechagers, SAs, and bitlines 1477 if (!(is_fa || pure_cam)) { 1478 // add energy consumed in bitline prechagers 1479 power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; 1480 power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; 1481 1482 //Add sense amps energy 1483 num_sa_subarray = subarray.num_cols / deg_bl_muxing; 1484 power_sa.readOp.dynamic *= num_sa_subarray * num_subarrays_per_mat ; 1485 1486 // add energy consumed in bitlines 1487 //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl; 1488 power_bitline.readOp.dynamic *= num_subarrays_per_mat * 1489 subarray.num_cols; 1490 power_bitline.writeOp.dynamic *= num_subarrays_per_mat * 1491 subarray.num_cols; 1492 //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl; 1493 //Add subarray output energy 1494 power_subarray_out_drv.readOp.dynamic = 1495 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; 1496 1497 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + 1498 power_sa.readOp.dynamic + 1499 power_bitline.readOp.dynamic + 1500 power_subarray_out_drv.readOp.dynamic; 1501 1502 power.readOp.dynamic += power_row_decoders.readOp.dynamic + 1503 bit_mux_dec->power.readOp.dynamic + 1504 sa_mux_lev_1_dec->power.readOp.dynamic + 1505 sa_mux_lev_2_dec->power.readOp.dynamic + 1506 power_comparator.readOp.dynamic; 1507 } 1508 1509 else if (is_fa) { 1510 //for plain read/write only one subarray in a mat is active 1511 // add energy consumed in bitline prechagers 1512 power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic 1513 + cam_bl_precharge_eq_drv->power.readOp.dynamic; 1514 power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; 1515 1516 //Add sense amps energy 1517 num_sa_subarray = (subarray.num_cols_fa_cam + 1518 subarray.num_cols_fa_ram) / deg_bl_muxing; 1519 num_sa_subarray_search = subarray.num_cols_fa_ram / deg_bl_muxing; 1520 power_sa.searchOp.dynamic = power_sa.readOp.dynamic * 1521 num_sa_subarray_search; 1522 power_sa.readOp.dynamic *= num_sa_subarray; 1523 1524 1525 // add energy consumed in bitlines 1526 power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic; 1527 power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam + 1528 subarray.num_cols_fa_ram); 1529 power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam + 1530 subarray.num_cols_fa_ram); 1531 power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram; 1532 1533 //Add subarray output energy 1534 power_subarray_out_drv.searchOp.dynamic = 1535 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; 1536 power_subarray_out_drv.readOp.dynamic = 1537 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; 1538 1539 1540 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + 1541 power_sa.readOp.dynamic + 1542 power_bitline.readOp.dynamic + 1543 power_subarray_out_drv.readOp.dynamic; 1544 1545 power.readOp.dynamic += power_row_decoders.readOp.dynamic + 1546 bit_mux_dec->power.readOp.dynamic + 1547 sa_mux_lev_1_dec->power.readOp.dynamic + 1548 sa_mux_lev_2_dec->power.readOp.dynamic + 1549 power_comparator.readOp.dynamic; 1550 1551 //add energy consumed inside cam 1552 power_matchline.searchOp.dynamic *= num_subarrays_per_mat; 1553 power_searchline_precharge = sl_precharge_eq_drv->power; 1554 power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; 1555 power_searchline = sl_data_drv->power; 1556 power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic * 1557 subarray.num_cols_fa_cam * num_subarrays_per_mat;; 1558 power_matchline_precharge = ml_precharge_drv->power; 1559 power_matchline_precharge.searchOp.dynamic = 1560 power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat; 1561 power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power; 1562 power_ml_to_ram_wl_drv.searchOp.dynamic = 1563 ml_to_ram_wl_drv->power.readOp.dynamic; 1564 1565 power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; 1566 power_cam_all_active.searchOp.dynamic += 1567 power_searchline_precharge.searchOp.dynamic; 1568 power_cam_all_active.searchOp.dynamic += 1569 power_searchline.searchOp.dynamic; 1570 power_cam_all_active.searchOp.dynamic += 1571 power_matchline_precharge.searchOp.dynamic; 1572 1573 power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; 1574 //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; 1575 1576 } else { 1577 // add energy consumed in bitline prechagers 1578 power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; 1579 //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; 1580 //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; 1581 //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat; 1582 1583 //Add sense amps energy 1584 num_sa_subarray = subarray.num_cols_fa_cam / deg_bl_muxing; 1585 power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat; 1586 power_sa.searchOp.dynamic = 0; 1587 1588 power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam; 1589 power_bitline.searchOp.dynamic = 0; 1590 power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam; 1591 1592 power_subarray_out_drv.searchOp.dynamic = 1593 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; 1594 power_subarray_out_drv.readOp.dynamic = 1595 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; 1596 1597 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + 1598 power_sa.readOp.dynamic + 1599 power_bitline.readOp.dynamic + 1600 power_subarray_out_drv.readOp.dynamic; 1601 1602 power.readOp.dynamic += power_row_decoders.readOp.dynamic + 1603 bit_mux_dec->power.readOp.dynamic + 1604 sa_mux_lev_1_dec->power.readOp.dynamic + 1605 sa_mux_lev_2_dec->power.readOp.dynamic + 1606 power_comparator.readOp.dynamic; 1607 1608 1609 ////add energy consumed inside cam 1610 power_matchline.searchOp.dynamic *= num_subarrays_per_mat; 1611 power_searchline_precharge = sl_precharge_eq_drv->power; 1612 power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; 1613 power_searchline = sl_data_drv->power; 1614 power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic * 1615 subarray.num_cols_fa_cam * num_subarrays_per_mat;; 1616 power_matchline_precharge = ml_precharge_drv->power; 1617 power_matchline_precharge.searchOp.dynamic = 1618 power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat; 1619 power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power; 1620 power_ml_to_ram_wl_drv.searchOp.dynamic = 1621 ml_to_ram_wl_drv->power.readOp.dynamic; 1622 1623 power_cam_all_active.searchOp.dynamic = 1624 power_matchline.searchOp.dynamic; 1625 power_cam_all_active.searchOp.dynamic += 1626 power_searchline_precharge.searchOp.dynamic; 1627 power_cam_all_active.searchOp.dynamic += 1628 power_searchline.searchOp.dynamic; 1629 power_cam_all_active.searchOp.dynamic += 1630 power_matchline_precharge.searchOp.dynamic; 1631 1632 power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; 1633 //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; 1634 1635 } 1636 1637 1638 1639 // calculate leakage power 1640 if (!(is_fa || pure_cam)) { 1641 int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); 1642 1643 power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; 1644 power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; 1645 power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat * 1646 (RWP + ERP); 1647 1648 //num_sa_subarray = subarray.num_cols / deg_bl_muxing; 1649 power_subarray_out_drv.readOp.leakage = 1650 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * 1651 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); 1652 1653 power.readOp.leakage += power_bitline.readOp.leakage + 1654 power_bl_precharge_eq_drv.readOp.leakage + 1655 power_sa.readOp.leakage + 1656 power_subarray_out_drv.readOp.leakage; 1657 //cout<<"leakage"<<power.readOp.leakage<<endl; 1658 1659 power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP); 1660 power.readOp.leakage += power_comparator.readOp.leakage; 1661 1662 //cout<<"leakage1"<<power.readOp.leakage<<endl; 1663 1664 // leakage power 1665 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; 1666 power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing; 1667 power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1; 1668 power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2; 1669 1670 power.readOp.leakage += r_predec->power.readOp.leakage + 1671 b_mux_predec->power.readOp.leakage + 1672 sa_mux_lev_1_predec->power.readOp.leakage + 1673 sa_mux_lev_2_predec->power.readOp.leakage + 1674 power_row_decoders.readOp.leakage + 1675 power_bit_mux_decoders.readOp.leakage + 1676 power_sa_mux_lev_1_decoders.readOp.leakage + 1677 power_sa_mux_lev_2_decoders.readOp.leakage; 1678 //cout<<"leakage2"<<power.readOp.leakage<<endl; 1679 1680 //++++Below is gate leakage 1681 power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; 1682 power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; 1683 power_sa.readOp.gate_leakage *= num_sa_subarray * 1684 num_subarrays_per_mat * (RWP + ERP); 1685 1686 //num_sa_subarray = subarray.num_cols / deg_bl_muxing; 1687 power_subarray_out_drv.readOp.gate_leakage = 1688 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * 1689 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); 1690 1691 power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + 1692 power_bl_precharge_eq_drv.readOp.gate_leakage + 1693 power_sa.readOp.gate_leakage + 1694 power_subarray_out_drv.readOp.gate_leakage; 1695 //cout<<"leakage"<<power.readOp.leakage<<endl; 1696 1697 power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP); 1698 power.readOp.gate_leakage += power_comparator.readOp.gate_leakage; 1699 1700 //cout<<"leakage1"<<power.readOp.gate_leakage<<endl; 1701 1702 // gate_leakage power 1703 power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; 1704 power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing; 1705 power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1; 1706 power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2; 1707 1708 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + 1709 b_mux_predec->power.readOp.gate_leakage + 1710 sa_mux_lev_1_predec->power.readOp.gate_leakage + 1711 sa_mux_lev_2_predec->power.readOp.gate_leakage + 1712 power_row_decoders.readOp.gate_leakage + 1713 power_bit_mux_decoders.readOp.gate_leakage + 1714 power_sa_mux_lev_1_decoders.readOp.gate_leakage + 1715 power_sa_mux_lev_2_decoders.readOp.gate_leakage; 1716 } else if (is_fa) { 1717 int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); 1718 1719 power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; 1720 power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; 1721 power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; 1722 power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat * 1723 (RWP + ERP + SCHP); 1724 1725 //cout<<"leakage3"<<power.readOp.leakage<<endl; 1726 1727 1728 power_subarray_out_drv.readOp.leakage = 1729 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * 1730 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); 1731 1732 power.readOp.leakage += power_bitline.readOp.leakage + 1733 power_bl_precharge_eq_drv.readOp.leakage + 1734 power_bl_precharge_eq_drv.searchOp.leakage + 1735 power_sa.readOp.leakage + 1736 power_subarray_out_drv.readOp.leakage; 1737 1738 //cout<<"leakage4"<<power.readOp.leakage<<endl; 1739 1740 // leakage power 1741 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; 1742 power.readOp.leakage += r_predec->power.readOp.leakage + 1743 power_row_decoders.readOp.leakage; 1744 1745 //cout<<"leakage5"<<power.readOp.leakage<<endl; 1746 1747 //inside cam 1748 power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; 1749 power_cam_all_active.searchOp.leakage += 1750 sl_precharge_eq_drv->power.readOp.leakage; 1751 power_cam_all_active.searchOp.leakage += 1752 sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam; 1753 power_cam_all_active.searchOp.leakage += 1754 ml_precharge_drv->power.readOp.dynamic; 1755 power_cam_all_active.searchOp.leakage *= 1756 num_subarrays_per_mat; 1757 1758 power.readOp.leakage += power_cam_all_active.searchOp.leakage; 1759 1760// cout<<"leakage6"<<power.readOp.leakage<<endl; 1761 1762 //+++Below is gate leakage 1763 power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; 1764 power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; 1765 power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; 1766 power_sa.readOp.gate_leakage *= num_sa_subarray * 1767 num_subarrays_per_mat * (RWP + ERP + SCHP); 1768 1769 //cout<<"leakage3"<<power.readOp.gate_leakage<<endl; 1770 1771 1772 power_subarray_out_drv.readOp.gate_leakage = 1773 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * 1774 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); 1775 1776 power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + 1777 power_bl_precharge_eq_drv.readOp.gate_leakage + 1778 power_bl_precharge_eq_drv.searchOp.gate_leakage + 1779 power_sa.readOp.gate_leakage + 1780 power_subarray_out_drv.readOp.gate_leakage; 1781 1782 //cout<<"leakage4"<<power.readOp.gate_leakage<<endl; 1783 1784 // gate_leakage power 1785 power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; 1786 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + 1787 power_row_decoders.readOp.gate_leakage; 1788 1789 //cout<<"leakage5"<<power.readOp.gate_leakage<<endl; 1790 1791 //inside cam 1792 power_cam_all_active.searchOp.gate_leakage = 1793 power_matchline.searchOp.gate_leakage; 1794 power_cam_all_active.searchOp.gate_leakage += 1795 sl_precharge_eq_drv->power.readOp.gate_leakage; 1796 power_cam_all_active.searchOp.gate_leakage += 1797 sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam; 1798 power_cam_all_active.searchOp.gate_leakage += 1799 ml_precharge_drv->power.readOp.dynamic; 1800 power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; 1801 1802 power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; 1803 1804 } else { 1805 int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); 1806 1807 //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; 1808 //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; 1809 power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; 1810 power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat * 1811 (RWP + ERP + SCHP); 1812 1813 1814 power_subarray_out_drv.readOp.leakage = 1815 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * 1816 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); 1817 1818 power.readOp.leakage += //power_bitline.readOp.leakage + 1819 //power_bl_precharge_eq_drv.readOp.leakage + 1820 power_bl_precharge_eq_drv.searchOp.leakage + 1821 power_sa.readOp.leakage + 1822 power_subarray_out_drv.readOp.leakage; 1823 1824 // leakage power 1825 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * 1826 subarray.num_rows * num_subarrays_per_mat * (RWP + ERP + EWP); 1827 power.readOp.leakage += r_predec->power.readOp.leakage + 1828 power_row_decoders.readOp.leakage; 1829 1830 //inside cam 1831 power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; 1832 power_cam_all_active.searchOp.leakage += 1833 sl_precharge_eq_drv->power.readOp.leakage; 1834 power_cam_all_active.searchOp.leakage += 1835 sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam; 1836 power_cam_all_active.searchOp.leakage += 1837 ml_precharge_drv->power.readOp.dynamic; 1838 power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; 1839 1840 power.readOp.leakage += power_cam_all_active.searchOp.leakage; 1841 1842 //+++Below is gate leakage 1843 power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; 1844 power_sa.readOp.gate_leakage *= num_sa_subarray * 1845 num_subarrays_per_mat * (RWP + ERP + SCHP); 1846 1847 1848 power_subarray_out_drv.readOp.gate_leakage = 1849 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * 1850 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); 1851 1852 power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage + 1853 //power_bl_precharge_eq_drv.readOp.gate_leakage + 1854 power_bl_precharge_eq_drv.searchOp.gate_leakage + 1855 power_sa.readOp.gate_leakage + 1856 power_subarray_out_drv.readOp.gate_leakage; 1857 1858 // gate_leakage power 1859 power_row_decoders.readOp.gate_leakage = 1860 row_dec->power.readOp.gate_leakage * subarray.num_rows * 1861 num_subarrays_per_mat * (RWP + ERP + EWP); 1862 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + 1863 power_row_decoders.readOp.gate_leakage; 1864 1865 //inside cam 1866 power_cam_all_active.searchOp.gate_leakage = 1867 power_matchline.searchOp.gate_leakage; 1868 power_cam_all_active.searchOp.gate_leakage += 1869 sl_precharge_eq_drv->power.readOp.gate_leakage; 1870 power_cam_all_active.searchOp.gate_leakage += 1871 sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam; 1872 power_cam_all_active.searchOp.gate_leakage += 1873 ml_precharge_drv->power.readOp.dynamic; 1874 power_cam_all_active.searchOp.gate_leakage *= 1875 num_subarrays_per_mat; 1876 1877 power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; 1878 } 1879} 1880 1881