4a5
> * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
28c29
< * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
---
> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39,69c40,69
< :dp(dyn_p),
< power_subarray_out_drv(),
< delay_fa_tag(0), delay_cam(0),
< delay_before_decoder(0), delay_bitline(0),
< delay_wl_reset(0), delay_bl_restore(0),
< delay_searchline(0), delay_matchchline(0),
< delay_cam_sl_restore(0), delay_cam_ml_reset(0),
< delay_fa_ram_wl(0),delay_hit_miss_reset(0),
< delay_hit_miss(0),
< subarray(dp, dp.fully_assoc),
< power_bitline(), per_bitline_read_energy(0),
< deg_bl_muxing(dp.deg_bl_muxing),
< num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
< delay_writeback(0),
< cell(subarray.cell), cam_cell(subarray.cam_cell),
< is_dram(dyn_p.is_dram),
< pure_cam(dyn_p.pure_cam),
< num_mats(dp.num_mats),
< power_sa(), delay_sa(0),
< leak_power_sense_amps_closed_page_state(0),
< leak_power_sense_amps_open_page_state(0),
< delay_subarray_out_drv(0),
< delay_comparator(0), power_comparator(),
< num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
< num_subarrays_per_mat(dp.num_subarrays/dp.num_mats),
< num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir)
< {
< assert(num_subarrays_per_mat <= 4);
< assert(num_subarrays_per_row <= 2);
< is_fa = (dp.fully_assoc) ? true : false;
< camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
---
> : dp(dyn_p),
> power_subarray_out_drv(),
> delay_fa_tag(0), delay_cam(0),
> delay_before_decoder(0), delay_bitline(0),
> delay_wl_reset(0), delay_bl_restore(0),
> delay_searchline(0), delay_matchchline(0),
> delay_cam_sl_restore(0), delay_cam_ml_reset(0),
> delay_fa_ram_wl(0), delay_hit_miss_reset(0),
> delay_hit_miss(0),
> subarray(dp, dp.fully_assoc),
> power_bitline(), per_bitline_read_energy(0),
> deg_bl_muxing(dp.deg_bl_muxing),
> num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
> delay_writeback(0),
> cell(subarray.cell), cam_cell(subarray.cam_cell),
> is_dram(dyn_p.is_dram),
> pure_cam(dyn_p.pure_cam),
> num_mats(dp.num_mats),
> power_sa(), delay_sa(0),
> leak_power_sense_amps_closed_page_state(0),
> leak_power_sense_amps_open_page_state(0),
> delay_subarray_out_drv(0),
> delay_comparator(0), power_comparator(),
> num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
> num_subarrays_per_mat(dp.num_subarrays / dp.num_mats),
> num_subarrays_per_row(dp.Ndwl / dp.num_mats_h_dir) {
> assert(num_subarrays_per_mat <= 4);
> assert(num_subarrays_per_row <= 2);
> is_fa = (dp.fully_assoc) ? true : false;
> camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
71,72c71,74
< if (is_fa || pure_cam)
< num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat;
---
> if (is_fa || pure_cam) {
> num_subarrays_per_row = num_subarrays_per_mat > 2 ?
> num_subarrays_per_mat / 2 : num_subarrays_per_mat;
> }
74,84c76,85
< if (dp.use_inp_params == 1) {
< RWP = dp.num_rw_ports;
< ERP = dp.num_rd_ports;
< EWP = dp.num_wr_ports;
< SCHP = dp.num_search_ports;
< }
< else {
< RWP = g_ip->num_rw_ports;
< ERP = g_ip->num_rd_ports;
< EWP = g_ip->num_wr_ports;
< SCHP = g_ip->num_search_ports;
---
> if (dp.use_inp_params == 1) {
> RWP = dp.num_rw_ports;
> ERP = dp.num_rd_ports;
> EWP = dp.num_wr_ports;
> SCHP = dp.num_search_ports;
> } else {
> RWP = g_ip->num_rw_ports;
> ERP = g_ip->num_rd_ports;
> EWP = g_ip->num_wr_ports;
> SCHP = g_ip->num_search_ports;
86c87
< }
---
> }
88c89
< double number_sa_subarray;
---
> double number_sa_subarray;
90,97c91,95
< if (!is_fa && !pure_cam)
< {
< number_sa_subarray = subarray.num_cols / deg_bl_muxing;
< }
< else if (is_fa && !pure_cam)
< {
< number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
< }
---
> if (!is_fa && !pure_cam) {
> number_sa_subarray = subarray.num_cols / deg_bl_muxing;
> } else if (is_fa && !pure_cam) {
> number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
> }
99,102c97,99
< else
< {
< number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing;
< }
---
> else {
> number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing;
> }
104,108c101,105
< int num_dec_signals = subarray.num_rows;
< double C_ld_bit_mux_dec_out = 0;
< double C_ld_sa_mux_lev_1_dec_out = 0;
< double C_ld_sa_mux_lev_2_dec_out = 0;
< double R_wire_wl_drv_out;
---
> int num_dec_signals = subarray.num_rows;
> double C_ld_bit_mux_dec_out = 0;
> double C_ld_sa_mux_lev_1_dec_out = 0;
> double C_ld_sa_mux_lev_2_dec_out = 0;
> double R_wire_wl_drv_out;
110,115c107,109
< if (!is_fa && !pure_cam)
< {
< R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
< }
< else if (is_fa && !pure_cam)
< {
---
> if (!is_fa && !pure_cam) {
> R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
> } else if (is_fa && !pure_cam) {
117,119c111
< }
< else
< {
---
> } else {
123,124c115,116
< double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
< double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
---
> double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
> double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
126,131c118,124
< if (deg_bl_muxing > 1)
< {
< C_ld_bit_mux_dec_out =
< (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell
< num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
< }
---
> if (deg_bl_muxing > 1) {
> C_ld_bit_mux_dec_out =
> (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing) *
> gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell
> num_subarrays_per_row * subarray.num_cols *
> g_tp.wire_inside_mat.C_per_um * cell.get_w();
> }
133,144c126,140
< if (dp.Ndsam_lev_1 > 1)
< {
< C_ld_sa_mux_lev_1_dec_out =
< (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
< num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
< }
< if (dp.Ndsam_lev_2 > 1)
< {
< C_ld_sa_mux_lev_2_dec_out =
< (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
< num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
< }
---
> if (dp.Ndsam_lev_1 > 1) {
> C_ld_sa_mux_lev_1_dec_out =
> (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1) *
> gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
> num_subarrays_per_row * subarray.num_cols *
> g_tp.wire_inside_mat.C_per_um * cell.get_w();
> }
> if (dp.Ndsam_lev_2 > 1) {
> C_ld_sa_mux_lev_2_dec_out =
> (num_subarrays_per_mat * number_sa_subarray /
> (dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) *
> gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
> num_subarrays_per_row * subarray.num_cols *
> g_tp.wire_inside_mat.C_per_um * cell.get_w();
> }
146,151c142,146
< if (num_subarrays_per_row >= 2)
< {
< // wire heads for both right and left side of a mat, so half the resistance
< R_wire_bit_mux_dec_out /= 2.0;
< R_wire_sa_mux_dec_out /= 2.0;
< }
---
> if (num_subarrays_per_row >= 2) {
> // wire heads for both right and left side of a mat, so half the resistance
> R_wire_bit_mux_dec_out /= 2.0;
> R_wire_sa_mux_dec_out /= 2.0;
> }
154,162c149,157
< row_dec = new Decoder(
< num_dec_signals,
< false,
< subarray.C_wl,
< R_wire_wl_drv_out,
< false/*is_fa*/,
< is_dram,
< true,
< camFlag? cam_cell:cell);
---
> row_dec = new Decoder(
> num_dec_signals,
> false,
> subarray.C_wl,
> R_wire_wl_drv_out,
> false/*is_fa*/,
> is_dram,
> true,
> camFlag ? cam_cell : cell);
167,193c162,188
< bit_mux_dec = new Decoder(
< deg_bl_muxing,// This number is 1 for FA or CAM
< false,
< C_ld_bit_mux_dec_out,
< R_wire_bit_mux_dec_out,
< false/*is_fa*/,
< is_dram,
< false,
< camFlag? cam_cell:cell);
< sa_mux_lev_1_dec = new Decoder(
< dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
< dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
< C_ld_sa_mux_lev_1_dec_out,
< R_wire_sa_mux_dec_out,
< false/*is_fa*/,
< is_dram,
< false,
< camFlag? cam_cell:cell);
< sa_mux_lev_2_dec = new Decoder(
< dp.Ndsam_lev_2, // This number is 1 for FA or CAM
< false,
< C_ld_sa_mux_lev_2_dec_out,
< R_wire_sa_mux_dec_out,
< false/*is_fa*/,
< is_dram,
< false,
< camFlag? cam_cell:cell);
---
> bit_mux_dec = new Decoder(
> deg_bl_muxing,// This number is 1 for FA or CAM
> false,
> C_ld_bit_mux_dec_out,
> R_wire_bit_mux_dec_out,
> false/*is_fa*/,
> is_dram,
> false,
> camFlag ? cam_cell : cell);
> sa_mux_lev_1_dec = new Decoder(
> dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
> dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
> C_ld_sa_mux_lev_1_dec_out,
> R_wire_sa_mux_dec_out,
> false/*is_fa*/,
> is_dram,
> false,
> camFlag ? cam_cell : cell);
> sa_mux_lev_2_dec = new Decoder(
> dp.Ndsam_lev_2, // This number is 1 for FA or CAM
> false,
> C_ld_sa_mux_lev_2_dec_out,
> R_wire_sa_mux_dec_out,
> false/*is_fa*/,
> is_dram,
> false,
> camFlag ? cam_cell : cell);
195,196c190,191
< double C_wire_predec_blk_out;
< double R_wire_predec_blk_out;
---
> double C_wire_predec_blk_out;
> double R_wire_predec_blk_out;
198,199c193
< if (!is_fa && !pure_cam)
< {
---
> if (!is_fa && !pure_cam) {
201,202c195,196
< C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
< R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
---
> C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
> R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
204,209c198,201
< }
< else //for pre-decode block's load is same for both FA and CAM
< {
< C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
< R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
< }
---
> } else { //for pre-decode block's load is same for both FA and CAM
> C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
> R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
> }
212,213c204,205
< if (is_fa||pure_cam)
< num_dec_signals += _log2(num_subarrays_per_mat);
---
> if (is_fa || pure_cam)
> num_dec_signals += _log2(num_subarrays_per_mat);
215,238c207,230
< PredecBlk * r_predec_blk1 = new PredecBlk(
< num_dec_signals,
< row_dec,
< C_wire_predec_blk_out,
< R_wire_predec_blk_out,
< num_subarrays_per_mat,
< is_dram,
< true);
< PredecBlk * r_predec_blk2 = new PredecBlk(
< num_dec_signals,
< row_dec,
< C_wire_predec_blk_out,
< R_wire_predec_blk_out,
< num_subarrays_per_mat,
< is_dram,
< false);
< PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
< PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
< PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
< PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
< PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
< PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
< dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
< dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
---
> PredecBlk * r_predec_blk1 = new PredecBlk(
> num_dec_signals,
> row_dec,
> C_wire_predec_blk_out,
> R_wire_predec_blk_out,
> num_subarrays_per_mat,
> is_dram,
> true);
> PredecBlk * r_predec_blk2 = new PredecBlk(
> num_dec_signals,
> row_dec,
> C_wire_predec_blk_out,
> R_wire_predec_blk_out,
> num_subarrays_per_mat,
> is_dram,
> false);
> PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
> PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
> PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
> PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
> PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
> PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
> dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
> dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
240,249c232,241
< PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
< PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
< PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
< PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
< PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
< PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
< PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
< PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
< way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
< dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
---
> PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
> PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
> PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
> PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
> PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
> PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
> PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
> PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
> way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
> dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
251,254c243,246
< r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
< b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
< sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
< sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
---
> r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
> b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
> sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
> sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
256c248
< subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
---
> subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
258,260c250,252
< double driver_c_gate_load;
< double driver_c_wire_load;
< double driver_r_wire_load;
---
> double driver_c_gate_load;
> double driver_c_wire_load;
> double driver_r_wire_load;
262c254
< if (is_fa || pure_cam)
---
> if (is_fa || pure_cam)
264,272c256,268
< { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
< driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
< driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
< driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
< cam_bl_precharge_eq_drv = new Driver(
< driver_c_gate_load,
< driver_c_wire_load,
< driver_r_wire_load,
< is_dram);
---
> { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
> driver_c_gate_load = (subarray.num_cols_fa_cam ) *
> gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
> is_dram, false, false);
> driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
> g_tp.wire_outside_mat.C_per_um;
> driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
> g_tp.wire_outside_mat.R_per_um;
> cam_bl_precharge_eq_drv = new Driver(
> driver_c_gate_load,
> driver_c_wire_load,
> driver_r_wire_load,
> is_dram);
274,286c270,285
< if (!pure_cam)
< {
< //This is only used for fully asso not pure CAM
< driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
< driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um;
< driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um;
< bl_precharge_eq_drv = new Driver(
< driver_c_gate_load,
< driver_c_wire_load,
< driver_r_wire_load,
< is_dram);
< }
< }
---
> if (!pure_cam) {
> //This is only used for fully asso not pure CAM
> driver_c_gate_load = (subarray.num_cols_fa_ram ) *
> gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
> is_dram, false, false);
> driver_c_wire_load = subarray.num_cols_fa_ram * cell.w *
> g_tp.wire_outside_mat.C_per_um;
> driver_r_wire_load = subarray.num_cols_fa_ram * cell.w *
> g_tp.wire_outside_mat.R_per_um;
> bl_precharge_eq_drv = new Driver(
> driver_c_gate_load,
> driver_c_wire_load,
> driver_r_wire_load,
> is_dram);
> }
> }
288,300c287,298
< else
< {
< driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
< driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
< driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
< bl_precharge_eq_drv = new Driver(
< driver_c_gate_load,
< driver_c_wire_load,
< driver_r_wire_load,
< is_dram);
< }
< double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
< double w_row_decoder = area_row_decoder / subarray.area.get_h();
---
> else {
> driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
> driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
> driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
> bl_precharge_eq_drv = new Driver(
> driver_c_gate_load,
> driver_c_wire_load,
> driver_r_wire_load,
> is_dram);
> }
> double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
> double w_row_decoder = area_row_decoder / subarray.area.get_h();
302,303c300,301
< double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
< compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
---
> double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
> compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
305,306c303,304
< double h_subarray_out_drv = subarray_out_wire->area.get_area() *
< (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
---
> double h_subarray_out_drv = subarray_out_wire->area.get_area() *
> (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
309c307
< h_subarray_out_drv *= (RWP + ERP + SCHP);
---
> h_subarray_out_drv *= (RWP + ERP + SCHP);
311,314c309,312
< double h_comparators = 0.0;
< double w_row_predecode_output_wires = 0.0;
< double h_bit_mux_dec_out_wires = 0.0;
< double h_senseamp_mux_dec_out_wires = 0.0;
---
> double h_comparators = 0.0;
> double w_row_predecode_output_wires = 0.0;
> double h_bit_mux_dec_out_wires = 0.0;
> double h_senseamp_mux_dec_out_wires = 0.0;
316,321c314,318
< if ((!is_fa)&&(dp.is_tag))
< {
< //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
< h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
< h_comparators *= (RWP + ERP);
< }
---
> if ((!is_fa) && (dp.is_tag)) {
> //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
> h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
> h_comparators *= (RWP + ERP);
> }
327c324
< g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
---
> g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
330,332c327,329
< double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
< (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
< h_subarray_out_drv + h_comparators);
---
> double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
> (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
> h_subarray_out_drv + h_comparators);
334c331
< double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
---
> double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
336,347c333,341
< if (deg_bl_muxing > 1)
< {
< h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
< }
< if (dp.Ndsam_lev_1 > 1)
< {
< h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
< }
< if (dp.Ndsam_lev_2 > 1)
< {
< h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
< }
---
> if (deg_bl_muxing > 1) {
> h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
> }
> if (dp.Ndsam_lev_1 > 1) {
> h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
> }
> if (dp.Ndsam_lev_2 > 1) {
> h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
> }
349,354c343,349
< double h_addr_datain_wires;
< if (!g_ip->ver_htree_wires_over_array)
< {
< h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +
< (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) *
< g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
---
> double h_addr_datain_wires;
> if (!g_ip->ver_htree_wires_over_array) {
> h_addr_datain_wires = (dp.number_addr_bits_mat +
> dp.number_way_select_signals_mat +
> (dp.num_di_b_mat + dp.num_do_b_mat) /
> num_subarrays_per_row) *
> g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
356,361c351,367
< if (is_fa || pure_cam)
< {
< h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit
< (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) *
< g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
< (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP;
---
> if (is_fa || pure_cam) {
> h_addr_datain_wires =
> (dp.number_addr_bits_mat +
> dp.number_way_select_signals_mat + //TODO: revisit
> (dp.num_di_b_mat + dp.num_do_b_mat ) / num_subarrays_per_row) *
> g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
> (dp.num_si_b_mat + dp.num_so_b_mat ) / num_subarrays_per_row *
> g_tp.wire_inside_mat.pitch * SCHP;
> }
> //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
> //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
> h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
> h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
> h_addr_datain_wires +
> h_bit_mux_dec_out_wires +
> h_senseamp_mux_dec_out_wires;
>
363,369d368
< //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
< //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
< h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
< h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
< h_addr_datain_wires +
< h_bit_mux_dec_out_wires +
< h_senseamp_mux_dec_out_wires;
371c370,390
< }
---
> // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
> double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
> b_mux_predec_blk_drv1->area.get_area() +
> sa_mux_lev_1_predec_blk_drv1->area.get_area() +
> sa_mux_lev_2_predec_blk_drv1->area.get_area() +
> way_sel_drv1->area.get_area() +
> r_predec_blk_drv2->area.get_area() +
> b_mux_predec_blk_drv2->area.get_area() +
> sa_mux_lev_1_predec_blk_drv2->area.get_area() +
> sa_mux_lev_2_predec_blk_drv2->area.get_area() +
> r_predec_blk1->area.get_area() +
> b_mux_predec_blk1->area.get_area() +
> sa_mux_lev_1_predec_blk1->area.get_area() +
> sa_mux_lev_2_predec_blk1->area.get_area() +
> r_predec_blk2->area.get_area() +
> b_mux_predec_blk2->area.get_area() +
> sa_mux_lev_1_predec_blk2->area.get_area() +
> sa_mux_lev_2_predec_blk2->area.get_area() +
> bit_mux_dec->area.get_area() +
> sa_mux_lev_1_dec->area.get_area() +
> sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
373,393c392
< // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
< double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
< b_mux_predec_blk_drv1->area.get_area() +
< sa_mux_lev_1_predec_blk_drv1->area.get_area() +
< sa_mux_lev_2_predec_blk_drv1->area.get_area() +
< way_sel_drv1->area.get_area() +
< r_predec_blk_drv2->area.get_area() +
< b_mux_predec_blk_drv2->area.get_area() +
< sa_mux_lev_1_predec_blk_drv2->area.get_area() +
< sa_mux_lev_2_predec_blk_drv2->area.get_area() +
< r_predec_blk1->area.get_area() +
< b_mux_predec_blk1->area.get_area() +
< sa_mux_lev_1_predec_blk1->area.get_area() +
< sa_mux_lev_2_predec_blk1->area.get_area() +
< r_predec_blk2->area.get_area() +
< b_mux_predec_blk2->area.get_area() +
< sa_mux_lev_1_predec_blk2->area.get_area() +
< sa_mux_lev_2_predec_blk2->area.get_area() +
< bit_mux_dec->area.get_area() +
< sa_mux_lev_1_dec->area.get_area() +
< sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
---
> double area_efficiency_mat;
395,396d393
< double area_efficiency_mat;
<
399,400c396,398
< assert(num_subarrays_per_mat/num_subarrays_per_row>0);
< area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area;
---
> assert(num_subarrays_per_mat / num_subarrays_per_row > 0);
> area.h = (num_subarrays_per_mat / num_subarrays_per_row) *
> subarray.area.h + h_non_cell_area;
402,403c400,402
< area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
< area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area();
---
> area.w = (area.h * area.w + area_mat_center_circuitry) / area.h;
> area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat *
> 100.0 / area.get_area();
416,417c415,416
< assert(area.h>0);
< assert(area.w>0);
---
> assert(area.h > 0);
> assert(area.w > 0);
426c425
< }
---
> }
430,435c429,433
< Mat::~Mat()
< {
< delete row_dec;
< delete bit_mux_dec;
< delete sa_mux_lev_1_dec;
< delete sa_mux_lev_2_dec;
---
> Mat::~Mat() {
> delete row_dec;
> delete bit_mux_dec;
> delete sa_mux_lev_1_dec;
> delete sa_mux_lev_2_dec;
437,446c435,444
< delete r_predec->blk1;
< delete r_predec->blk2;
< delete b_mux_predec->blk1;
< delete b_mux_predec->blk2;
< delete sa_mux_lev_1_predec->blk1;
< delete sa_mux_lev_1_predec->blk2;
< delete sa_mux_lev_2_predec->blk1;
< delete sa_mux_lev_2_predec->blk2;
< delete dummy_way_sel_predec_blk1;
< delete dummy_way_sel_predec_blk2;
---
> delete r_predec->blk1;
> delete r_predec->blk2;
> delete b_mux_predec->blk1;
> delete b_mux_predec->blk2;
> delete sa_mux_lev_1_predec->blk1;
> delete sa_mux_lev_1_predec->blk2;
> delete sa_mux_lev_2_predec->blk1;
> delete sa_mux_lev_2_predec->blk2;
> delete dummy_way_sel_predec_blk1;
> delete dummy_way_sel_predec_blk2;
448,457c446,455
< delete r_predec->drv1;
< delete r_predec->drv2;
< delete b_mux_predec->drv1;
< delete b_mux_predec->drv2;
< delete sa_mux_lev_1_predec->drv1;
< delete sa_mux_lev_1_predec->drv2;
< delete sa_mux_lev_2_predec->drv1;
< delete sa_mux_lev_2_predec->drv2;
< delete way_sel_drv1;
< delete dummy_way_sel_predec_blk_drv2;
---
> delete r_predec->drv1;
> delete r_predec->drv2;
> delete b_mux_predec->drv1;
> delete b_mux_predec->drv2;
> delete sa_mux_lev_1_predec->drv1;
> delete sa_mux_lev_1_predec->drv2;
> delete sa_mux_lev_2_predec->drv1;
> delete sa_mux_lev_2_predec->drv2;
> delete way_sel_drv1;
> delete dummy_way_sel_predec_blk_drv2;
459,462c457,460
< delete r_predec;
< delete b_mux_predec;
< delete sa_mux_lev_1_predec;
< delete sa_mux_lev_2_predec;
---
> delete r_predec;
> delete b_mux_predec;
> delete sa_mux_lev_1_predec;
> delete sa_mux_lev_2_predec;
464,466c462,464
< delete subarray_out_wire;
< if (!pure_cam)
< delete bl_precharge_eq_drv;
---
> delete subarray_out_wire;
> if (!pure_cam)
> delete bl_precharge_eq_drv;
468,475c466,472
< if (is_fa || pure_cam)
< {
< delete sl_precharge_eq_drv ;
< delete sl_data_drv ;
< delete cam_bl_precharge_eq_drv;
< delete ml_precharge_drv;
< delete ml_to_ram_wl_drv;
< }
---
> if (is_fa || pure_cam) {
> delete sl_precharge_eq_drv ;
> delete sl_data_drv ;
> delete cam_bl_precharge_eq_drv;
> delete ml_precharge_drv;
> delete ml_to_ram_wl_drv;
> }
480,499c477,496
< double Mat::compute_delays(double inrisetime)
< {
< int k;
< double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl;
< double outrisetime_search, outrisetime, row_dec_outrisetime;
< // delay calculation for tags of fully associative cache
< if (is_fa || pure_cam)
< {
< //Compute search access time
< outrisetime_search = compute_cam_delay(inrisetime);
< if (is_fa)
< {
< bl_precharge_eq_drv->compute_delay(0);
< k = ml_to_ram_wl_drv->number_gates - 1;
< rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
< C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
< drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
< C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load;
< tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
< delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
---
> double Mat::compute_delays(double inrisetime) {
> int k;
> double rd, C_intrinsic, C_ld, tf, R_bl_precharge, r_b_metal, R_bl, C_bl;
> double outrisetime_search, outrisetime, row_dec_outrisetime;
> // delay calculation for tags of fully associative cache
> if (is_fa || pure_cam) {
> //Compute search access time
> outrisetime_search = compute_cam_delay(inrisetime);
> if (is_fa) {
> bl_precharge_eq_drv->compute_delay(0);
> k = ml_to_ram_wl_drv->number_gates - 1;
> rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
> C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4 *
> cell.h, is_dram, false, true) +
> drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4 * cell.h,
> is_dram, false, true);
> C_ld = ml_to_ram_wl_drv->c_gate_load +
> ml_to_ram_wl_drv->c_wire_load;
> tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
> delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
501,507c498,505
< R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
< r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
< R_bl = subarray.num_rows * r_b_metal;
< C_bl = subarray.C_bl;
< delay_bl_restore = bl_precharge_eq_drv->delay +
< log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
< (R_bl_precharge * C_bl + R_bl * C_bl / 2);
---
> R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
> r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
> R_bl = subarray.num_rows * r_b_metal;
> C_bl = subarray.C_bl;
> delay_bl_restore = bl_precharge_eq_drv->delay +
> log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
> (g_tp.sram.Vbitpre - dp.V_b_sense)) *
> (R_bl_precharge * C_bl + R_bl * C_bl / 2);
510,516c508,514
< outrisetime_search = compute_bitline_delay(outrisetime_search);
< outrisetime_search = compute_sa_delay(outrisetime_search);
< }
< outrisetime_search = compute_subarray_out_drv(outrisetime_search);
< subarray_out_wire->set_in_rise_time(outrisetime_search);
< outrisetime_search = subarray_out_wire->signal_rise_time();
< delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
---
> outrisetime_search = compute_bitline_delay(outrisetime_search);
> outrisetime_search = compute_sa_delay(outrisetime_search);
> }
> outrisetime_search = compute_subarray_out_drv(outrisetime_search);
> subarray_out_wire->set_in_rise_time(outrisetime_search);
> outrisetime_search = subarray_out_wire->signal_rise_time();
> delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
519,521c517,519
< //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
< outrisetime = r_predec->compute_delays(inrisetime);
< row_dec_outrisetime = row_dec->compute_delays(outrisetime);
---
> //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
> outrisetime = r_predec->compute_delays(inrisetime);
> row_dec_outrisetime = row_dec->compute_delays(outrisetime);
523,524c521,522
< outrisetime = b_mux_predec->compute_delays(inrisetime);
< bit_mux_dec->compute_delays(outrisetime);
---
> outrisetime = b_mux_predec->compute_delays(inrisetime);
> bit_mux_dec->compute_delays(outrisetime);
526,527c524,525
< outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
< sa_mux_lev_1_dec->compute_delays(outrisetime);
---
> outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
> sa_mux_lev_1_dec->compute_delays(outrisetime);
529,530c527,528
< outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
< sa_mux_lev_2_dec->compute_delays(outrisetime);
---
> outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
> sa_mux_lev_2_dec->compute_delays(outrisetime);
532,537c530,561
< if (pure_cam)
< {
< outrisetime = compute_bitline_delay(row_dec_outrisetime);
< outrisetime = compute_sa_delay(outrisetime);
< }
< return outrisetime_search;
---
> if (pure_cam) {
> outrisetime = compute_bitline_delay(row_dec_outrisetime);
> outrisetime = compute_sa_delay(outrisetime);
> }
> return outrisetime_search;
> } else {
> bl_precharge_eq_drv->compute_delay(0);
> if (row_dec->exist == true) {
> int k = row_dec->num_gates - 1;
> double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
> // TODO: this 4*cell.h number must be revisited
> double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 *
> cell.h, is_dram, false, true) +
> drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, is_dram,
> false, true);
> double C_ld = row_dec->C_ld_dec_out;
> double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
> delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
> }
> double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
> double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
> double R_bl = subarray.num_rows * r_b_metal;
> double C_bl = subarray.C_bl;
>
> if (is_dram) {
> delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
> } else {
> delay_bl_restore = bl_precharge_eq_drv->delay +
> log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
> (g_tp.sram.Vbitpre - dp.V_b_sense)) *
> (R_bl_precharge * C_bl + R_bl * C_bl / 2);
> }
539,556d562
< else
< {
< bl_precharge_eq_drv->compute_delay(0);
< if (row_dec->exist == true)
< {
< int k = row_dec->num_gates - 1;
< double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
< // TODO: this 4*cell.h number must be revisited
< double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
< drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
< double C_ld = row_dec->C_ld_dec_out;
< double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
< delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
< }
< double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
< double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
< double R_bl = subarray.num_rows * r_b_metal;
< double C_bl = subarray.C_bl;
558,568d563
< if (is_dram)
< {
< delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
< }
< else
< {
< delay_bl_restore = bl_precharge_eq_drv->delay +
< log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
< (R_bl_precharge * C_bl + R_bl * C_bl / 2);
< }
< }
570a566,567
> outrisetime = r_predec->compute_delays(inrisetime);
> row_dec_outrisetime = row_dec->compute_delays(outrisetime);
572,573c569,570
< outrisetime = r_predec->compute_delays(inrisetime);
< row_dec_outrisetime = row_dec->compute_delays(outrisetime);
---
> outrisetime = b_mux_predec->compute_delays(inrisetime);
> bit_mux_dec->compute_delays(outrisetime);
575,576c572,573
< outrisetime = b_mux_predec->compute_delays(inrisetime);
< bit_mux_dec->compute_delays(outrisetime);
---
> outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
> sa_mux_lev_1_dec->compute_delays(outrisetime);
578,579c575,576
< outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
< sa_mux_lev_1_dec->compute_delays(outrisetime);
---
> outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
> sa_mux_lev_2_dec->compute_delays(outrisetime);
581,582c578,582
< outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
< sa_mux_lev_2_dec->compute_delays(outrisetime);
---
> outrisetime = compute_bitline_delay(row_dec_outrisetime);
> outrisetime = compute_sa_delay(outrisetime);
> outrisetime = compute_subarray_out_drv(outrisetime);
> subarray_out_wire->set_in_rise_time(outrisetime);
> outrisetime = subarray_out_wire->signal_rise_time();
584,588c584
< outrisetime = compute_bitline_delay(row_dec_outrisetime);
< outrisetime = compute_sa_delay(outrisetime);
< outrisetime = compute_subarray_out_drv(outrisetime);
< subarray_out_wire->set_in_rise_time(outrisetime);
< outrisetime = subarray_out_wire->signal_rise_time();
---
> delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
590c586,588
< delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
---
> if (dp.is_tag == true && dp.fully_assoc == false) {
> compute_comparator_delay(0);
> }
592,599c590,591
< if (dp.is_tag == true && dp.fully_assoc == false)
< {
< compute_comparator_delay(0);
< }
<
< if (row_dec->exist == false)
< {
< delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
---
> if (row_dec->exist == false) {
> delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
601c593
< return outrisetime;
---
> return outrisetime;
606,607c598
< double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h()
< {
---
> double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() {
609,610c600,607
< double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) +
< compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP)); // precharge circuitry
---
> double height =
> compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge,
> camFlag ? cam_cell.w :
> cell.w / (2 * (RWP + ERP + SCHP))) +
> // precharge circuitry
> compute_tr_width_after_folding(g_tp.w_pmos_bl_eq,
> camFlag ? cam_cell.w :
> cell.w / (RWP + ERP + SCHP));
612,616c609,615
< if (deg_bl_muxing > 1)
< {
< height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height
< // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
< }
---
> if (deg_bl_muxing > 1) {
> // col mux tr height
> height +=
> compute_tr_width_after_folding(g_tp.w_nmos_b_mux,
> cell.w / (2 * (RWP + ERP)));
> // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
> }
618c617
< height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
---
> height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
620,625c619,623
< if (dp.Ndsam_lev_1 > 1)
< {
< height += compute_tr_width_after_folding(
< g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
< //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
< }
---
> if (dp.Ndsam_lev_1 > 1) {
> height += compute_tr_width_after_folding(
> g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
> //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
> }
627,631c625,628
< if (dp.Ndsam_lev_2 > 1)
< {
< height += compute_tr_width_after_folding(
< g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
< //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
---
> if (dp.Ndsam_lev_2 > 1) {
> height += compute_tr_width_after_folding(
> g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
> //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
633,637c630,634
< // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
< height += 2 * compute_tr_width_after_folding(
< pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
< height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
< }
---
> // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
> height += 2 * compute_tr_width_after_folding(
> pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
> height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
> }
639,649c636,646
< // TODO: this should be uncommented...
< /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
< {
< //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
< double width_write_driver_write_mux = width_write_driver_or_write_mux();
< double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
< cell.w *
< // deg_bl_muxing *
< dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
< height += height_write_driver_write_mux;
< }*/
---
> // TODO: this should be uncommented...
> /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
> {
> //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
> double width_write_driver_write_mux = width_write_driver_or_write_mux();
> double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
> cell.w *
> // deg_bl_muxing *
> dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
> height += height_write_driver_write_mux;
> }*/
651c648
< return height;
---
> return height;
656,657c653
< double Mat::compute_cam_delay(double inrisetime)
< {
---
> double Mat::compute_cam_delay(double inrisetime) {
659,660c655,656
< double out_time_ramp, this_delay;
< double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
---
> double out_time_ramp, this_delay;
> double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
663c659
< double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
---
> double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
668,669c664,665
< double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
< int Htagbits;
---
> double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
> int Htagbits;
671,674c667,670
< double driver_c_gate_load;
< double driver_c_wire_load;
< double driver_r_wire_load;
< //double searchline_precharge_time;
---
> double driver_c_gate_load;
> double driver_c_wire_load;
> double driver_r_wire_load;
> //double searchline_precharge_time;
676,680c672,676
< double leak_power_cc_inverters_sram_cell = 0;
< double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
< double leak_power_RD_port_sram_cell = 0;
< double leak_power_SCHP_port_sram_cell = 0;
< double leak_comparator_cam_cell =0;
---
> double leak_power_cc_inverters_sram_cell = 0;
> double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
> double leak_power_RD_port_sram_cell = 0;
> double leak_power_SCHP_port_sram_cell = 0;
> double leak_comparator_cam_cell =0;
682,685c678,681
< double gate_leak_comparator_cam_cell = 0;
< double gate_leak_power_cc_inverters_sram_cell = 0;
< double gate_leak_power_RD_port_sram_cell = 0;
< double gate_leak_power_SCHP_port_sram_cell = 0;
---
> double gate_leak_comparator_cam_cell = 0;
> double gate_leak_power_cc_inverters_sram_cell = 0;
> double gate_leak_power_RD_port_sram_cell = 0;
> double gate_leak_power_SCHP_port_sram_cell = 0;
687,690c683,686
< c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
< c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
< r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
< r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
---
> c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
> c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
> r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
> r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
692,695c688,691
< dynSearchEng = 0.0;
< delay_matchchline = 0.0;
< double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
< bool linear_scaling = false;
---
> dynSearchEng = 0.0;
> delay_matchchline = 0.0;
> double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
> bool linear_scaling = false;
697,717c693,712
< if (linear_scaling)
< {
< Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
< Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
< Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
< Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
< Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
< Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
< Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
< Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
< Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
< Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
< Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
< Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
< Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
< Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
< Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
< Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
< Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
< Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
< Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
---
> if (linear_scaling) {
> Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
> Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
> Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
> Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
> Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
> Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
> Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
> Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
> Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
> Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
> Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
> Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
> Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
> Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
> Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
> Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
> Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
> Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
> Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
719,751c714,744
< Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
< Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
< Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
< Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
< Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
< Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
< Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
< Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
< W_hit_miss_n = Wdummyn;
< W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
< //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
< }
< else
< {
< Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
< Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
< Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
< Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
< Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
< Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
< Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
< Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
< Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
< Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
< Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
< Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
< Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
< Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
< Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
< Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
< Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
< Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
< Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
---
> Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
> Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
> Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
> Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
> Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
> Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
> Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
> Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
> W_hit_miss_n = Wdummyn;
> W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
> //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
> } else {
> Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
> Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
> Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
> Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
> Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
> Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
> Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
> Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
> Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
> Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
> Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
> Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
> Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
> Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
> Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
> Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
> Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
> Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
> Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
753,763c746,756
< Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
< Wdummyn = g_tp.cam.cell_nmos_w;
< Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
< Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
< Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
< Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
< Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
< Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
< W_hit_miss_n = Wdummyn;
< W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
< }
---
> Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
> Wdummyn = g_tp.cam.cell_nmos_w;
> Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
> Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
> Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
> Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
> Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
> Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
> W_hit_miss_n = Wdummyn;
> W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
> }
765c758
< Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
---
> Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
767,770c760,763
< /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
< search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
< From the driver(am and an) to the comparators in all the rows including the dummy row,
< Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
---
> /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
> search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
> From the driver(am and an) to the comparators in all the rows including the dummy row,
> Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
772,776c765,769
< //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
< //Searchline precharge routes horizontally
< driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
< driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
< driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
---
> //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
> //Searchline precharge routes horizontally
> driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
> driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
> driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
778,782c771,775
< sl_precharge_eq_drv = new Driver(
< driver_c_gate_load,
< driver_c_wire_load,
< driver_r_wire_load,
< is_dram);
---
> sl_precharge_eq_drv = new Driver(
> driver_c_gate_load,
> driver_c_wire_load,
> driver_r_wire_load,
> is_dram);
784,793c777,786
< //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
< //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
< driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
< driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
< driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
< sl_data_drv = new Driver(
< driver_c_gate_load,
< driver_c_wire_load,
< driver_r_wire_load,
< is_dram);
---
> //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
> //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
> driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
> driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
> driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
> sl_data_drv = new Driver(
> driver_c_gate_load,
> driver_c_wire_load,
> driver_r_wire_load,
> is_dram);
795,801c788,794
< sl_precharge_eq_drv->compute_delay(0);
< double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
< double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
< double R_bl = (subarray.num_rows + 1) * r_b_metal;
< double C_bl = subarray.C_bl_cam;
< delay_cam_sl_restore = sl_precharge_eq_drv->delay
< + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2);
---
> sl_precharge_eq_drv->compute_delay(0);
> double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
> double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
> double R_bl = (subarray.num_rows + 1) * r_b_metal;
> double C_bl = subarray.C_bl_cam;
> delay_cam_sl_restore = sl_precharge_eq_drv->delay
> + log(g_tp.cam.Vbitpre) * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
803c796
< out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
---
> out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
805,806c798,799
< //matchline ops delay
< delay_matchchline += sl_data_drv->delay;
---
> //matchline ops delay
> delay_matchchline += sl_data_drv->delay;
808,809c801,802
< /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
< //matchline delay, matchline power, matchline_reset for cycle time computation,
---
> /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
> //matchline delay, matchline power, matchline_reset for cycle time computation,
811,815c804,808
< ////matchline precharge circuitry routes vertically
< //There are two matchline precharge driver chains per subarray.
< driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
< driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
< driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
---
> ////matchline precharge circuitry routes vertically
> //There are two matchline precharge driver chains per subarray.
> driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
> driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
> driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
817,821c810,814
< ml_precharge_drv = new Driver(
< driver_c_gate_load,
< driver_c_wire_load,
< driver_r_wire_load,
< is_dram);
---
> ml_precharge_drv = new Driver(
> driver_c_gate_load,
> driver_c_wire_load,
> driver_r_wire_load,
> is_dram);
823c816
< ml_precharge_drv->compute_delay(0);
---
> ml_precharge_drv->compute_delay(0);
826,828c819,824
< rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
< c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit
< + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline
---
> rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
> c_intrinsic = Htagbits *
> (2 * drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def,
> is_dram)//TODO: the cell_h_def should be revisit
> + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram) /
> Htagbits);//since each halve only has one precharge tx per matchline
830,832c826,828
< Cwire = c_matchline_metal * Htagbits;
< Rwire = r_matchline_metal * Htagbits;
< c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
---
> Cwire = c_matchline_metal * Htagbits;
> Rwire = r_matchline_metal * Htagbits;
> c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
834,839c830,836
< double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
< //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
< double R_ml = Rwire;
< double C_ml = Cwire + c_intrinsic;
< delay_cam_ml_reset = ml_precharge_drv->delay
< + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too
---
> double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
> //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
> double R_ml = Rwire;
> double C_ml = Cwire + c_intrinsic;
> //TODO: latest CAM has sense amps on matchlines too
> delay_cam_ml_reset = ml_precharge_drv->delay
> + log(g_tp.cam.Vbitpre) * (R_ml_precharge * C_ml + R_ml * C_ml / 2);
841,845c838,842
< //matchline ops delay
< tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
< this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
< delay_matchchline += this_delay;
< out_time_ramp = this_delay / VTHFA3;
---
> //matchline ops delay
> tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
> this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
> delay_matchchline += this_delay;
> out_time_ramp = this_delay / VTHFA3;
847,848c844,847
< dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise
< * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves
---
> dynSearchEng += ((c_intrinsic + Cwire + c_gate_load) *
> (subarray.num_rows + 1)) //TODO: need to be precise
> * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *
> 2;//each subarry has two halves
850,858c849,857
< /* third stage, from the NAND2 gates to the drivers in the dummy row */
< rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
< c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
< drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2;
< c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
< tf = rd * (c_intrinsic + c_gate_load);
< this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
< out_time_ramp = this_delay / (1 - VTHFA4);
< delay_matchchline += this_delay;
---
> /* third stage, from the NAND2 gates to the drivers in the dummy row */
> rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
> c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
> drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram) * 2;
> c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
> tf = rd * (c_intrinsic + c_gate_load);
> this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
> out_time_ramp = this_delay / (1 - VTHFA4);
> delay_matchchline += this_delay;
860,861c859,861
< //only the dummy row has the extra inverter between NAND and NOR gates
< dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
---
> //only the dummy row has the extra inverter between NAND and NOR gates
> dynSearchEng += (c_intrinsic * (subarray.num_rows + 1) + c_gate_load * 2) *
> g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
863,872c863,874
< /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
< rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
< c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
< Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2;
< Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2;
< c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
< tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
< this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
< out_time_ramp = this_delay / VTHFA5;
< delay_matchchline += this_delay;
---
> /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
> rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
> c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
> Cwire = c_matchline_metal * Htagbits + c_searchline_metal *
> (subarray.num_rows + 1) / 2;
> Rwire = r_matchline_metal * Htagbits + r_searchline_metal *
> (subarray.num_rows + 1) / 2;
> c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
> tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
> this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
> out_time_ramp = this_delay / VTHFA5;
> delay_matchchline += this_delay;
874c876,877
< dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
---
> dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows * c_gate_load) *
> g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
876c879
< /*final statge from the NOR gate to drive the wordline of the data portion */
---
> /*final statge from the NOR gate to drive the wordline of the data portion */
878,881c881,884
< //searchline data driver There are two matchline precharge driver chains per subarray.
< driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
< driver_c_wire_load = subarray.C_wl_ram;
< driver_r_wire_load = subarray.R_wl_ram;
---
> //searchline data driver There are two matchline precharge driver chains per subarray.
> driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
> driver_c_wire_load = subarray.C_wl_ram;
> driver_r_wire_load = subarray.R_wl_ram;
883,887c886,890
< ml_to_ram_wl_drv = new Driver(
< driver_c_gate_load,
< driver_c_wire_load,
< driver_r_wire_load,
< is_dram);
---
> ml_to_ram_wl_drv = new Driver(
> driver_c_gate_load,
> driver_c_wire_load,
> driver_r_wire_load,
> is_dram);
891,897c894,901
< rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
< c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
< c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
< tf = rd * (c_intrinsic + c_gate_load);
< this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
< out_time_ramp = this_delay / (1-0.5);
< delay_matchchline += this_delay;
---
> rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
> c_intrinsic = 2 * drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
> drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
> c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
> tf = rd * (c_intrinsic + c_gate_load);
> this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
> out_time_ramp = this_delay / (1 - 0.5);
> delay_matchchline += this_delay;
899c903
< out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
---
> out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
901,902c905,906
< //c_gate_load energy is computed in ml_to_ram_wl_drv
< dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
---
> //c_gate_load energy is computed in ml_to_ram_wl_drv
> dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
905,910c909,916
< /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
< /*Precharge the hitting logic */
< c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
< Cwire = c_searchline_metal * subarray.num_rows;
< Rwire = r_searchline_metal * subarray.num_rows;
< c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
---
> /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
> /*Precharge the hitting logic */
> c_intrinsic = 2 *
> drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
> Cwire = c_searchline_metal * subarray.num_rows;
> Rwire = r_searchline_metal * subarray.num_rows;
> c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
> subarray.num_rows;
912,917c918,924
< rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
< //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
< double R_hit_miss = Rwire;
< double C_hit_miss = Cwire + c_intrinsic;
< delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
< dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
---
> rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
> //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
> double R_hit_miss = Rwire;
> double C_hit_miss = Cwire + c_intrinsic;
> delay_hit_miss_reset = log(g_tp.cam.Vbitpre) *
> (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
> dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
919,923c926,932
< /*hitting logic evaluation */
< c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
< Cwire = c_searchline_metal * subarray.num_rows;
< Rwire = r_searchline_metal * subarray.num_rows;
< c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
---
> /*hitting logic evaluation */
> c_intrinsic = 2 *
> drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
> Cwire = c_searchline_metal * subarray.num_rows;
> Rwire = r_searchline_metal * subarray.num_rows;
> c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
> subarray.num_rows;
925,926c934,935
< rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
< tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
---
> rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
> tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
928c937
< delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
---
> delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
930,931c939,940
< if (is_fa)
< delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
---
> if (is_fa)
> delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
933c942
< dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
---
> dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
935c944
< /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
---
> /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
937c946
< power_matchline.searchOp.dynamic = dynSearchEng;
---
> power_matchline.searchOp.dynamic = dynSearchEng;
939,943c948,955
< //leakage in one subarray
< double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
< double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
< double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
< double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv
---
> //leakage in one subarray
> double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
> double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
> double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
> 1, inv, false, true) * 2;
> //approx XOR with Inv
> double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv,
> false, true) * 2;
945,949c957,961
< leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
< leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
< leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
< leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
< leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
---
> leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
> leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
> leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
> leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
> leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
951,956c963,968
< power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
< leak_comparator_cam_cell +
< leak_power_acc_tr_RW_or_WR_port_sram_cell +
< leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
< leak_power_RD_port_sram_cell * ERP +
< leak_power_SCHP_port_sram_cell*SCHP;
---
> power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
> leak_comparator_cam_cell +
> leak_power_acc_tr_RW_or_WR_port_sram_cell +
> leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
> leak_power_RD_port_sram_cell * ERP +
> leak_power_SCHP_port_sram_cell * SCHP;
958,963c970,979
< power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
< power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
< power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
< power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd;
< //In idle states, the hit/miss txs are closed (on) therefore no Isub
< power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
---
> power_matchline.searchOp.leakage *= (subarray.num_rows + 1) *
> subarray.num_cols_fa_cam;//TODO:dumy line precise
> power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
> cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
> power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
> cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
> power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
> cmos_Isub_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
> //In idle states, the hit/miss txs are closed (on) therefore no Isub
> power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
966,969c982,987
< //in idle state, Ig_on only possibly exist in access transistors of read only ports
< double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
< double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
< double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2;
---
> //in idle state, Ig_on only possibly exist in access transistors of read only ports
> double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
> double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
> 1, inv, false, true) * 2;
> double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv,
> false, true) * 2;
971,974c989,992
< gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd;
< gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd;
< gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
< gate_leak_power_SCHP_port_sram_cell = 0;
---
> gate_leak_comparator_cam_cell = Ig_cell_comparator * g_tp.cam_cell.Vdd;
> gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.cam_cell.Vdd;
> gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
> gate_leak_power_SCHP_port_sram_cell = 0;
976c994
< //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
---
> //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
978,986c996,1012
< power_matchline.searchOp.gate_leakage += gate_leak_power_cc_inverters_sram_cell;
< power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
< power_matchline.searchOp.gate_leakage += gate_leak_power_SCHP_port_sram_cell*SCHP + gate_leak_power_RD_port_sram_cell * ERP;
< power_matchline.searchOp.gate_leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
< power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(0, Wfaprechp,1, pmos) * g_tp.cam_cell.Vdd;
< power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
< power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
< power_matchline.searchOp.gate_leakage += subarray.num_rows * cmos_Ig_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
< + cmos_Ig_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
---
> power_matchline.searchOp.gate_leakage +=
> gate_leak_power_cc_inverters_sram_cell;
> power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
> power_matchline.searchOp.gate_leakage +=
> gate_leak_power_SCHP_port_sram_cell * SCHP +
> gate_leak_power_RD_port_sram_cell * ERP;
> power_matchline.searchOp.gate_leakage *= (subarray.num_rows + 1) *
> subarray.num_cols_fa_cam;//TODO:dumy line precise
> power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
> cmos_Ig_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
> power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
> cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
> power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
> cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
> power_matchline.searchOp.gate_leakage += subarray.num_rows *
> cmos_Ig_leakage(W_hit_miss_n, 0, 1, nmos) * g_tp.cam_cell.Vdd +
> + cmos_Ig_leakage(0, W_hit_miss_p, 1, pmos) * g_tp.cam_cell.Vdd;
989c1015
< return out_time_ramp;
---
> return out_time_ramp;
993,1000c1019,1025
< double Mat::width_write_driver_or_write_mux()
< {
< // calculate resistance of SRAM cell pull-up PMOS transistor
< // cam and sram have same cell trasistor properties
< double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
< double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true);
< double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
< double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
---
> double Mat::width_write_driver_or_write_mux() {
> // calculate resistance of SRAM cell pull-up PMOS transistor
> // cam and sram have same cell trasistor properties
> double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
> double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true);
> double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
> double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
1002c1027
< return width_write_driver_nmos;
---
> return width_write_driver_nmos;
1010,1014c1035,1038
< double subarray_mem_cell_area_width)
< {
< double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
< double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
< return cumulative_area / subarray_mem_cell_area_width;
---
> double subarray_mem_cell_area_width) {
> double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
> double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
> return cumulative_area / subarray_mem_cell_area_width;
1019,1025c1043,1048
< double Mat::compute_bitline_delay(double inrisetime)
< {
< double V_b_pre, v_th_mem_cell, V_wl;
< double tstep;
< double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
< double R_cell_pull_down=0.0, R_cell_acc =0.0, r_dev=0.0;
< int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
---
> double Mat::compute_bitline_delay(double inrisetime) {
> double V_b_pre, v_th_mem_cell, V_wl;
> double tstep;
> double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
> double R_cell_pull_down = 0.0, R_cell_acc = 0.0, r_dev = 0.0;
> int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
1027,1029c1050,1052
< double R_b_metal = camFlag? cam_cell.h:cell.h * g_tp.wire_local.R_per_um;
< double R_bl = subarray.num_rows * R_b_metal;
< double C_bl = subarray.C_bl;
---
> double R_b_metal = camFlag ? cam_cell.h : cell.h * g_tp.wire_local.R_per_um;
> double R_bl = subarray.num_rows * R_b_metal;
> double C_bl = subarray.C_bl;
1031,1036c1054,1059
< // TODO: no leakage for DRAMs?
< double leak_power_cc_inverters_sram_cell = 0;
< double gate_leak_power_cc_inverters_sram_cell = 0;
< double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
< double leak_power_RD_port_sram_cell = 0;
< double gate_leak_power_RD_port_sram_cell = 0;
---
> // TODO: no leakage for DRAMs?
> double leak_power_cc_inverters_sram_cell = 0;
> double gate_leak_power_cc_inverters_sram_cell = 0;
> double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
> double leak_power_RD_port_sram_cell = 0;
> double gate_leak_power_RD_port_sram_cell = 0;
1038,1054c1061,1075
< if (is_dram == true)
< {
< V_b_pre = g_tp.dram.Vbitpre;
< v_th_mem_cell = g_tp.dram_acc.Vth;
< V_wl = g_tp.vpp;
< //The access transistor is not folded. So we just need to specify a threshold value for the
< //folding width that is equal to or greater than Wmemcella.
< R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
< r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
< }
< else
< { //SRAM
< V_b_pre = g_tp.sram.Vbitpre;
< v_th_mem_cell = g_tp.sram_cell.Vth;
< V_wl = g_tp.sram_cell.Vdd;
< R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
< R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
---
> if (is_dram == true) {
> V_b_pre = g_tp.dram.Vbitpre;
> v_th_mem_cell = g_tp.dram_acc.Vth;
> V_wl = g_tp.vpp;
> //The access transistor is not folded. So we just need to specify a
> // threshold value for the folding width that is equal to or greater
> // than Wmemcella.
> R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
> r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
> } else { //SRAM
> V_b_pre = g_tp.sram.Vbitpre;
> v_th_mem_cell = g_tp.sram_cell.Vth;
> V_wl = g_tp.sram_cell.Vdd;
> R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
> R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
1056,1059c1077,1085
< //Leakage current of an SRAM cell
< double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);//TODO: how much is the idle time? just by *2?
< double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,false, true);
< double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true)*2;//two invs per cell
---
> //Leakage current of an SRAM cell
> //TODO: how much is the idle time? just by *2?
> double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,
> false, true);
> double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,
> false, true);
> double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w,
> g_tp.sram.cell_pmos_w, 1, inv, false,
> true) * 2;//two invs per cell
1061,1063c1087,1089
< leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd;
< leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
< leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd;
---
> leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd;
> leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
> leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd;
1066,1068c1092,1097
< //in idle state, Ig_on only possibly exist in access transistors of read only ports
< double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);
< double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true);
---
> //in idle state, Ig_on only possibly exist in access transistors of read only ports
> double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,
> false, true);
> double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w,
> g_tp.sram.cell_pmos_w, 1, inv, false,
> true);
1070,1072c1099,1101
< gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd;
< gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
< }
---
> gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.sram_cell.Vdd;
> gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
> }
1075,1082c1104,1122
< double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram);
< double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
< double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
< double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
< double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
< drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
< drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
< double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
---
> double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0,
> camFlag ? cam_cell.w : cell.w /
> (2 * (RWP + ERP + SCHP)), is_dram);
> double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
> double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0,
> camFlag ? cam_cell.w :
> cell.w * deg_bl_muxing /
> (RWP + ERP + SCHP), is_dram);
> double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
> double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0,
> is_dram) +
> drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag ? cam_cell.w :
> cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
> drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? cam_cell.w :
> cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
> double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
> camFlag ? cam_cell.w :
> cell.w * deg_bl_muxing /
> (RWP + ERP + SCHP), is_dram);
1084,1100c1124,1146
< if (is_dram)
< {
< double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl));
< tstep = 2.3 * fraction * r_dev *
< (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) /
< (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux));
< delay_writeback = tstep;
< dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
< (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
< dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) *
< (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100;
< per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
< (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
< }
< else
< {
< double tau;
---
> if (is_dram) {
> double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd / 2) *
> g_tp.dram_cell_C /
> (g_tp.dram_cell_C + C_bl));
> tstep = 2.3 * fraction * r_dev *
> (g_tp.dram_cell_C * (C_bl + 2 * C_drain_sense_amp_iso +
> C_sense_amp_latch + C_drain_sense_amp_mux)) /
> (g_tp.dram_cell_C + (C_bl + 2 * C_drain_sense_amp_iso +
> C_sense_amp_latch + C_drain_sense_amp_mux));
> delay_writeback = tstep;
> dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch +
> C_drain_sense_amp_mux) *
> (g_tp.dram_cell_Vdd / 2) *
> g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
> dynWriteEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch) *
> (g_tp.dram_cell_Vdd / 2) *
> g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ *
> num_act_mats_hor_dir * 100;
> per_bitline_read_energy = (C_bl + 2 * C_drain_sense_amp_iso +
> C_sense_amp_latch + C_drain_sense_amp_mux) *
> (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
> } else {
> double tau;
1102,1125c1148,1180
< if (deg_bl_muxing > 1)
< {
< tau = (R_cell_pull_down + R_cell_acc) *
< (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
< R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
< R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
< R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
< dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /*
< subarray.num_cols * num_subarrays_per_mat*/;
< dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
< 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing);
< dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
< num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
< //Write Ops are differential for SRAM
< }
< else
< {
< tau = (R_cell_pull_down + R_cell_acc) *
< (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
< R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
< dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
< 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
< dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
< num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
---
> if (deg_bl_muxing > 1) {
> tau = (R_cell_pull_down + R_cell_acc) *
> (C_bl + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
> C_sense_amp_latch + C_drain_sense_amp_mux) +
> R_bl * (C_bl / 2 + 2 * C_drain_bit_mux + 2 *
> C_drain_sense_amp_iso + C_sense_amp_latch +
> C_drain_sense_amp_mux) +
> R_bit_mux * (C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
> C_sense_amp_latch + C_drain_sense_amp_mux) +
> R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch +
> C_drain_sense_amp_mux);
> dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense *
> g_tp.sram_cell.Vdd;
> dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch +
> C_drain_sense_amp_mux) *
> 2 * dp.V_b_sense * g_tp.sram_cell.Vdd *
> (1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
> deg_bl_muxing);
> dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ /
> deg_bl_muxing) / deg_senseamp_muxing) *
> num_act_mats_hor_dir * (C_bl + 2 * C_drain_bit_mux) *
> g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
> //Write Ops are differential for SRAM
> } else {
> tau = (R_cell_pull_down + R_cell_acc) *
> (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
> R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
> dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
> 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
> dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
> deg_bl_muxing) / deg_senseamp_muxing) *
> num_act_mats_hor_dir * C_bl) *
> g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
1126a1182,1191
> }
> tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
> power_bitline.readOp.leakage =
> leak_power_cc_inverters_sram_cell +
> leak_power_acc_tr_RW_or_WR_port_sram_cell +
> leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
> leak_power_RD_port_sram_cell * ERP;
> power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
> gate_leak_power_RD_port_sram_cell * ERP;
>
1128,1135d1192
< tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
< power_bitline.readOp.leakage =
< leak_power_cc_inverters_sram_cell +
< leak_power_acc_tr_RW_or_WR_port_sram_cell +
< leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
< leak_power_RD_port_sram_cell * ERP;
< power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
< gate_leak_power_RD_port_sram_cell * ERP;
1137,1138d1193
< }
<
1145,1154c1200,1206
< /* take input rise time into account */
< double m = V_wl / inrisetime;
< if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m))
< {
< delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell)/ m);
< }
< else
< {
< delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
< }
---
> /* take input rise time into account */
> double m = V_wl / inrisetime;
> if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m)) {
> delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell) / m);
> } else {
> delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
> }
1156c1208
< bool is_fa = (dp.fully_assoc) ? true : false;
---
> bool is_fa = (dp.fully_assoc) ? true : false;
1158,1162c1210,1213
< if (dp.is_tag == false || is_fa == false)
< {
< power_bitline.readOp.dynamic = dynRdEnergy;
< power_bitline.writeOp.dynamic = dynWriteEnergy;
< }
---
> if (dp.is_tag == false || is_fa == false) {
> power_bitline.readOp.dynamic = dynRdEnergy;
> power_bitline.writeOp.dynamic = dynWriteEnergy;
> }
1164,1165c1215,1216
< double outrisetime = 0;
< return outrisetime;
---
> double outrisetime = 0;
> return outrisetime;
1170,1172c1221,1222
< double Mat::compute_sa_delay(double inrisetime)
< {
< //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
---
> double Mat::compute_sa_delay(double inrisetime) {
> //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
1174,1178c1224,1228
< //Bitline circuitry leakage.
< double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram);
< double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
< double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
< double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
---
> //Bitline circuitry leakage.
> double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram);
> double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
> double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
> double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
1180,1187c1230,1237
< double lkgIdlePh = IsenseEn;//+ 2*IoBufP;
< //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
< double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
< //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
< // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
< double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
< leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
< leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
---
> double lkgIdlePh = IsenseEn;//+ 2*IoBufP;
> //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
> double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
> //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
> // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
> double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
> leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
> leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
1189,1199c1239,1257
< // sense amplifier has to drive logic in "data out driver" and sense precharge load.
< // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
< //constant as well as the magnitude of input differential voltage.
< double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
< drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
< drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
< drain_C_(g_tp.w_iso,PCH,1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
< drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
< double tau = C_ld / g_tp.gm_sense_amp_latch;
< delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
< power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
---
> // sense amplifier has to drive logic in "data out driver" and sense precharge load.
> // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
> //constant as well as the magnitude of input differential voltage.
> double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
> drain_C_(g_tp.w_sense_n, NCH, 1, 0,
> camFlag ? cam_cell.w : cell.w * deg_bl_muxing /
> (RWP + ERP + SCHP), is_dram) +
> drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ?
> cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
> is_dram) +
> drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag ?
> cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
> is_dram) +
> drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
> cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
> is_dram);
> double tau = C_ld / g_tp.gm_sense_amp_latch;
> delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
> power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
1201c1259
< power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
---
> power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
1203,1204c1261,1262
< double outrisetime = 0;
< return outrisetime;
---
> double outrisetime = 0;
> return outrisetime;
1209,1212c1267,1269
< double Mat::compute_subarray_out_drv(double inrisetime)
< {
< double C_ld, rd, tf, this_delay;
< double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
---
> double Mat::compute_subarray_out_drv(double inrisetime) {
> double C_ld, rd, tf, this_delay;
> double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
1214,1237c1271,1302
< // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
< rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
< C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
< gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
< tf = rd * C_ld;
< this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
< delay_subarray_out_drv += this_delay;
< inrisetime = this_delay/(1.0 - 0.5);
< power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
< power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
< power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
< // delay of signal through inverter-buffer to second level of sense-amp mux.
< // internal delay of buffer
< rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
< C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
< drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
< gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
< tf = rd * C_ld;
< this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
< delay_subarray_out_drv += this_delay;
< inrisetime = this_delay/(1.0 - 0.5);
< power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
< power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram)* g_tp.peri_global.Vdd;
< power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
---
> // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
> rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
> C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
> camFlag ? cam_cell.w : cell.w *
> deg_bl_muxing / (RWP + ERP + SCHP),
> is_dram) +
> gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
> tf = rd * C_ld;
> this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
> delay_subarray_out_drv += this_delay;
> inrisetime = this_delay / (1.0 - 0.5);
> power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
> power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
> power_subarray_out_drv.readOp.gate_leakage +=
> cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
> // delay of signal through inverter-buffer to second level of sense-amp mux.
> // internal delay of buffer
> rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
> C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
> drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
> gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
> tf = rd * C_ld;
> this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
> delay_subarray_out_drv += this_delay;
> inrisetime = this_delay / (1.0 - 0.5);
> power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
> power_subarray_out_drv.readOp.leakage +=
> cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
> inv, is_dram) * g_tp.peri_global.Vdd;
> power_subarray_out_drv.readOp.gate_leakage +=
> cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
> inv) * g_tp.peri_global.Vdd;
1239,1250c1304,1322
< // inverter driving drain of pass transistor of second level of sense-amp mux.
< rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
< C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
< drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
< drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram);
< tf = rd * C_ld;
< this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
< delay_subarray_out_drv += this_delay;
< inrisetime = this_delay/(1.0 - 0.5);
< power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
< power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
< power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
---
> // inverter driving drain of pass transistor of second level of sense-amp mux.
> rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
> C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
> drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def,
> is_dram) +
> drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
> cam_cell.w : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 /
> (RWP + ERP + SCHP), is_dram);
> tf = rd * C_ld;
> this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
> delay_subarray_out_drv += this_delay;
> inrisetime = this_delay / (1.0 - 0.5);
> power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
> power_subarray_out_drv.readOp.leakage +=
> cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
> inv) * g_tp.peri_global.Vdd;
> power_subarray_out_drv.readOp.gate_leakage +=
> cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
> inv) * g_tp.peri_global.Vdd;
1253,1264c1325,1343
< // delay of signal through pass-transistor to input of subarray output driver.
< rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
< C_ld = dp.Ndsam_lev_2 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram) +
< //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
< gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
< tf = rd * C_ld;
< this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
< delay_subarray_out_drv += this_delay;
< inrisetime = this_delay/(1.0 - 0.5);
< power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
< power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
< power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
---
> // delay of signal through pass-transistor to input of subarray output driver.
> rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
> C_ld = dp.Ndsam_lev_2 *
> drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? cam_cell.w :
> cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP),
> is_dram) +
> //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
> gate_C(subarray_out_wire->repeater_size *
> (subarray_out_wire->wire_length /
> subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ *
> (1 + p_to_n_sz_r), 0.0, is_dram);
> tf = rd * C_ld;
> this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
> delay_subarray_out_drv += this_delay;
> inrisetime = this_delay / (1.0 - 0.5);
> power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
> power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
> power_subarray_out_drv.readOp.gate_leakage +=
> cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
1267c1346
< return inrisetime;
---
> return inrisetime;
1272,1274c1351,1352
< double Mat::compute_comparator_delay(double inrisetime)
< {
< int A = g_ip->tag_assoc;
---
> double Mat::compute_comparator_delay(double inrisetime) {
> int A = g_ip->tag_assoc;
1276,1277c1354,1355
< int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
< // a multiple of 4.
---
> int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
> // a multiple of 4.
1279,1287c1357,1365
< /* First Inverter */
< double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) +
< drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
< drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
< double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
< double tf = Req*Ceq;
< double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL);
< double nextinputtime = st1del/VTHCOMPINV;
< power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
---
> /* First Inverter */
> double Ceq = gate_C(g_tp.w_comp_inv_n2 + g_tp.w_comp_inv_p2, 0, is_dram) +
> drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
> drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
> double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
> double tf = Req * Ceq;
> double st1del = horowitz(inrisetime, tf, VTHCOMPINV, VTHCOMPINV, FALL);
> double nextinputtime = st1del / VTHCOMPINV;
> power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1289,1303c1367,1387
< //For each degree of associativity
< //there are 4 such quarter comparators
< double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
< double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
< /* Second Inverter */
< Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) +
< drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
< drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
< Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
< tf = Req*Ceq;
< double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE);
< nextinputtime = st2del/(1.0-VTHCOMPINV);
< power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
< lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
< gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
---
> //For each degree of associativity
> //there are 4 such quarter comparators
> double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1,
> g_tp.w_comp_inv_p1, 1, inv,
> is_dram) * 4 * A;
> double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1,
> g_tp.w_comp_inv_p1, 1, inv,
> is_dram) * 4 * A;
> /* Second Inverter */
> Ceq = gate_C(g_tp.w_comp_inv_n3 + g_tp.w_comp_inv_p3, 0, is_dram) +
> drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
> drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
> Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
> tf = Req * Ceq;
> double st2del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHCOMPINV, RISE);
> nextinputtime = st2del / (1.0 - VTHCOMPINV);
> power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
> lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
> inv, is_dram) * 4 * A;
> gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
> inv, is_dram) * 4 * A;
1305,1315c1389,1401
< /* Third Inverter */
< Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) +
< drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
< drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
< Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
< tf = Req*Ceq;
< double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL);
< nextinputtime = st3del/(VTHEVALINV);
< power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
< lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
< gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
---
> /* Third Inverter */
> Ceq = gate_C(g_tp.w_eval_inv_n + g_tp.w_eval_inv_p, 0, is_dram) +
> drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
> drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
> Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
> tf = Req * Ceq;
> double st3del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHEVALINV, FALL);
> nextinputtime = st3del / (VTHEVALINV);
> power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
> lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1,
> inv, is_dram) * 4 * A;
> gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3,
> 1, inv, is_dram) * 4 * A;
1317,1331c1403,1423
< /* Final Inverter (virtual ground driver) discharging compare part */
< double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram);
< double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */
< double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
< drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
< drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
< drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram);
< double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
< drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
< drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
< gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram);
< power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
< power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
< lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
< lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2
---
> /* Final Inverter (virtual ground driver) discharging compare part */
> double r1 = tr_R_on(g_tp.w_comp_n, NCH, 2, is_dram);
> double r2 = tr_R_on(g_tp.w_eval_inv_n, NCH, 1, is_dram); /* was switch */
> double c2 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
> g_tp.cell_h_def, is_dram) +
> drain_C_(g_tp.w_comp_n, NCH, 2, 1,
> g_tp.cell_h_def, is_dram)) +
> drain_C_(g_tp.w_eval_inv_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
> drain_C_(g_tp.w_eval_inv_n, NCH, 1, 1, g_tp.cell_h_def, is_dram);
> double c1 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
> g_tp.cell_h_def, is_dram) +
> drain_C_(g_tp.w_comp_n, NCH, 2, 1,
> g_tp.cell_h_def, is_dram)) +
> drain_C_(g_tp.w_comp_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
> gate_C(WmuxdrvNANDn + WmuxdrvNANDp, 0, is_dram);
> power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
> power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
> lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
> inv, is_dram) * 4 * A;
> lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
> is_dram) * 4 * A; // stack factor of 0.2
1333,1334c1425,1429
< gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
< gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter
---
> gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
> inv, is_dram) * 4 * A;
> //for gate leakage this equals to a inverter
> gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
> is_dram) * 4 * A;
1336,1340c1431,1435
< /* time to go to threshold of mux driver */
< double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND);
< /* take into account non-zero input rise time */
< double m = g_tp.peri_global.Vdd/nextinputtime;
< double Tcomparatorni;
---
> /* time to go to threshold of mux driver */
> double tstep = (r2 * c2 + (r1 + r2) * c1) * log(1.0 / VTHMUXNAND);
> /* take into account non-zero input rise time */
> double m = g_tp.peri_global.Vdd / nextinputtime;
> double Tcomparatorni;
1342,1355c1437,1453
< if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m))
< {
< double a = m;
< double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
< double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
< Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a);
< }
< else
< {
< Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m;
< }
< delay_comparator = Tcomparatorni+st1del+st2del+st3del;
< power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
< power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
---
> if ((tstep) <= (0.5*(g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m)) {
> double a = m;
> double b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) -
> g_tp.peri_global.Vth);
> double c = -2 * (tstep) * (g_tp.peri_global.Vdd -
> g_tp.peri_global.Vth) + 1 / m *
> ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) *
> ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth);
> Tcomparatorni = (-b + sqrt(b * b - 4 * a * c)) / (2 * a);
> } else {
> Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd +
> g_tp.peri_global.Vth) / (2 * m) -
> (g_tp.peri_global.Vdd * VTHEVALINV) / m;
> }
> delay_comparator = Tcomparatorni + st1del + st2del + st3del;
> power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
> power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
1357c1455
< return Tcomparatorni / (1.0 - VTHMUXNAND);;
---
> return Tcomparatorni / (1.0 - VTHMUXNAND);;
1362,1364c1460,1461
< void Mat::compute_power_energy()
< {
< //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
---
> void Mat::compute_power_energy() {
> //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
1366c1463
< //when plain read/write only one subarray in a single mat is active.
---
> //when plain read/write only one subarray in a single mat is active.
1369,1372c1466,1469
< power.readOp.dynamic += r_predec->power.readOp.dynamic +
< b_mux_predec->power.readOp.dynamic +
< sa_mux_lev_1_predec->power.readOp.dynamic +
< sa_mux_lev_2_predec->power.readOp.dynamic;
---
> power.readOp.dynamic += r_predec->power.readOp.dynamic +
> b_mux_predec->power.readOp.dynamic +
> sa_mux_lev_1_predec->power.readOp.dynamic +
> sa_mux_lev_2_predec->power.readOp.dynamic;
1374,1377c1471,1474
< // add energy consumed in decoders
< power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
< if (!(is_fa||pure_cam))
< power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
---
> // add energy consumed in decoders
> power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
> if (!(is_fa || pure_cam))
> power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
1379,1384c1476,1480
< // add energy consumed in bitline prechagers, SAs, and bitlines
< if (!(is_fa||pure_cam))
< {
< // add energy consumed in bitline prechagers
< power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
< power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
---
> // add energy consumed in bitline prechagers, SAs, and bitlines
> if (!(is_fa || pure_cam)) {
> // add energy consumed in bitline prechagers
> power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
> power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
1386,1388c1482,1484
< //Add sense amps energy
< num_sa_subarray = subarray.num_cols / deg_bl_muxing;
< power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ;
---
> //Add sense amps energy
> num_sa_subarray = subarray.num_cols / deg_bl_muxing;
> power_sa.readOp.dynamic *= num_sa_subarray * num_subarrays_per_mat ;
1390,1397c1486,1495
< // add energy consumed in bitlines
< //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
< power_bitline.readOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
< power_bitline.writeOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
< //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
< //Add subarray output energy
< power_subarray_out_drv.readOp.dynamic =
< (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
---
> // add energy consumed in bitlines
> //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
> power_bitline.readOp.dynamic *= num_subarrays_per_mat *
> subarray.num_cols;
> power_bitline.writeOp.dynamic *= num_subarrays_per_mat *
> subarray.num_cols;
> //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
> //Add subarray output energy
> power_subarray_out_drv.readOp.dynamic =
> (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1399,1402c1497,1500
< power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
< power_sa.readOp.dynamic +
< power_bitline.readOp.dynamic +
< power_subarray_out_drv.readOp.dynamic;
---
> power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
> power_sa.readOp.dynamic +
> power_bitline.readOp.dynamic +
> power_subarray_out_drv.readOp.dynamic;
1404,1409c1502,1507
< power.readOp.dynamic += power_row_decoders.readOp.dynamic +
< bit_mux_dec->power.readOp.dynamic +
< sa_mux_lev_1_dec->power.readOp.dynamic +
< sa_mux_lev_2_dec->power.readOp.dynamic +
< power_comparator.readOp.dynamic;
< }
---
> power.readOp.dynamic += power_row_decoders.readOp.dynamic +
> bit_mux_dec->power.readOp.dynamic +
> sa_mux_lev_1_dec->power.readOp.dynamic +
> sa_mux_lev_2_dec->power.readOp.dynamic +
> power_comparator.readOp.dynamic;
> }
1411,1417c1509,1514
< else if (is_fa)
< {
< //for plain read/write only one subarray in a mat is active
< // add energy consumed in bitline prechagers
< power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
< + cam_bl_precharge_eq_drv->power.readOp.dynamic;
< power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
---
> else if (is_fa) {
> //for plain read/write only one subarray in a mat is active
> // add energy consumed in bitline prechagers
> power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
> + cam_bl_precharge_eq_drv->power.readOp.dynamic;
> power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
1419,1423c1516,1522
< //Add sense amps energy
< num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing;
< num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing;
< power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search;
< power_sa.readOp.dynamic *= num_sa_subarray;
---
> //Add sense amps energy
> num_sa_subarray = (subarray.num_cols_fa_cam +
> subarray.num_cols_fa_ram) / deg_bl_muxing;
> num_sa_subarray_search = subarray.num_cols_fa_ram / deg_bl_muxing;
> power_sa.searchOp.dynamic = power_sa.readOp.dynamic *
> num_sa_subarray_search;
> power_sa.readOp.dynamic *= num_sa_subarray;
1426,1430c1525,1531
< // add energy consumed in bitlines
< power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
< power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
< power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
< power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
---
> // add energy consumed in bitlines
> power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
> power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam +
> subarray.num_cols_fa_ram);
> power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam +
> subarray.num_cols_fa_ram);
> power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
1432,1436c1533,1537
< //Add subarray output energy
< power_subarray_out_drv.searchOp.dynamic =
< (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
< power_subarray_out_drv.readOp.dynamic =
< (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
---
> //Add subarray output energy
> power_subarray_out_drv.searchOp.dynamic =
> (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
> power_subarray_out_drv.readOp.dynamic =
> (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1439,1442c1540,1543
< power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
< power_sa.readOp.dynamic +
< power_bitline.readOp.dynamic +
< power_subarray_out_drv.readOp.dynamic;
---
> power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
> power_sa.readOp.dynamic +
> power_bitline.readOp.dynamic +
> power_subarray_out_drv.readOp.dynamic;
1444,1448c1545,1549
< power.readOp.dynamic += power_row_decoders.readOp.dynamic +
< bit_mux_dec->power.readOp.dynamic +
< sa_mux_lev_1_dec->power.readOp.dynamic +
< sa_mux_lev_2_dec->power.readOp.dynamic +
< power_comparator.readOp.dynamic;
---
> power.readOp.dynamic += power_row_decoders.readOp.dynamic +
> bit_mux_dec->power.readOp.dynamic +
> sa_mux_lev_1_dec->power.readOp.dynamic +
> sa_mux_lev_2_dec->power.readOp.dynamic +
> power_comparator.readOp.dynamic;
1450,1459c1551,1563
< //add energy consumed inside cam
< power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
< power_searchline_precharge = sl_precharge_eq_drv->power;
< power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
< power_searchline = sl_data_drv->power;
< power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
< power_matchline_precharge = ml_precharge_drv->power;
< power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
< power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
< power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
---
> //add energy consumed inside cam
> power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
> power_searchline_precharge = sl_precharge_eq_drv->power;
> power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
> power_searchline = sl_data_drv->power;
> power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
> subarray.num_cols_fa_cam * num_subarrays_per_mat;;
> power_matchline_precharge = ml_precharge_drv->power;
> power_matchline_precharge.searchOp.dynamic =
> power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
> power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
> power_ml_to_ram_wl_drv.searchOp.dynamic =
> ml_to_ram_wl_drv->power.readOp.dynamic;
1461,1464c1565,1571
< power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
< power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
< power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
< power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
---
> power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
> power_cam_all_active.searchOp.dynamic +=
> power_searchline_precharge.searchOp.dynamic;
> power_cam_all_active.searchOp.dynamic +=
> power_searchline.searchOp.dynamic;
> power_cam_all_active.searchOp.dynamic +=
> power_matchline_precharge.searchOp.dynamic;
1466,1467c1573,1574
< power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
< //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
---
> power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
> //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1469,1476c1576,1581
< }
< else
< {
< // add energy consumed in bitline prechagers
< power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
< //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
< //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
< //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
---
> } else {
> // add energy consumed in bitline prechagers
> power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
> //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
> //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
> //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
1478,1481c1583,1586
< //Add sense amps energy
< num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing;
< power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
< power_sa.searchOp.dynamic = 0;
---
> //Add sense amps energy
> num_sa_subarray = subarray.num_cols_fa_cam / deg_bl_muxing;
> power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
> power_sa.searchOp.dynamic = 0;
1483,1485c1588,1590
< power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
< power_bitline.searchOp.dynamic = 0;
< power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
---
> power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
> power_bitline.searchOp.dynamic = 0;
> power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
1487,1490c1592,1595
< power_subarray_out_drv.searchOp.dynamic =
< (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
< power_subarray_out_drv.readOp.dynamic =
< (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
---
> power_subarray_out_drv.searchOp.dynamic =
> (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
> power_subarray_out_drv.readOp.dynamic =
> (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1492,1495c1597,1600
< power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
< power_sa.readOp.dynamic +
< power_bitline.readOp.dynamic +
< power_subarray_out_drv.readOp.dynamic;
---
> power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
> power_sa.readOp.dynamic +
> power_bitline.readOp.dynamic +
> power_subarray_out_drv.readOp.dynamic;
1497,1501c1602,1606
< power.readOp.dynamic += power_row_decoders.readOp.dynamic +
< bit_mux_dec->power.readOp.dynamic +
< sa_mux_lev_1_dec->power.readOp.dynamic +
< sa_mux_lev_2_dec->power.readOp.dynamic +
< power_comparator.readOp.dynamic;
---
> power.readOp.dynamic += power_row_decoders.readOp.dynamic +
> bit_mux_dec->power.readOp.dynamic +
> sa_mux_lev_1_dec->power.readOp.dynamic +
> sa_mux_lev_2_dec->power.readOp.dynamic +
> power_comparator.readOp.dynamic;
1504,1513c1609,1621
< ////add energy consumed inside cam
< power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
< power_searchline_precharge = sl_precharge_eq_drv->power;
< power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
< power_searchline = sl_data_drv->power;
< power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
< power_matchline_precharge = ml_precharge_drv->power;
< power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
< power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
< power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
---
> ////add energy consumed inside cam
> power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
> power_searchline_precharge = sl_precharge_eq_drv->power;
> power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
> power_searchline = sl_data_drv->power;
> power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
> subarray.num_cols_fa_cam * num_subarrays_per_mat;;
> power_matchline_precharge = ml_precharge_drv->power;
> power_matchline_precharge.searchOp.dynamic =
> power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
> power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
> power_ml_to_ram_wl_drv.searchOp.dynamic =
> ml_to_ram_wl_drv->power.readOp.dynamic;
1515,1518c1623,1630
< power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
< power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
< power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
< power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
---
> power_cam_all_active.searchOp.dynamic =
> power_matchline.searchOp.dynamic;
> power_cam_all_active.searchOp.dynamic +=
> power_searchline_precharge.searchOp.dynamic;
> power_cam_all_active.searchOp.dynamic +=
> power_searchline.searchOp.dynamic;
> power_cam_all_active.searchOp.dynamic +=
> power_matchline_precharge.searchOp.dynamic;
1520,1521c1632,1633
< power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
< //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
---
> power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
> //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1523c1635
< }
---
> }
1527,1529c1639,1640
< // calculate leakage power
< if (!(is_fa || pure_cam))
< {
---
> // calculate leakage power
> if (!(is_fa || pure_cam)) {
1533,1534c1644,1646
< power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
< power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
---
> power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
> power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
> (RWP + ERP);
1536,1539c1648,1651
< //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
< power_subarray_out_drv.readOp.leakage =
< (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
< number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
---
> //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
> power_subarray_out_drv.readOp.leakage =
> (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
> number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
1541,1545c1653,1657
< power.readOp.leakage += power_bitline.readOp.leakage +
< power_bl_precharge_eq_drv.readOp.leakage +
< power_sa.readOp.leakage +
< power_subarray_out_drv.readOp.leakage;
< //cout<<"leakage"<<power.readOp.leakage<<endl;
---
> power.readOp.leakage += power_bitline.readOp.leakage +
> power_bl_precharge_eq_drv.readOp.leakage +
> power_sa.readOp.leakage +
> power_subarray_out_drv.readOp.leakage;
> //cout<<"leakage"<<power.readOp.leakage<<endl;
1547,1548c1659,1660
< power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
< power.readOp.leakage += power_comparator.readOp.leakage;
---
> power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
> power.readOp.leakage += power_comparator.readOp.leakage;
1550c1662
< //cout<<"leakage1"<<power.readOp.leakage<<endl;
---
> //cout<<"leakage1"<<power.readOp.leakage<<endl;
1552,1556c1664,1668
< // leakage power
< power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
< power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
< power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
< power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
---
> // leakage power
> power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
> power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
> power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
> power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
1558,1566c1670,1678
< power.readOp.leakage += r_predec->power.readOp.leakage +
< b_mux_predec->power.readOp.leakage +
< sa_mux_lev_1_predec->power.readOp.leakage +
< sa_mux_lev_2_predec->power.readOp.leakage +
< power_row_decoders.readOp.leakage +
< power_bit_mux_decoders.readOp.leakage +
< power_sa_mux_lev_1_decoders.readOp.leakage +
< power_sa_mux_lev_2_decoders.readOp.leakage;
< //cout<<"leakage2"<<power.readOp.leakage<<endl;
---
> power.readOp.leakage += r_predec->power.readOp.leakage +
> b_mux_predec->power.readOp.leakage +
> sa_mux_lev_1_predec->power.readOp.leakage +
> sa_mux_lev_2_predec->power.readOp.leakage +
> power_row_decoders.readOp.leakage +
> power_bit_mux_decoders.readOp.leakage +
> power_sa_mux_lev_1_decoders.readOp.leakage +
> power_sa_mux_lev_2_decoders.readOp.leakage;
> //cout<<"leakage2"<<power.readOp.leakage<<endl;
1568c1680
< //++++Below is gate leakage
---
> //++++Below is gate leakage
1570,1571c1682,1684
< power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
< power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
---
> power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
> power_sa.readOp.gate_leakage *= num_sa_subarray *
> num_subarrays_per_mat * (RWP + ERP);
1573,1576c1686,1689
< //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
< power_subarray_out_drv.readOp.gate_leakage =
< (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
< number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
---
> //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
> power_subarray_out_drv.readOp.gate_leakage =
> (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
> number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
1578,1582c1691,1695
< power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
< power_bl_precharge_eq_drv.readOp.gate_leakage +
< power_sa.readOp.gate_leakage +
< power_subarray_out_drv.readOp.gate_leakage;
< //cout<<"leakage"<<power.readOp.leakage<<endl;
---
> power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
> power_bl_precharge_eq_drv.readOp.gate_leakage +
> power_sa.readOp.gate_leakage +
> power_subarray_out_drv.readOp.gate_leakage;
> //cout<<"leakage"<<power.readOp.leakage<<endl;
1584,1585c1697,1698
< power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
< power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
---
> power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
> power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
1587c1700
< //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
---
> //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
1589,1593c1702,1706
< // gate_leakage power
< power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
< power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
< power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
< power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
---
> // gate_leakage power
> power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
> power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
> power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
> power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
1595,1606c1708,1717
< power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
< b_mux_predec->power.readOp.gate_leakage +
< sa_mux_lev_1_predec->power.readOp.gate_leakage +
< sa_mux_lev_2_predec->power.readOp.gate_leakage +
< power_row_decoders.readOp.gate_leakage +
< power_bit_mux_decoders.readOp.gate_leakage +
< power_sa_mux_lev_1_decoders.readOp.gate_leakage +
< power_sa_mux_lev_2_decoders.readOp.gate_leakage;
< }
< else if (is_fa)
< {
< int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
---
> power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
> b_mux_predec->power.readOp.gate_leakage +
> sa_mux_lev_1_predec->power.readOp.gate_leakage +
> sa_mux_lev_2_predec->power.readOp.gate_leakage +
> power_row_decoders.readOp.gate_leakage +
> power_bit_mux_decoders.readOp.gate_leakage +
> power_sa_mux_lev_1_decoders.readOp.gate_leakage +
> power_sa_mux_lev_2_decoders.readOp.gate_leakage;
> } else if (is_fa) {
> int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1608,1611c1719,1723
< power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
< power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
< power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
< power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
---
> power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
> power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
> power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
> power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
> (RWP + ERP + SCHP);
1613c1725
< //cout<<"leakage3"<<power.readOp.leakage<<endl;
---
> //cout<<"leakage3"<<power.readOp.leakage<<endl;
1616,1618c1728,1730
< power_subarray_out_drv.readOp.leakage =
< (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
< number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
---
> power_subarray_out_drv.readOp.leakage =
> (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
> number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1620,1624c1732,1736
< power.readOp.leakage += power_bitline.readOp.leakage +
< power_bl_precharge_eq_drv.readOp.leakage +
< power_bl_precharge_eq_drv.searchOp.leakage +
< power_sa.readOp.leakage +
< power_subarray_out_drv.readOp.leakage;
---
> power.readOp.leakage += power_bitline.readOp.leakage +
> power_bl_precharge_eq_drv.readOp.leakage +
> power_bl_precharge_eq_drv.searchOp.leakage +
> power_sa.readOp.leakage +
> power_subarray_out_drv.readOp.leakage;
1626c1738
< //cout<<"leakage4"<<power.readOp.leakage<<endl;
---
> //cout<<"leakage4"<<power.readOp.leakage<<endl;
1628,1631c1740,1743
< // leakage power
< power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
< power.readOp.leakage += r_predec->power.readOp.leakage +
< power_row_decoders.readOp.leakage;
---
> // leakage power
> power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
> power.readOp.leakage += r_predec->power.readOp.leakage +
> power_row_decoders.readOp.leakage;
1633c1745
< //cout<<"leakage5"<<power.readOp.leakage<<endl;
---
> //cout<<"leakage5"<<power.readOp.leakage<<endl;
1635,1640c1747,1756
< //inside cam
< power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
< power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
< power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
< power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
< power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
---
> //inside cam
> power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
> power_cam_all_active.searchOp.leakage +=
> sl_precharge_eq_drv->power.readOp.leakage;
> power_cam_all_active.searchOp.leakage +=
> sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
> power_cam_all_active.searchOp.leakage +=
> ml_precharge_drv->power.readOp.dynamic;
> power_cam_all_active.searchOp.leakage *=
> num_subarrays_per_mat;
1642c1758
< power.readOp.leakage += power_cam_all_active.searchOp.leakage;
---
> power.readOp.leakage += power_cam_all_active.searchOp.leakage;
1646,1650c1762,1767
< //+++Below is gate leakage
< power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
< power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
< power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
< power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
---
> //+++Below is gate leakage
> power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
> power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
> power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
> power_sa.readOp.gate_leakage *= num_sa_subarray *
> num_subarrays_per_mat * (RWP + ERP + SCHP);
1652c1769
< //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
---
> //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
1655,1657c1772,1774
< power_subarray_out_drv.readOp.gate_leakage =
< (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
< number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
---
> power_subarray_out_drv.readOp.gate_leakage =
> (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
> number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1659,1663c1776,1780
< power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
< power_bl_precharge_eq_drv.readOp.gate_leakage +
< power_bl_precharge_eq_drv.searchOp.gate_leakage +
< power_sa.readOp.gate_leakage +
< power_subarray_out_drv.readOp.gate_leakage;
---
> power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
> power_bl_precharge_eq_drv.readOp.gate_leakage +
> power_bl_precharge_eq_drv.searchOp.gate_leakage +
> power_sa.readOp.gate_leakage +
> power_subarray_out_drv.readOp.gate_leakage;
1665c1782
< //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
---
> //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
1667,1670c1784,1787
< // gate_leakage power
< power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
< power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
< power_row_decoders.readOp.gate_leakage;
---
> // gate_leakage power
> power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
> power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
> power_row_decoders.readOp.gate_leakage;
1672c1789
< //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
---
> //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
1674,1679c1791,1800
< //inside cam
< power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
< power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
< power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
< power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
< power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
---
> //inside cam
> power_cam_all_active.searchOp.gate_leakage =
> power_matchline.searchOp.gate_leakage;
> power_cam_all_active.searchOp.gate_leakage +=
> sl_precharge_eq_drv->power.readOp.gate_leakage;
> power_cam_all_active.searchOp.gate_leakage +=
> sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
> power_cam_all_active.searchOp.gate_leakage +=
> ml_precharge_drv->power.readOp.dynamic;
> power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
1681c1802
< power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
---
> power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
1683,1686c1804,1805
< }
< else
< {
< int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
---
> } else {
> int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1688,1691c1807,1811
< //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
< //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
< power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
< power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
---
> //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
> //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
> power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
> power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
> (RWP + ERP + SCHP);
1694,1696c1814,1816
< power_subarray_out_drv.readOp.leakage =
< (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
< number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
---
> power_subarray_out_drv.readOp.leakage =
> (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
> number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1698,1702c1818,1822
< power.readOp.leakage += //power_bitline.readOp.leakage +
< //power_bl_precharge_eq_drv.readOp.leakage +
< power_bl_precharge_eq_drv.searchOp.leakage +
< power_sa.readOp.leakage +
< power_subarray_out_drv.readOp.leakage;
---
> power.readOp.leakage += //power_bitline.readOp.leakage +
> //power_bl_precharge_eq_drv.readOp.leakage +
> power_bl_precharge_eq_drv.searchOp.leakage +
> power_sa.readOp.leakage +
> power_subarray_out_drv.readOp.leakage;
1704,1707c1824,1828
< // leakage power
< power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
< power.readOp.leakage += r_predec->power.readOp.leakage +
< power_row_decoders.readOp.leakage;
---
> // leakage power
> power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage *
> subarray.num_rows * num_subarrays_per_mat * (RWP + ERP + EWP);
> power.readOp.leakage += r_predec->power.readOp.leakage +
> power_row_decoders.readOp.leakage;
1709,1714c1830,1838
< //inside cam
< power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
< power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
< power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
< power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
< power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
---
> //inside cam
> power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
> power_cam_all_active.searchOp.leakage +=
> sl_precharge_eq_drv->power.readOp.leakage;
> power_cam_all_active.searchOp.leakage +=
> sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
> power_cam_all_active.searchOp.leakage +=
> ml_precharge_drv->power.readOp.dynamic;
> power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
1716c1840
< power.readOp.leakage += power_cam_all_active.searchOp.leakage;
---
> power.readOp.leakage += power_cam_all_active.searchOp.leakage;
1718,1720c1842,1845
< //+++Below is gate leakage
< power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
< power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
---
> //+++Below is gate leakage
> power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
> power_sa.readOp.gate_leakage *= num_sa_subarray *
> num_subarrays_per_mat * (RWP + ERP + SCHP);
1723,1725c1848,1850
< power_subarray_out_drv.readOp.gate_leakage =
< (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
< number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
---
> power_subarray_out_drv.readOp.gate_leakage =
> (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
> number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1727,1731c1852,1856
< power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
< //power_bl_precharge_eq_drv.readOp.gate_leakage +
< power_bl_precharge_eq_drv.searchOp.gate_leakage +
< power_sa.readOp.gate_leakage +
< power_subarray_out_drv.readOp.gate_leakage;
---
> power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
> //power_bl_precharge_eq_drv.readOp.gate_leakage +
> power_bl_precharge_eq_drv.searchOp.gate_leakage +
> power_sa.readOp.gate_leakage +
> power_subarray_out_drv.readOp.gate_leakage;
1733,1736c1858,1863
< // gate_leakage power
< power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
< power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
< power_row_decoders.readOp.gate_leakage;
---
> // gate_leakage power
> power_row_decoders.readOp.gate_leakage =
> row_dec->power.readOp.gate_leakage * subarray.num_rows *
> num_subarrays_per_mat * (RWP + ERP + EWP);
> power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
> power_row_decoders.readOp.gate_leakage;
1738,1743c1865,1875
< //inside cam
< power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
< power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
< power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
< power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
< power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
---
> //inside cam
> power_cam_all_active.searchOp.gate_leakage =
> power_matchline.searchOp.gate_leakage;
> power_cam_all_active.searchOp.gate_leakage +=
> sl_precharge_eq_drv->power.readOp.gate_leakage;
> power_cam_all_active.searchOp.gate_leakage +=
> sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
> power_cam_all_active.searchOp.gate_leakage +=
> ml_precharge_drv->power.readOp.dynamic;
> power_cam_all_active.searchOp.gate_leakage *=
> num_subarrays_per_mat;
1745,1746c1877,1878
< power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
< }
---
> power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
> }