1/*****************************************************************************
2 *                                McPAT/CACTI
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
6 *                          All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 ***************************************************************************/
32
33
34
35#include <cassert>
36
37#include "mat.h"
38
39Mat::Mat(const DynamicParameter & dyn_p)
40    : dp(dyn_p),
41      power_subarray_out_drv(),
42      delay_fa_tag(0), delay_cam(0),
43      delay_before_decoder(0), delay_bitline(0),
44      delay_wl_reset(0), delay_bl_restore(0),
45      delay_searchline(0), delay_matchchline(0),
46      delay_cam_sl_restore(0), delay_cam_ml_reset(0),
47      delay_fa_ram_wl(0), delay_hit_miss_reset(0),
48      delay_hit_miss(0),
49      subarray(dp, dp.fully_assoc),
50      power_bitline(), per_bitline_read_energy(0),
51      deg_bl_muxing(dp.deg_bl_muxing),
52      num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
53      delay_writeback(0),
54      cell(subarray.cell), cam_cell(subarray.cam_cell),
55      is_dram(dyn_p.is_dram),
56      pure_cam(dyn_p.pure_cam),
57      num_mats(dp.num_mats),
58      power_sa(), delay_sa(0),
59      leak_power_sense_amps_closed_page_state(0),
60      leak_power_sense_amps_open_page_state(0),
61      delay_subarray_out_drv(0),
62      delay_comparator(0), power_comparator(),
63      num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
64      num_subarrays_per_mat(dp.num_subarrays / dp.num_mats),
65      num_subarrays_per_row(dp.Ndwl / dp.num_mats_h_dir) {
66    assert(num_subarrays_per_mat <= 4);
67    assert(num_subarrays_per_row <= 2);
68    is_fa = (dp.fully_assoc) ? true : false;
69    camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
70
71    if (is_fa || pure_cam) {
72        num_subarrays_per_row = num_subarrays_per_mat > 2 ?
73            num_subarrays_per_mat / 2 : num_subarrays_per_mat;
74    }
75
76    if (dp.use_inp_params == 1) {
77        RWP  = dp.num_rw_ports;
78        ERP  = dp.num_rd_ports;
79        EWP  = dp.num_wr_ports;
80        SCHP = dp.num_search_ports;
81    } else {
82        RWP = g_ip->num_rw_ports;
83        ERP = g_ip->num_rd_ports;
84        EWP = g_ip->num_wr_ports;
85        SCHP = g_ip->num_search_ports;
86
87    }
88
89    double number_sa_subarray;
90
91    if (!is_fa && !pure_cam) {
92        number_sa_subarray = subarray.num_cols / deg_bl_muxing;
93    } else if (is_fa && !pure_cam) {
94        number_sa_subarray =  (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
95    }
96
97    else {
98        number_sa_subarray =  (subarray.num_cols_fa_cam) / deg_bl_muxing;
99    }
100
101    int    num_dec_signals           = subarray.num_rows;
102    double C_ld_bit_mux_dec_out      = 0;
103    double C_ld_sa_mux_lev_1_dec_out = 0;
104    double C_ld_sa_mux_lev_2_dec_out = 0;
105    double R_wire_wl_drv_out;
106
107    if (!is_fa && !pure_cam) {
108        R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
109    } else if (is_fa && !pure_cam) {
110        R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
111    } else {
112        R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um;
113    }
114
115    double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
116    double R_wire_sa_mux_dec_out  = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
117
118    if (deg_bl_muxing > 1) {
119        C_ld_bit_mux_dec_out =
120            (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing) *
121            gate_C(g_tp.w_nmos_b_mux, 0, is_dram) +  // 2 transistor per cell
122            num_subarrays_per_row * subarray.num_cols *
123            g_tp.wire_inside_mat.C_per_um * cell.get_w();
124    }
125
126    if (dp.Ndsam_lev_1 > 1) {
127        C_ld_sa_mux_lev_1_dec_out =
128            (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1) *
129            gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
130            num_subarrays_per_row * subarray.num_cols *
131            g_tp.wire_inside_mat.C_per_um * cell.get_w();
132    }
133    if (dp.Ndsam_lev_2 > 1) {
134        C_ld_sa_mux_lev_2_dec_out =
135            (num_subarrays_per_mat * number_sa_subarray /
136             (dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) *
137            gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
138            num_subarrays_per_row * subarray.num_cols *
139            g_tp.wire_inside_mat.C_per_um * cell.get_w();
140    }
141
142    if (num_subarrays_per_row >= 2) {
143        // wire heads for both right and left side of a mat, so half the resistance
144        R_wire_bit_mux_dec_out /= 2.0;
145        R_wire_sa_mux_dec_out  /= 2.0;
146    }
147
148
149    row_dec = new Decoder(
150        num_dec_signals,
151        false,
152        subarray.C_wl,
153        R_wire_wl_drv_out,
154        false/*is_fa*/,
155        is_dram,
156        true,
157        camFlag ? cam_cell : cell);
158//  if (is_fa && (!dp.is_tag))
159//  {
160//    row_dec->exist = true;
161//  }
162    bit_mux_dec = new Decoder(
163        deg_bl_muxing,// This number is 1 for FA or CAM
164        false,
165        C_ld_bit_mux_dec_out,
166        R_wire_bit_mux_dec_out,
167        false/*is_fa*/,
168        is_dram,
169        false,
170        camFlag ? cam_cell : cell);
171    sa_mux_lev_1_dec = new Decoder(
172        dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
173        dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
174        C_ld_sa_mux_lev_1_dec_out,
175        R_wire_sa_mux_dec_out,
176        false/*is_fa*/,
177        is_dram,
178        false,
179        camFlag ? cam_cell : cell);
180    sa_mux_lev_2_dec = new Decoder(
181        dp.Ndsam_lev_2, // This number is 1 for FA or CAM
182        false,
183        C_ld_sa_mux_lev_2_dec_out,
184        R_wire_sa_mux_dec_out,
185        false/*is_fa*/,
186        is_dram,
187        false,
188        camFlag ? cam_cell : cell);
189
190    double C_wire_predec_blk_out;
191    double R_wire_predec_blk_out;
192
193    if (!is_fa && !pure_cam) {
194
195        C_wire_predec_blk_out  = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
196        R_wire_predec_blk_out  = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
197
198    } else { //for pre-decode block's load is same for both FA and CAM
199        C_wire_predec_blk_out  = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
200        R_wire_predec_blk_out  = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
201    }
202
203
204    if (is_fa || pure_cam)
205        num_dec_signals += _log2(num_subarrays_per_mat);
206
207    PredecBlk * r_predec_blk1 = new PredecBlk(
208        num_dec_signals,
209        row_dec,
210        C_wire_predec_blk_out,
211        R_wire_predec_blk_out,
212        num_subarrays_per_mat,
213        is_dram,
214        true);
215    PredecBlk * r_predec_blk2 = new PredecBlk(
216        num_dec_signals,
217        row_dec,
218        C_wire_predec_blk_out,
219        R_wire_predec_blk_out,
220        num_subarrays_per_mat,
221        is_dram,
222        false);
223    PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
224    PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
225    PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
226    PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
227    PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
228    PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
229    dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
230    dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
231
232    PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
233    PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
234    PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
235    PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
236    PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
237    PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
238    PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
239    PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
240    way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
241    dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
242
243    r_predec            = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
244    b_mux_predec        = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
245    sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
246    sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
247
248    subarray_out_wire   = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
249
250    double driver_c_gate_load;
251    double driver_c_wire_load;
252    double driver_r_wire_load;
253
254    if (is_fa || pure_cam)
255
256    {   //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
257        driver_c_gate_load = (subarray.num_cols_fa_cam ) *
258            gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
259                   is_dram, false, false);
260        driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
261            g_tp.wire_outside_mat.C_per_um;
262        driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
263            g_tp.wire_outside_mat.R_per_um;
264        cam_bl_precharge_eq_drv = new Driver(
265            driver_c_gate_load,
266            driver_c_wire_load,
267            driver_r_wire_load,
268            is_dram);
269
270        if (!pure_cam) {
271            //This is only used for fully asso not pure CAM
272            driver_c_gate_load = (subarray.num_cols_fa_ram ) *
273                gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
274                       is_dram, false, false);
275            driver_c_wire_load = subarray.num_cols_fa_ram * cell.w *
276                g_tp.wire_outside_mat.C_per_um;
277            driver_r_wire_load = subarray.num_cols_fa_ram * cell.w *
278                g_tp.wire_outside_mat.R_per_um;
279            bl_precharge_eq_drv = new Driver(
280                driver_c_gate_load,
281                driver_c_wire_load,
282                driver_r_wire_load,
283                is_dram);
284        }
285    }
286
287    else {
288        driver_c_gate_load =  subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
289        driver_c_wire_load =  subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
290        driver_r_wire_load =  subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
291        bl_precharge_eq_drv = new Driver(
292            driver_c_gate_load,
293            driver_c_wire_load,
294            driver_r_wire_load,
295            is_dram);
296    }
297    double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
298    double w_row_decoder    = area_row_decoder / subarray.area.get_h();
299
300    double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
301        compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
302
303    double h_subarray_out_drv = subarray_out_wire->area.get_area() *
304                                (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
305
306
307    h_subarray_out_drv *= (RWP + ERP + SCHP);
308
309    double h_comparators                = 0.0;
310    double w_row_predecode_output_wires = 0.0;
311    double h_bit_mux_dec_out_wires      = 0.0;
312    double h_senseamp_mux_dec_out_wires = 0.0;
313
314    if ((!is_fa) && (dp.is_tag)) {
315        //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
316        h_comparators  = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
317        h_comparators *= (RWP + ERP);
318    }
319
320
321    int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits);
322    int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits);
323    w_row_predecode_output_wires   = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) *
324                                     g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
325
326
327    double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
328                             (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
329                              h_subarray_out_drv + h_comparators);
330
331    double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
332
333    if (deg_bl_muxing > 1) {
334        h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
335    }
336    if (dp.Ndsam_lev_1 > 1) {
337        h_senseamp_mux_dec_out_wires =  dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
338    }
339    if (dp.Ndsam_lev_2 > 1) {
340        h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
341    }
342
343    double h_addr_datain_wires;
344    if (!g_ip->ver_htree_wires_over_array) {
345        h_addr_datain_wires = (dp.number_addr_bits_mat +
346                               dp.number_way_select_signals_mat +
347                               (dp.num_di_b_mat + dp.num_do_b_mat) /
348                               num_subarrays_per_row) *
349            g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
350
351        if (is_fa || pure_cam) {
352            h_addr_datain_wires =
353                (dp.number_addr_bits_mat +
354                 dp.number_way_select_signals_mat +  //TODO: revisit
355                 (dp.num_di_b_mat + dp.num_do_b_mat ) / num_subarrays_per_row) *
356                g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
357                (dp.num_si_b_mat + dp.num_so_b_mat ) / num_subarrays_per_row *
358                g_tp.wire_inside_mat.pitch * SCHP;
359        }
360        //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
361        //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
362        h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
363                           h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
364                          h_addr_datain_wires +
365                          h_bit_mux_dec_out_wires +
366                          h_senseamp_mux_dec_out_wires;
367
368    }
369
370    // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
371    double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
372                                        b_mux_predec_blk_drv1->area.get_area() +
373                                        sa_mux_lev_1_predec_blk_drv1->area.get_area() +
374                                        sa_mux_lev_2_predec_blk_drv1->area.get_area() +
375                                        way_sel_drv1->area.get_area() +
376                                        r_predec_blk_drv2->area.get_area() +
377                                        b_mux_predec_blk_drv2->area.get_area() +
378                                        sa_mux_lev_1_predec_blk_drv2->area.get_area() +
379                                        sa_mux_lev_2_predec_blk_drv2->area.get_area() +
380                                        r_predec_blk1->area.get_area() +
381                                        b_mux_predec_blk1->area.get_area() +
382                                        sa_mux_lev_1_predec_blk1->area.get_area() +
383                                        sa_mux_lev_2_predec_blk1->area.get_area() +
384                                        r_predec_blk2->area.get_area() +
385                                        b_mux_predec_blk2->area.get_area() +
386                                        sa_mux_lev_1_predec_blk2->area.get_area() +
387                                        sa_mux_lev_2_predec_blk2->area.get_area() +
388                                        bit_mux_dec->area.get_area() +
389                                        sa_mux_lev_1_dec->area.get_area() +
390                                        sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
391
392    double area_efficiency_mat;
393
394//  if (!is_fa)
395//  {
396    assert(num_subarrays_per_mat / num_subarrays_per_row > 0);
397    area.h = (num_subarrays_per_mat / num_subarrays_per_row) *
398        subarray.area.h + h_non_cell_area;
399    area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
400    area.w = (area.h * area.w + area_mat_center_circuitry) / area.h;
401    area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat *
402        100.0 / area.get_area();
403
404//    cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl;
405//    cout<<"h_comparators"<<h_comparators<<endl;
406//    cout<<"h_subarray_out_drv"<<h_subarray_out_drv<<endl;
407//    cout<<"h_addr_datain_wires"<<h_addr_datain_wires<<endl;
408//    cout<<"h_bit_mux_dec_out_wires"<<h_bit_mux_dec_out_wires<<endl;
409//    cout<<"h_senseamp_mux_dec_out_wires"<<h_senseamp_mux_dec_out_wires<<endl;
410//    cout<<"h_non_cell_area"<<h_non_cell_area<<endl;
411//    cout<<"area.h =" << (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h<<endl;
412//    cout<<"w_non_cell_area"<<w_non_cell_area<<endl;
413//    cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl;
414
415    assert(area.h > 0);
416    assert(area.w > 0);
417//  }
418//  else
419//  {
420//    area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.get_h() + h_non_cell_area;
421//    area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
422//    area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
423//    area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area();
424//  }
425}
426
427
428
429Mat::~Mat() {
430    delete row_dec;
431    delete bit_mux_dec;
432    delete sa_mux_lev_1_dec;
433    delete sa_mux_lev_2_dec;
434
435    delete r_predec->blk1;
436    delete r_predec->blk2;
437    delete b_mux_predec->blk1;
438    delete b_mux_predec->blk2;
439    delete sa_mux_lev_1_predec->blk1;
440    delete sa_mux_lev_1_predec->blk2;
441    delete sa_mux_lev_2_predec->blk1;
442    delete sa_mux_lev_2_predec->blk2;
443    delete dummy_way_sel_predec_blk1;
444    delete dummy_way_sel_predec_blk2;
445
446    delete r_predec->drv1;
447    delete r_predec->drv2;
448    delete b_mux_predec->drv1;
449    delete b_mux_predec->drv2;
450    delete sa_mux_lev_1_predec->drv1;
451    delete sa_mux_lev_1_predec->drv2;
452    delete sa_mux_lev_2_predec->drv1;
453    delete sa_mux_lev_2_predec->drv2;
454    delete way_sel_drv1;
455    delete dummy_way_sel_predec_blk_drv2;
456
457    delete r_predec;
458    delete b_mux_predec;
459    delete sa_mux_lev_1_predec;
460    delete sa_mux_lev_2_predec;
461
462    delete subarray_out_wire;
463    if (!pure_cam)
464        delete bl_precharge_eq_drv;
465
466    if (is_fa || pure_cam) {
467        delete sl_precharge_eq_drv ;
468        delete sl_data_drv ;
469        delete cam_bl_precharge_eq_drv;
470        delete ml_precharge_drv;
471        delete ml_to_ram_wl_drv;
472    }
473}
474
475
476
477double Mat::compute_delays(double inrisetime) {
478    int k;
479    double rd, C_intrinsic, C_ld, tf, R_bl_precharge, r_b_metal, R_bl, C_bl;
480    double outrisetime_search, outrisetime, row_dec_outrisetime;
481    // delay calculation for tags of fully associative cache
482    if (is_fa || pure_cam) {
483        //Compute search access time
484        outrisetime_search = compute_cam_delay(inrisetime);
485        if (is_fa) {
486            bl_precharge_eq_drv->compute_delay(0);
487            k = ml_to_ram_wl_drv->number_gates - 1;
488            rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
489            C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4 *
490                                   cell.h, is_dram, false, true) +
491                drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4 * cell.h,
492                         is_dram, false, true);
493            C_ld = ml_to_ram_wl_drv->c_gate_load +
494                ml_to_ram_wl_drv->c_wire_load;
495            tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
496            delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
497
498            R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
499            r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
500            R_bl = subarray.num_rows * r_b_metal;
501            C_bl = subarray.C_bl;
502            delay_bl_restore = bl_precharge_eq_drv->delay +
503                log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
504                    (g_tp.sram.Vbitpre - dp.V_b_sense)) *
505                (R_bl_precharge * C_bl + R_bl * C_bl / 2);
506
507
508            outrisetime_search = compute_bitline_delay(outrisetime_search);
509            outrisetime_search = compute_sa_delay(outrisetime_search);
510        }
511        outrisetime_search = compute_subarray_out_drv(outrisetime_search);
512        subarray_out_wire->set_in_rise_time(outrisetime_search);
513        outrisetime_search = subarray_out_wire->signal_rise_time();
514        delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
515
516
517        //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
518        outrisetime = r_predec->compute_delays(inrisetime);
519        row_dec_outrisetime = row_dec->compute_delays(outrisetime);
520
521        outrisetime = b_mux_predec->compute_delays(inrisetime);
522        bit_mux_dec->compute_delays(outrisetime);
523
524        outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
525        sa_mux_lev_1_dec->compute_delays(outrisetime);
526
527        outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
528        sa_mux_lev_2_dec->compute_delays(outrisetime);
529
530        if (pure_cam) {
531            outrisetime = compute_bitline_delay(row_dec_outrisetime);
532            outrisetime = compute_sa_delay(outrisetime);
533        }
534        return outrisetime_search;
535    } else {
536        bl_precharge_eq_drv->compute_delay(0);
537        if (row_dec->exist == true) {
538            int k = row_dec->num_gates - 1;
539            double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
540            // TODO: this 4*cell.h number must be revisited
541            double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 *
542                                          cell.h, is_dram, false, true) +
543                drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, is_dram,
544                         false, true);
545            double C_ld = row_dec->C_ld_dec_out;
546            double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
547            delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
548        }
549        double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
550        double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
551        double R_bl = subarray.num_rows * r_b_metal;
552        double C_bl = subarray.C_bl;
553
554        if (is_dram) {
555            delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
556        } else {
557            delay_bl_restore = bl_precharge_eq_drv->delay +
558                log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
559                    (g_tp.sram.Vbitpre - dp.V_b_sense)) *
560                (R_bl_precharge * C_bl + R_bl * C_bl / 2);
561        }
562    }
563
564
565
566    outrisetime = r_predec->compute_delays(inrisetime);
567    row_dec_outrisetime = row_dec->compute_delays(outrisetime);
568
569    outrisetime = b_mux_predec->compute_delays(inrisetime);
570    bit_mux_dec->compute_delays(outrisetime);
571
572    outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
573    sa_mux_lev_1_dec->compute_delays(outrisetime);
574
575    outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
576    sa_mux_lev_2_dec->compute_delays(outrisetime);
577
578    outrisetime = compute_bitline_delay(row_dec_outrisetime);
579    outrisetime = compute_sa_delay(outrisetime);
580    outrisetime = compute_subarray_out_drv(outrisetime);
581    subarray_out_wire->set_in_rise_time(outrisetime);
582    outrisetime = subarray_out_wire->signal_rise_time();
583
584    delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
585
586    if (dp.is_tag == true && dp.fully_assoc == false) {
587        compute_comparator_delay(0);
588    }
589
590    if (row_dec->exist == false) {
591        delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
592    }
593    return outrisetime;
594}
595
596
597
598double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() {
599
600    double height =
601        compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge,
602                                       camFlag ? cam_cell.w :
603                                       cell.w / (2 * (RWP + ERP + SCHP))) +
604        // precharge circuitry
605        compute_tr_width_after_folding(g_tp.w_pmos_bl_eq,
606                                       camFlag ? cam_cell.w :
607                                       cell.w / (RWP + ERP + SCHP));
608
609    if (deg_bl_muxing > 1) {
610        // col mux tr height
611        height +=
612            compute_tr_width_after_folding(g_tp.w_nmos_b_mux,
613                                           cell.w / (2 * (RWP + ERP)));
614        // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);  // bit mux dec out wires height
615    }
616
617    height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP));  // sense_amp_height
618
619    if (dp.Ndsam_lev_1 > 1) {
620        height += compute_tr_width_after_folding(
621                      g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP));  // sense_amp_mux_height
622        //height_senseamp_mux_decode_output_wires =  Ndsam * wire_inside_mat_pitch * (RWP + ERP);
623    }
624
625    if (dp.Ndsam_lev_2 > 1) {
626        height += compute_tr_width_after_folding(
627                      g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP));  // sense_amp_mux_height
628        //height_senseamp_mux_decode_output_wires =  Ndsam * wire_inside_mat_pitch * (RWP + ERP);
629
630        // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
631        height += 2 * compute_tr_width_after_folding(
632                      pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
633        height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
634    }
635
636    // TODO: this should be uncommented...
637    /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
638      {
639    //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
640    double width_write_driver_write_mux  = width_write_driver_or_write_mux();
641    double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
642    cell.w *
643    // deg_bl_muxing *
644    dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
645    height += height_write_driver_write_mux;
646    }*/
647
648    return height;
649}
650
651
652
653double Mat::compute_cam_delay(double inrisetime) {
654
655    double out_time_ramp, this_delay;
656    double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
657
658
659    double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
660    Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp,
661    Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp,
662    Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p;
663
664    double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal,  dynSearchEng;
665    int Htagbits;
666
667    double driver_c_gate_load;
668    double driver_c_wire_load;
669    double driver_r_wire_load;
670    //double searchline_precharge_time;
671
672    double leak_power_cc_inverters_sram_cell         = 0;
673    double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
674    double leak_power_RD_port_sram_cell              = 0;
675    double leak_power_SCHP_port_sram_cell            = 0;
676    double leak_comparator_cam_cell                  =0;
677
678    double gate_leak_comparator_cam_cell          = 0;
679    double gate_leak_power_cc_inverters_sram_cell = 0;
680    double gate_leak_power_RD_port_sram_cell      = 0;
681    double gate_leak_power_SCHP_port_sram_cell    = 0;
682
683    c_matchline_metal   = cam_cell.get_w() * g_tp.wire_local.C_per_um;
684    c_searchline_metal  = cam_cell.get_h() * g_tp.wire_local.C_per_um;
685    r_matchline_metal   = cam_cell.get_w() * g_tp.wire_local.R_per_um;
686    r_searchline_metal  = cam_cell.get_h() * g_tp.wire_local.R_per_um;
687
688    dynSearchEng = 0.0;
689    delay_matchchline = 0.0;
690    double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
691    bool linear_scaling = false;
692
693    if (linear_scaling) {
694        Wdecdrivep    =  450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
695        Wdecdriven    =  300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
696        Wfadriven     = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
697        Wfadrivep     =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
698        Wfadrive2n    =  250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
699        Wfadrive2p    =  500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
700        Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
701        Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
702        Wfadecdrive2n =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
703        Wfadecdrive2p =   50 * g_ip->F_sz_um;//this was  40 micron for the 0.8 micron process
704        Wfadecdriven  = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
705        Wfadecdrivep  =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
706        Wfaprechn     =  7.5 * g_ip->F_sz_um;//this was   6 micron for the 0.8 micron process
707        Wfainvn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
708        Wfainvp       =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
709        Wfanandn      =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
710        Wfanandp      = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
711        Wdecnandn     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
712        Wdecnandp     = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
713
714        Wfaprechp     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
715        Wdummyn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
716        Wdummyinvn    =   75 * g_ip->F_sz_um;//this was  60 micron for the 0.8 micron process
717        Wdummyinvp    =  100 * g_ip->F_sz_um;//this was  80 micron for the 0.8 micron process
718        Waddrnandn    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
719        Waddrnandp    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
720        Wfanorn       = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
721        Wfanorp       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
722        W_hit_miss_n    = Wdummyn;
723        W_hit_miss_p    = g_tp.min_w_nmos_*p_to_n_sizing_r;
724        //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
725    } else {
726        Wdecdrivep    =  450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
727        Wdecdriven    =  300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
728        Wfadriven     = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
729        Wfadrivep     =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
730        Wfadrive2n    =  250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
731        Wfadrive2p    =  500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
732        Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
733        Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
734        Wfadecdrive2n =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
735        Wfadecdrive2p =   50 * g_ip->F_sz_um;//this was  40 micron for the 0.8 micron process
736        Wfadecdriven  = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
737        Wfadecdrivep  =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
738        Wfaprechn     =  7.5 * g_ip->F_sz_um;//this was   6 micron for the 0.8 micron process
739        Wfainvn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
740        Wfainvp       =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
741        Wfanandn      =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
742        Wfanandp      = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
743        Wdecnandn     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
744        Wdecnandp     = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
745
746        Wfaprechp     = g_tp.w_pmos_bl_precharge;//this was  10 micron for the 0.8 micron process
747        Wdummyn       = g_tp.cam.cell_nmos_w;
748        Wdummyinvn    =   75 * g_ip->F_sz_um;//this was  60 micron for the 0.8 micron process
749        Wdummyinvp    =  100 * g_ip->F_sz_um;//this was  80 micron for the 0.8 micron process
750        Waddrnandn    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
751        Waddrnandp    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
752        Wfanorn       = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
753        Wfanorp       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
754        W_hit_miss_n    = Wdummyn;
755        W_hit_miss_p    = g_tp.min_w_nmos_*p_to_n_sizing_r;
756    }
757
758    Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
759
760    /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
761       search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
762       From the driver(am and an) to the comparators in all the rows including the dummy row,
763       Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
764
765    //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
766    //Searchline precharge routes horizontally
767    driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
768    driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
769    driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
770
771    sl_precharge_eq_drv = new Driver(
772        driver_c_gate_load,
773        driver_c_wire_load,
774        driver_r_wire_load,
775        is_dram);
776
777    //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
778    //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
779    driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
780    driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
781    driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
782    sl_data_drv = new Driver(
783        driver_c_gate_load,
784        driver_c_wire_load,
785        driver_r_wire_load,
786        is_dram);
787
788    sl_precharge_eq_drv->compute_delay(0);
789    double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
790    double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
791    double R_bl = (subarray.num_rows + 1) * r_b_metal;
792    double C_bl = subarray.C_bl_cam;
793    delay_cam_sl_restore = sl_precharge_eq_drv->delay
794        + log(g_tp.cam.Vbitpre) * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
795
796    out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
797
798    //matchline ops delay
799    delay_matchchline += sl_data_drv->delay;
800
801    /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
802    //matchline delay, matchline power, matchline_reset for cycle time computation,
803
804    ////matchline precharge circuitry routes vertically
805    //There are two matchline precharge driver chains per subarray.
806    driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
807    driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
808    driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
809
810    ml_precharge_drv = new Driver(
811        driver_c_gate_load,
812        driver_c_wire_load,
813        driver_r_wire_load,
814        is_dram);
815
816    ml_precharge_drv->compute_delay(0);
817
818
819    rd =  tr_R_on(Wdummyn, NCH, 2, is_dram);
820    c_intrinsic = Htagbits *
821        (2 * drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def,
822                      is_dram)//TODO: the cell_h_def should be revisit
823         + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram) /
824         Htagbits);//since each halve only has one precharge tx per matchline
825
826    Cwire = c_matchline_metal * Htagbits;
827    Rwire = r_matchline_metal * Htagbits;
828    c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
829
830    double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
831    //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
832    double R_ml = Rwire;
833    double C_ml = Cwire + c_intrinsic;
834    //TODO: latest CAM has sense amps on matchlines too
835    delay_cam_ml_reset = ml_precharge_drv->delay
836        + log(g_tp.cam.Vbitpre) * (R_ml_precharge * C_ml + R_ml * C_ml / 2);
837
838    //matchline ops delay
839    tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
840    this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
841    delay_matchchline += this_delay;
842    out_time_ramp = this_delay / VTHFA3;
843
844    dynSearchEng += ((c_intrinsic + Cwire + c_gate_load) *
845                     (subarray.num_rows + 1)) //TODO: need to be precise
846        * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *
847        2;//each subarry has two halves
848
849    /* third stage, from the NAND2 gates to the drivers in the dummy row */
850    rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
851    c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
852                  drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram) * 2;
853    c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
854    tf = rd * (c_intrinsic + c_gate_load);
855    this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
856    out_time_ramp = this_delay / (1 - VTHFA4);
857    delay_matchchline += this_delay;
858
859    //only the dummy row has the extra inverter between NAND and NOR gates
860    dynSearchEng += (c_intrinsic * (subarray.num_rows + 1) + c_gate_load * 2) *
861        g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//  * Ntbl;
862
863    /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
864    rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
865    c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
866    Cwire = c_matchline_metal * Htagbits +  c_searchline_metal *
867        (subarray.num_rows + 1) / 2;
868    Rwire = r_matchline_metal * Htagbits +  r_searchline_metal *
869        (subarray.num_rows + 1) / 2;
870    c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
871    tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
872    this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
873    out_time_ramp = this_delay / VTHFA5;
874    delay_matchchline += this_delay;
875
876    dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows * c_gate_load) *
877        g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
878
879    /*final statge from the NOR gate to drive the wordline of the data portion */
880
881    //searchline data driver There are two matchline precharge driver chains per subarray.
882    driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
883    driver_c_wire_load = subarray.C_wl_ram;
884    driver_r_wire_load = subarray.R_wl_ram;
885
886    ml_to_ram_wl_drv = new Driver(
887        driver_c_gate_load,
888        driver_c_wire_load,
889        driver_r_wire_load,
890        is_dram);
891
892
893
894    rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
895    c_intrinsic = 2 * drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
896        drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
897    c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
898    tf = rd * (c_intrinsic + c_gate_load);
899    this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
900    out_time_ramp = this_delay / (1 - 0.5);
901    delay_matchchline += this_delay;
902
903    out_time_ramp   = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
904
905    //c_gate_load energy is computed in ml_to_ram_wl_drv
906    dynSearchEng  += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
907
908
909    /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
910    /*Precharge the hitting logic */
911    c_intrinsic = 2 *
912        drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
913    Cwire = c_searchline_metal * subarray.num_rows;
914    Rwire = r_searchline_metal * subarray.num_rows;
915    c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
916        subarray.num_rows;
917
918    rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
919    //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
920    double R_hit_miss = Rwire;
921    double C_hit_miss = Cwire + c_intrinsic;
922    delay_hit_miss_reset = log(g_tp.cam.Vbitpre) *
923        (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
924    dynSearchEng  += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
925
926    /*hitting logic evaluation */
927    c_intrinsic = 2 *
928        drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
929    Cwire = c_searchline_metal * subarray.num_rows;
930    Rwire = r_searchline_metal * subarray.num_rows;
931    c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
932        subarray.num_rows;
933
934    rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
935    tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
936
937    delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
938
939    if (is_fa)
940        delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
941
942    dynSearchEng  += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
943
944    /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
945
946    power_matchline.searchOp.dynamic = dynSearchEng;
947
948    //leakage in one subarray
949    double Iport     = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0,  1, nmos, false, true);//TODO: how much is the idle time? just by *2?
950    double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0,  2, nmos, false, true);
951    double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
952                                     1, inv, false, true) * 2;
953    //approx XOR with Inv
954    double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv,
955                                                false, true) * 2;
956
957    leak_power_cc_inverters_sram_cell         = Icell * g_tp.cam_cell.Vdd;
958    leak_comparator_cam_cell                  = Icell_comparator * g_tp.cam_cell.Vdd;
959    leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
960    leak_power_RD_port_sram_cell              = Iport_erp * g_tp.cam_cell.Vdd;
961    leak_power_SCHP_port_sram_cell            = 0;//search port and r/w port are sperate, therefore no access txs in search ports
962
963    power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
964                                        leak_comparator_cam_cell +
965                                        leak_power_acc_tr_RW_or_WR_port_sram_cell +
966                                        leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
967                                        leak_power_RD_port_sram_cell * ERP +
968                                        leak_power_SCHP_port_sram_cell * SCHP;
969//  power_matchline.searchOp.leakage += leak_comparator_cam_cell;
970    power_matchline.searchOp.leakage *= (subarray.num_rows + 1) *
971        subarray.num_cols_fa_cam;//TODO:dumy line precise
972    power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
973        cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
974    power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
975        cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
976    power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
977        cmos_Isub_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
978    //In idle states, the hit/miss txs are closed (on) therefore no Isub
979    power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
980    // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
981
982    //in idle state, Ig_on only possibly exist in access transistors of read only ports
983    double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
984    double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
985                                     1, inv, false, true) * 2;
986    double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv,
987                                                false, true) * 2;
988
989    gate_leak_comparator_cam_cell = Ig_cell_comparator * g_tp.cam_cell.Vdd;
990    gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.cam_cell.Vdd;
991    gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
992    gate_leak_power_SCHP_port_sram_cell = 0;
993
994    //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
995
996    power_matchline.searchOp.gate_leakage +=
997        gate_leak_power_cc_inverters_sram_cell;
998    power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
999    power_matchline.searchOp.gate_leakage +=
1000        gate_leak_power_SCHP_port_sram_cell * SCHP +
1001        gate_leak_power_RD_port_sram_cell * ERP;
1002    power_matchline.searchOp.gate_leakage *= (subarray.num_rows + 1) *
1003        subarray.num_cols_fa_cam;//TODO:dumy line precise
1004    power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
1005        cmos_Ig_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
1006    power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
1007        cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
1008    power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
1009        cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
1010    power_matchline.searchOp.gate_leakage += subarray.num_rows *
1011        cmos_Ig_leakage(W_hit_miss_n, 0, 1, nmos) * g_tp.cam_cell.Vdd +
1012        + cmos_Ig_leakage(0, W_hit_miss_p, 1, pmos) * g_tp.cam_cell.Vdd;
1013
1014
1015    return out_time_ramp;
1016}
1017
1018
1019double Mat::width_write_driver_or_write_mux() {
1020    // calculate resistance of SRAM cell pull-up PMOS transistor
1021    // cam and sram have same cell trasistor properties
1022    double R_sram_cell_pull_up_tr  = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
1023    double R_access_tr             = tr_R_on(g_tp.sram.cell_a_w,    NCH, 1, is_dram, true);
1024    double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
1025    double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
1026
1027    return width_write_driver_nmos;
1028}
1029
1030
1031
1032double Mat::compute_comparators_height(
1033    int tagbits,
1034    int number_ways_in_mat,
1035    double subarray_mem_cell_area_width) {
1036    double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
1037    double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
1038    return cumulative_area / subarray_mem_cell_area_width;
1039}
1040
1041
1042
1043double Mat::compute_bitline_delay(double inrisetime) {
1044    double V_b_pre, v_th_mem_cell, V_wl;
1045    double tstep;
1046    double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
1047    double R_cell_pull_down = 0.0, R_cell_acc = 0.0, r_dev = 0.0;
1048    int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
1049
1050    double R_b_metal = camFlag ? cam_cell.h : cell.h * g_tp.wire_local.R_per_um;
1051    double R_bl      = subarray.num_rows * R_b_metal;
1052    double C_bl      = subarray.C_bl;
1053
1054    // TODO: no leakage for DRAMs?
1055    double leak_power_cc_inverters_sram_cell = 0;
1056    double gate_leak_power_cc_inverters_sram_cell = 0;
1057    double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
1058    double leak_power_RD_port_sram_cell = 0;
1059    double gate_leak_power_RD_port_sram_cell = 0;
1060
1061    if (is_dram == true) {
1062        V_b_pre = g_tp.dram.Vbitpre;
1063        v_th_mem_cell = g_tp.dram_acc.Vth;
1064        V_wl = g_tp.vpp;
1065        //The access transistor is not folded. So we just need to specify a
1066        // threshold value for the folding width that is equal to or greater
1067        // than Wmemcella.
1068        R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
1069        r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
1070    } else { //SRAM
1071        V_b_pre = g_tp.sram.Vbitpre;
1072        v_th_mem_cell = g_tp.sram_cell.Vth;
1073        V_wl = g_tp.sram_cell.Vdd;
1074        R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
1075        R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
1076
1077        //Leakage current of an SRAM cell
1078        //TODO: how much is the idle time? just by *2?
1079        double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0,  1, nmos,
1080                                         false, true);
1081        double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0,  2, nmos,
1082                                             false, true);
1083        double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w,
1084                                         g_tp.sram.cell_pmos_w, 1, inv, false,
1085                                         true) * 2;//two invs per cell
1086
1087        leak_power_cc_inverters_sram_cell         = Icell * g_tp.sram_cell.Vdd;
1088        leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
1089        leak_power_RD_port_sram_cell              = Iport_erp * g_tp.sram_cell.Vdd;
1090
1091
1092        //in idle state, Ig_on only possibly exist in access transistors of read only ports
1093        double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,
1094                                             false, true);
1095        double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w,
1096                                         g_tp.sram.cell_pmos_w, 1, inv, false,
1097                                         true);
1098
1099        gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.sram_cell.Vdd;
1100        gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
1101    }
1102
1103
1104    double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0,
1105                                      camFlag ? cam_cell.w : cell.w /
1106                                      (2 * (RWP + ERP + SCHP)), is_dram);
1107    double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
1108    double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0,
1109                                            camFlag ? cam_cell.w :
1110                                            cell.w * deg_bl_muxing /
1111                                            (RWP + ERP + SCHP), is_dram);
1112    double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
1113    double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0,
1114                                      is_dram) +
1115        drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag ? cam_cell.w :
1116                 cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1117        drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? cam_cell.w :
1118                 cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1119    double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
1120                                            camFlag ? cam_cell.w :
1121                                            cell.w * deg_bl_muxing /
1122                                            (RWP + ERP + SCHP), is_dram);
1123
1124    if (is_dram) {
1125        double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd / 2) *
1126                                          g_tp.dram_cell_C /
1127                                          (g_tp.dram_cell_C + C_bl));
1128        tstep = 2.3 * fraction * r_dev *
1129            (g_tp.dram_cell_C * (C_bl + 2 * C_drain_sense_amp_iso +
1130                                 C_sense_amp_latch + C_drain_sense_amp_mux)) /
1131            (g_tp.dram_cell_C + (C_bl + 2 * C_drain_sense_amp_iso +
1132                                 C_sense_amp_latch + C_drain_sense_amp_mux));
1133        delay_writeback = tstep;
1134        dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch +
1135                        C_drain_sense_amp_mux) *
1136            (g_tp.dram_cell_Vdd / 2) *
1137            g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
1138        dynWriteEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch) *
1139            (g_tp.dram_cell_Vdd / 2) *
1140            g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ *
1141            num_act_mats_hor_dir * 100;
1142        per_bitline_read_energy = (C_bl + 2 * C_drain_sense_amp_iso +
1143                                   C_sense_amp_latch + C_drain_sense_amp_mux) *
1144            (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
1145    } else {
1146        double tau;
1147
1148        if (deg_bl_muxing > 1) {
1149            tau = (R_cell_pull_down + R_cell_acc) *
1150                (C_bl + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
1151                 C_sense_amp_latch + C_drain_sense_amp_mux) +
1152                R_bl * (C_bl / 2 + 2 * C_drain_bit_mux + 2 *
1153                        C_drain_sense_amp_iso + C_sense_amp_latch +
1154                        C_drain_sense_amp_mux) +
1155                R_bit_mux * (C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
1156                             C_sense_amp_latch + C_drain_sense_amp_mux) +
1157                R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch +
1158                                   C_drain_sense_amp_mux);
1159            dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense *
1160                g_tp.sram_cell.Vdd;
1161            dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch +
1162                            C_drain_sense_amp_mux) *
1163                2 * dp.V_b_sense * g_tp.sram_cell.Vdd *
1164                (1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
1165                 deg_bl_muxing);
1166            dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ /
1167                                deg_bl_muxing) / deg_senseamp_muxing) *
1168                num_act_mats_hor_dir * (C_bl + 2 * C_drain_bit_mux) *
1169                g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
1170            //Write Ops are differential for SRAM
1171        } else {
1172            tau = (R_cell_pull_down + R_cell_acc) *
1173                  (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
1174                  R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
1175            dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
1176                           2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
1177            dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
1178                                 deg_bl_muxing) / deg_senseamp_muxing) *
1179                               num_act_mats_hor_dir * C_bl) *
1180                g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
1181
1182        }
1183        tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
1184        power_bitline.readOp.leakage =
1185            leak_power_cc_inverters_sram_cell +
1186            leak_power_acc_tr_RW_or_WR_port_sram_cell +
1187            leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
1188            leak_power_RD_port_sram_cell * ERP;
1189        power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
1190                                            gate_leak_power_RD_port_sram_cell * ERP;
1191
1192    }
1193
1194//  cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl;
1195//  cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
1196//  cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
1197//  cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl;
1198
1199
1200    /* take input rise time into account */
1201    double m = V_wl / inrisetime;
1202    if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m)) {
1203        delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell) / m);
1204    } else {
1205        delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
1206    }
1207
1208    bool is_fa = (dp.fully_assoc) ? true : false;
1209
1210    if (dp.is_tag == false || is_fa == false) {
1211        power_bitline.readOp.dynamic  = dynRdEnergy;
1212        power_bitline.writeOp.dynamic = dynWriteEnergy;
1213    }
1214
1215    double outrisetime = 0;
1216    return outrisetime;
1217}
1218
1219
1220
1221double Mat::compute_sa_delay(double inrisetime) {
1222    //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
1223
1224    //Bitline circuitry leakage.
1225    double Iiso     = simplified_pmos_leakage(g_tp.w_iso, is_dram);
1226    double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
1227    double IsenseN  = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
1228    double IsenseP  = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
1229
1230    double lkgIdlePh  = IsenseEn;//+ 2*IoBufP;
1231    //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
1232    double lkgReadPh  = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
1233    //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
1234    //    lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
1235    double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
1236    leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
1237    leak_power_sense_amps_open_page_state   = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
1238
1239    // sense amplifier has to drive logic in "data out driver" and sense precharge load.
1240    // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
1241    //constant as well as the magnitude of input differential voltage.
1242    double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
1243        drain_C_(g_tp.w_sense_n, NCH, 1, 0,
1244                 camFlag ? cam_cell.w : cell.w * deg_bl_muxing /
1245                 (RWP + ERP + SCHP), is_dram) +
1246        drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ?
1247                 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
1248                 is_dram) +
1249        drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag ?
1250                 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
1251                 is_dram) +
1252        drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
1253                 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
1254                 is_dram);
1255    double tau = C_ld / g_tp.gm_sense_amp_latch;
1256    delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
1257    power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
1258                            num_subarrays_per_mat * num_act_mats_hor_dir*/;
1259    power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
1260
1261    double outrisetime = 0;
1262    return outrisetime;
1263}
1264
1265
1266
1267double Mat::compute_subarray_out_drv(double inrisetime) {
1268    double C_ld, rd, tf, this_delay;
1269    double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
1270
1271    // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
1272    rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
1273    C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
1274                                     camFlag ? cam_cell.w : cell.w *
1275                                     deg_bl_muxing / (RWP + ERP + SCHP),
1276                                     is_dram) +
1277        gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
1278    tf = rd * C_ld;
1279    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1280    delay_subarray_out_drv += this_delay;
1281    inrisetime = this_delay / (1.0 - 0.5);
1282    power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1283    power_subarray_out_drv.readOp.leakage += 0;  // for now, let leakage of the pass transistor be 0
1284    power_subarray_out_drv.readOp.gate_leakage +=
1285        cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
1286    // delay of signal through inverter-buffer to second level of sense-amp mux.
1287    // internal delay of buffer
1288    rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
1289    C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
1290           drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1291           gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
1292    tf = rd * C_ld;
1293    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1294    delay_subarray_out_drv += this_delay;
1295    inrisetime = this_delay / (1.0 - 0.5);
1296    power_subarray_out_drv.readOp.dynamic      += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1297    power_subarray_out_drv.readOp.leakage +=
1298        cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
1299                          inv, is_dram) * g_tp.peri_global.Vdd;
1300    power_subarray_out_drv.readOp.gate_leakage +=
1301        cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
1302                        inv) * g_tp.peri_global.Vdd;
1303
1304    // inverter driving drain of pass transistor of second level of sense-amp mux.
1305    rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
1306    C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
1307        drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def,
1308                 is_dram) +
1309        drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
1310                 cam_cell.w : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 /
1311                 (RWP + ERP + SCHP), is_dram);
1312    tf = rd * C_ld;
1313    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1314    delay_subarray_out_drv += this_delay;
1315    inrisetime = this_delay / (1.0 - 0.5);
1316    power_subarray_out_drv.readOp.dynamic      += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1317    power_subarray_out_drv.readOp.leakage +=
1318        cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
1319                          inv) * g_tp.peri_global.Vdd;
1320    power_subarray_out_drv.readOp.gate_leakage +=
1321        cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
1322                        inv) * g_tp.peri_global.Vdd;
1323
1324
1325    // delay of signal through pass-transistor to input of subarray output driver.
1326    rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
1327    C_ld = dp.Ndsam_lev_2 *
1328        drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? cam_cell.w :
1329                 cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP),
1330                 is_dram) +
1331           //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
1332        gate_C(subarray_out_wire->repeater_size *
1333               (subarray_out_wire->wire_length /
1334                subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ *
1335               (1 + p_to_n_sz_r), 0.0, is_dram);
1336    tf = rd * C_ld;
1337    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1338    delay_subarray_out_drv += this_delay;
1339    inrisetime = this_delay / (1.0 - 0.5);
1340    power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1341    power_subarray_out_drv.readOp.leakage += 0;  // for now, let leakage of the pass transistor be 0
1342    power_subarray_out_drv.readOp.gate_leakage +=
1343        cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
1344
1345
1346    return inrisetime;
1347}
1348
1349
1350
1351double Mat::compute_comparator_delay(double inrisetime) {
1352    int A = g_ip->tag_assoc;
1353
1354    int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
1355    // a multiple of 4.
1356
1357    /* First Inverter */
1358    double Ceq = gate_C(g_tp.w_comp_inv_n2 + g_tp.w_comp_inv_p2, 0, is_dram) +
1359                 drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1360                 drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1361    double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
1362    double tf  = Req * Ceq;
1363    double st1del = horowitz(inrisetime, tf, VTHCOMPINV, VTHCOMPINV, FALL);
1364    double nextinputtime = st1del / VTHCOMPINV;
1365    power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1366
1367    //For each degree of associativity
1368    //there are 4 such quarter comparators
1369    double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1,
1370                                          g_tp.w_comp_inv_p1, 1, inv,
1371                                          is_dram) * 4 * A;
1372    double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1,
1373                                            g_tp.w_comp_inv_p1, 1, inv,
1374                                            is_dram) * 4 * A;
1375    /* Second Inverter */
1376    Ceq = gate_C(g_tp.w_comp_inv_n3 + g_tp.w_comp_inv_p3, 0, is_dram) +
1377          drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1378          drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1379    Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
1380    tf = Req * Ceq;
1381    double st2del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHCOMPINV, RISE);
1382    nextinputtime = st2del / (1.0 - VTHCOMPINV);
1383    power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1384    lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
1385                                    inv, is_dram) * 4 * A;
1386    gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
1387                                      inv, is_dram) * 4 * A;
1388
1389    /* Third Inverter */
1390    Ceq = gate_C(g_tp.w_eval_inv_n + g_tp.w_eval_inv_p, 0, is_dram) +
1391          drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1392          drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1393    Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
1394    tf = Req * Ceq;
1395    double st3del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHEVALINV, FALL);
1396    nextinputtime = st3del / (VTHEVALINV);
1397    power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1398    lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1,
1399                                    inv, is_dram) * 4 * A;
1400    gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3,
1401                                      1, inv, is_dram) * 4 * A;
1402
1403    /* Final Inverter (virtual ground driver) discharging compare part */
1404    double r1 = tr_R_on(g_tp.w_comp_n, NCH, 2, is_dram);
1405    double r2 = tr_R_on(g_tp.w_eval_inv_n, NCH, 1, is_dram); /* was switch */
1406    double c2 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
1407                                       g_tp.cell_h_def, is_dram) +
1408                              drain_C_(g_tp.w_comp_n, NCH, 2, 1,
1409                                       g_tp.cell_h_def, is_dram)) +
1410        drain_C_(g_tp.w_eval_inv_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1411        drain_C_(g_tp.w_eval_inv_n, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1412    double c1 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
1413                                       g_tp.cell_h_def, is_dram) +
1414                              drain_C_(g_tp.w_comp_n, NCH, 2, 1,
1415                                       g_tp.cell_h_def, is_dram)) +
1416        drain_C_(g_tp.w_comp_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1417        gate_C(WmuxdrvNANDn + WmuxdrvNANDp, 0, is_dram);
1418    power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1419    power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *  (A - 1);
1420    lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
1421                                    inv, is_dram) * 4 * A;
1422    lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
1423                                    is_dram) * 4 * A; // stack factor of 0.2
1424
1425    gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
1426                                      inv, is_dram) * 4 * A;
1427    //for gate leakage this equals to a inverter
1428    gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
1429                                      is_dram) * 4 * A;
1430
1431    /* time to go to threshold of mux driver */
1432    double tstep = (r2 * c2 + (r1 + r2) * c1) * log(1.0 / VTHMUXNAND);
1433    /* take into account non-zero input rise time */
1434    double m = g_tp.peri_global.Vdd / nextinputtime;
1435    double Tcomparatorni;
1436
1437    if ((tstep) <= (0.5*(g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m)) {
1438        double a = m;
1439        double b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) -
1440                        g_tp.peri_global.Vth);
1441        double c = -2 * (tstep) * (g_tp.peri_global.Vdd -
1442                                   g_tp.peri_global.Vth) + 1 / m *
1443            ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) *
1444            ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth);
1445        Tcomparatorni = (-b + sqrt(b * b - 4 * a * c)) / (2 * a);
1446    } else {
1447        Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd +
1448                                   g_tp.peri_global.Vth) / (2 * m) -
1449            (g_tp.peri_global.Vdd * VTHEVALINV) / m;
1450    }
1451    delay_comparator = Tcomparatorni + st1del + st2del + st3del;
1452    power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
1453    power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
1454
1455    return Tcomparatorni / (1.0 - VTHMUXNAND);;
1456}
1457
1458
1459
1460void Mat::compute_power_energy() {
1461    //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
1462    //when search all subarrays and all mats are fully active
1463    //when plain read/write only one subarray in a single mat is active.
1464
1465    // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
1466    power.readOp.dynamic += r_predec->power.readOp.dynamic +
1467                            b_mux_predec->power.readOp.dynamic +
1468                            sa_mux_lev_1_predec->power.readOp.dynamic +
1469                            sa_mux_lev_2_predec->power.readOp.dynamic;
1470
1471    // add energy consumed in decoders
1472    power_row_decoders.readOp.dynamic        = row_dec->power.readOp.dynamic;
1473    if (!(is_fa || pure_cam))
1474        power_row_decoders.readOp.dynamic        *= num_subarrays_per_mat;
1475
1476    // add energy consumed in bitline prechagers, SAs, and bitlines
1477    if (!(is_fa || pure_cam)) {
1478        // add energy consumed in bitline prechagers
1479        power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
1480        power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
1481
1482        //Add sense amps energy
1483        num_sa_subarray = subarray.num_cols / deg_bl_muxing;
1484        power_sa.readOp.dynamic *= num_sa_subarray * num_subarrays_per_mat ;
1485
1486        // add energy consumed in bitlines
1487        //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
1488        power_bitline.readOp.dynamic *= num_subarrays_per_mat *
1489            subarray.num_cols;
1490        power_bitline.writeOp.dynamic *= num_subarrays_per_mat *
1491            subarray.num_cols;
1492        //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
1493        //Add subarray output energy
1494        power_subarray_out_drv.readOp.dynamic =
1495            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1496
1497        power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1498                                power_sa.readOp.dynamic +
1499                                power_bitline.readOp.dynamic +
1500                                power_subarray_out_drv.readOp.dynamic;
1501
1502        power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1503                                bit_mux_dec->power.readOp.dynamic +
1504                                sa_mux_lev_1_dec->power.readOp.dynamic +
1505                                sa_mux_lev_2_dec->power.readOp.dynamic +
1506                                power_comparator.readOp.dynamic;
1507    }
1508
1509    else if (is_fa) {
1510        //for plain read/write only one subarray in a mat is active
1511        // add energy consumed in bitline prechagers
1512        power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
1513                + cam_bl_precharge_eq_drv->power.readOp.dynamic;
1514        power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
1515
1516        //Add sense amps energy
1517        num_sa_subarray = (subarray.num_cols_fa_cam +
1518                           subarray.num_cols_fa_ram) / deg_bl_muxing;
1519        num_sa_subarray_search = subarray.num_cols_fa_ram / deg_bl_muxing;
1520        power_sa.searchOp.dynamic = power_sa.readOp.dynamic *
1521            num_sa_subarray_search;
1522        power_sa.readOp.dynamic *= num_sa_subarray;
1523
1524
1525        // add energy consumed in bitlines
1526        power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
1527        power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam +
1528                                         subarray.num_cols_fa_ram);
1529        power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam +
1530                                          subarray.num_cols_fa_ram);
1531        power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
1532
1533        //Add subarray output energy
1534        power_subarray_out_drv.searchOp.dynamic =
1535            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
1536        power_subarray_out_drv.readOp.dynamic =
1537            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1538
1539
1540        power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1541                                power_sa.readOp.dynamic +
1542                                power_bitline.readOp.dynamic +
1543                                power_subarray_out_drv.readOp.dynamic;
1544
1545        power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1546                                bit_mux_dec->power.readOp.dynamic +
1547                                sa_mux_lev_1_dec->power.readOp.dynamic +
1548                                sa_mux_lev_2_dec->power.readOp.dynamic +
1549                                power_comparator.readOp.dynamic;
1550
1551        //add energy consumed inside cam
1552        power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
1553        power_searchline_precharge = sl_precharge_eq_drv->power;
1554        power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1555        power_searchline = sl_data_drv->power;
1556        power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
1557            subarray.num_cols_fa_cam * num_subarrays_per_mat;;
1558        power_matchline_precharge  = ml_precharge_drv->power;
1559        power_matchline_precharge.searchOp.dynamic =
1560            power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1561        power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
1562        power_ml_to_ram_wl_drv.searchOp.dynamic =
1563            ml_to_ram_wl_drv->power.readOp.dynamic;
1564
1565        power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
1566        power_cam_all_active.searchOp.dynamic +=
1567            power_searchline_precharge.searchOp.dynamic;
1568        power_cam_all_active.searchOp.dynamic +=
1569            power_searchline.searchOp.dynamic;
1570        power_cam_all_active.searchOp.dynamic +=
1571            power_matchline_precharge.searchOp.dynamic;
1572
1573        power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
1574        //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1575
1576    } else {
1577        // add energy consumed in bitline prechagers
1578        power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
1579        //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
1580        //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
1581        //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
1582
1583        //Add sense amps energy
1584        num_sa_subarray = subarray.num_cols_fa_cam / deg_bl_muxing;
1585        power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
1586        power_sa.searchOp.dynamic = 0;
1587
1588        power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
1589        power_bitline.searchOp.dynamic = 0;
1590        power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
1591
1592        power_subarray_out_drv.searchOp.dynamic =
1593            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
1594        power_subarray_out_drv.readOp.dynamic =
1595            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1596
1597        power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1598                                power_sa.readOp.dynamic +
1599                                power_bitline.readOp.dynamic +
1600                                power_subarray_out_drv.readOp.dynamic;
1601
1602        power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1603                                bit_mux_dec->power.readOp.dynamic +
1604                                sa_mux_lev_1_dec->power.readOp.dynamic +
1605                                sa_mux_lev_2_dec->power.readOp.dynamic +
1606                                power_comparator.readOp.dynamic;
1607
1608
1609        ////add energy consumed inside cam
1610        power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
1611        power_searchline_precharge = sl_precharge_eq_drv->power;
1612        power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1613        power_searchline = sl_data_drv->power;
1614        power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
1615            subarray.num_cols_fa_cam * num_subarrays_per_mat;;
1616        power_matchline_precharge  = ml_precharge_drv->power;
1617        power_matchline_precharge.searchOp.dynamic =
1618            power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1619        power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
1620        power_ml_to_ram_wl_drv.searchOp.dynamic =
1621            ml_to_ram_wl_drv->power.readOp.dynamic;
1622
1623        power_cam_all_active.searchOp.dynamic =
1624            power_matchline.searchOp.dynamic;
1625        power_cam_all_active.searchOp.dynamic +=
1626            power_searchline_precharge.searchOp.dynamic;
1627        power_cam_all_active.searchOp.dynamic +=
1628            power_searchline.searchOp.dynamic;
1629        power_cam_all_active.searchOp.dynamic +=
1630            power_matchline_precharge.searchOp.dynamic;
1631
1632        power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
1633        //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1634
1635    }
1636
1637
1638
1639    // calculate leakage power
1640    if (!(is_fa || pure_cam)) {
1641        int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1642
1643        power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1644        power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1645        power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
1646            (RWP + ERP);
1647
1648        //num_sa_subarray             = subarray.num_cols / deg_bl_muxing;
1649        power_subarray_out_drv.readOp.leakage =
1650            (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1651            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
1652
1653        power.readOp.leakage += power_bitline.readOp.leakage +
1654                                power_bl_precharge_eq_drv.readOp.leakage +
1655                                power_sa.readOp.leakage +
1656                                power_subarray_out_drv.readOp.leakage;
1657        //cout<<"leakage"<<power.readOp.leakage<<endl;
1658
1659        power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
1660        power.readOp.leakage += power_comparator.readOp.leakage;
1661
1662        //cout<<"leakage1"<<power.readOp.leakage<<endl;
1663
1664        // leakage power
1665        power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
1666        power_bit_mux_decoders.readOp.leakage      = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
1667        power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
1668        power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
1669
1670        power.readOp.leakage += r_predec->power.readOp.leakage +
1671                                b_mux_predec->power.readOp.leakage +
1672                                sa_mux_lev_1_predec->power.readOp.leakage +
1673                                sa_mux_lev_2_predec->power.readOp.leakage +
1674                                power_row_decoders.readOp.leakage +
1675                                power_bit_mux_decoders.readOp.leakage +
1676                                power_sa_mux_lev_1_decoders.readOp.leakage +
1677                                power_sa_mux_lev_2_decoders.readOp.leakage;
1678        //cout<<"leakage2"<<power.readOp.leakage<<endl;
1679
1680        //++++Below is gate leakage
1681        power_bitline.readOp.gate_leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1682        power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1683        power_sa.readOp.gate_leakage *= num_sa_subarray *
1684            num_subarrays_per_mat * (RWP + ERP);
1685
1686        //num_sa_subarray             = subarray.num_cols / deg_bl_muxing;
1687        power_subarray_out_drv.readOp.gate_leakage =
1688            (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1689            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
1690
1691        power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
1692                                     power_bl_precharge_eq_drv.readOp.gate_leakage +
1693                                     power_sa.readOp.gate_leakage +
1694                                     power_subarray_out_drv.readOp.gate_leakage;
1695        //cout<<"leakage"<<power.readOp.leakage<<endl;
1696
1697        power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
1698        power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
1699
1700        //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
1701
1702        // gate_leakage power
1703        power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
1704        power_bit_mux_decoders.readOp.gate_leakage      = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
1705        power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
1706        power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
1707
1708        power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1709                                     b_mux_predec->power.readOp.gate_leakage +
1710                                     sa_mux_lev_1_predec->power.readOp.gate_leakage +
1711                                     sa_mux_lev_2_predec->power.readOp.gate_leakage +
1712                                     power_row_decoders.readOp.gate_leakage +
1713                                     power_bit_mux_decoders.readOp.gate_leakage +
1714                                     power_sa_mux_lev_1_decoders.readOp.gate_leakage +
1715                                     power_sa_mux_lev_2_decoders.readOp.gate_leakage;
1716    } else if (is_fa) {
1717        int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1718
1719        power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1720        power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1721        power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1722        power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
1723            (RWP + ERP + SCHP);
1724
1725        //cout<<"leakage3"<<power.readOp.leakage<<endl;
1726
1727
1728        power_subarray_out_drv.readOp.leakage =
1729            (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1730            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1731
1732        power.readOp.leakage += power_bitline.readOp.leakage +
1733                                power_bl_precharge_eq_drv.readOp.leakage +
1734                                power_bl_precharge_eq_drv.searchOp.leakage +
1735                                power_sa.readOp.leakage +
1736                                power_subarray_out_drv.readOp.leakage;
1737
1738        //cout<<"leakage4"<<power.readOp.leakage<<endl;
1739
1740        // leakage power
1741        power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
1742        power.readOp.leakage += r_predec->power.readOp.leakage +
1743                                power_row_decoders.readOp.leakage;
1744
1745        //cout<<"leakage5"<<power.readOp.leakage<<endl;
1746
1747        //inside cam
1748        power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
1749        power_cam_all_active.searchOp.leakage +=
1750            sl_precharge_eq_drv->power.readOp.leakage;
1751        power_cam_all_active.searchOp.leakage +=
1752            sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
1753        power_cam_all_active.searchOp.leakage +=
1754            ml_precharge_drv->power.readOp.dynamic;
1755        power_cam_all_active.searchOp.leakage *=
1756            num_subarrays_per_mat;
1757
1758        power.readOp.leakage += power_cam_all_active.searchOp.leakage;
1759
1760//	  cout<<"leakage6"<<power.readOp.leakage<<endl;
1761
1762        //+++Below is gate leakage
1763        power_bitline.readOp.gate_leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1764        power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1765        power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1766        power_sa.readOp.gate_leakage *= num_sa_subarray *
1767            num_subarrays_per_mat * (RWP + ERP + SCHP);
1768
1769        //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
1770
1771
1772        power_subarray_out_drv.readOp.gate_leakage =
1773            (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1774            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1775
1776        power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
1777                                     power_bl_precharge_eq_drv.readOp.gate_leakage +
1778                                     power_bl_precharge_eq_drv.searchOp.gate_leakage +
1779                                     power_sa.readOp.gate_leakage +
1780                                     power_subarray_out_drv.readOp.gate_leakage;
1781
1782        //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
1783
1784        // gate_leakage power
1785        power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
1786        power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1787                                     power_row_decoders.readOp.gate_leakage;
1788
1789        //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
1790
1791        //inside cam
1792        power_cam_all_active.searchOp.gate_leakage =
1793            power_matchline.searchOp.gate_leakage;
1794        power_cam_all_active.searchOp.gate_leakage +=
1795            sl_precharge_eq_drv->power.readOp.gate_leakage;
1796        power_cam_all_active.searchOp.gate_leakage +=
1797            sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
1798        power_cam_all_active.searchOp.gate_leakage +=
1799            ml_precharge_drv->power.readOp.dynamic;
1800        power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
1801
1802        power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
1803
1804    } else {
1805        int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1806
1807        //power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1808        //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1809        power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1810        power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
1811            (RWP + ERP + SCHP);
1812
1813
1814        power_subarray_out_drv.readOp.leakage =
1815            (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1816            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1817
1818        power.readOp.leakage += //power_bitline.readOp.leakage +
1819            //power_bl_precharge_eq_drv.readOp.leakage +
1820            power_bl_precharge_eq_drv.searchOp.leakage +
1821            power_sa.readOp.leakage +
1822            power_subarray_out_drv.readOp.leakage;
1823
1824        // leakage power
1825        power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage *
1826            subarray.num_rows * num_subarrays_per_mat * (RWP + ERP + EWP);
1827        power.readOp.leakage += r_predec->power.readOp.leakage +
1828                                power_row_decoders.readOp.leakage;
1829
1830        //inside cam
1831        power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
1832        power_cam_all_active.searchOp.leakage +=
1833            sl_precharge_eq_drv->power.readOp.leakage;
1834        power_cam_all_active.searchOp.leakage +=
1835            sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
1836        power_cam_all_active.searchOp.leakage +=
1837            ml_precharge_drv->power.readOp.dynamic;
1838        power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
1839
1840        power.readOp.leakage += power_cam_all_active.searchOp.leakage;
1841
1842        //+++Below is gate leakage
1843        power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1844        power_sa.readOp.gate_leakage *= num_sa_subarray *
1845            num_subarrays_per_mat * (RWP + ERP + SCHP);
1846
1847
1848        power_subarray_out_drv.readOp.gate_leakage =
1849            (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1850            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1851
1852        power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
1853            //power_bl_precharge_eq_drv.readOp.gate_leakage +
1854            power_bl_precharge_eq_drv.searchOp.gate_leakage +
1855            power_sa.readOp.gate_leakage +
1856            power_subarray_out_drv.readOp.gate_leakage;
1857
1858        // gate_leakage power
1859        power_row_decoders.readOp.gate_leakage =
1860            row_dec->power.readOp.gate_leakage * subarray.num_rows *
1861            num_subarrays_per_mat * (RWP + ERP + EWP);
1862        power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1863                                     power_row_decoders.readOp.gate_leakage;
1864
1865        //inside cam
1866        power_cam_all_active.searchOp.gate_leakage =
1867            power_matchline.searchOp.gate_leakage;
1868        power_cam_all_active.searchOp.gate_leakage +=
1869            sl_precharge_eq_drv->power.readOp.gate_leakage;
1870        power_cam_all_active.searchOp.gate_leakage +=
1871            sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
1872        power_cam_all_active.searchOp.gate_leakage +=
1873            ml_precharge_drv->power.readOp.dynamic;
1874        power_cam_all_active.searchOp.gate_leakage *=
1875            num_subarrays_per_mat;
1876
1877        power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
1878    }
1879}
1880
1881