mat.cc revision 10152:52c552138ba1
1/*****************************************************************************
2 *                                McPAT/CACTI
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *                          All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution;
14 * neither the name of the copyright holders nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 ***************************************************************************/
31
32
33
34#include <cassert>
35
36#include "mat.h"
37
38Mat::Mat(const DynamicParameter & dyn_p)
39 :dp(dyn_p),
40  power_subarray_out_drv(),
41  delay_fa_tag(0), delay_cam(0),
42  delay_before_decoder(0), delay_bitline(0),
43  delay_wl_reset(0), delay_bl_restore(0),
44  delay_searchline(0), delay_matchchline(0),
45  delay_cam_sl_restore(0), delay_cam_ml_reset(0),
46  delay_fa_ram_wl(0),delay_hit_miss_reset(0),
47  delay_hit_miss(0),
48  subarray(dp, dp.fully_assoc),
49  power_bitline(), per_bitline_read_energy(0),
50  deg_bl_muxing(dp.deg_bl_muxing),
51  num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
52  delay_writeback(0),
53  cell(subarray.cell), cam_cell(subarray.cam_cell),
54  is_dram(dyn_p.is_dram),
55  pure_cam(dyn_p.pure_cam),
56  num_mats(dp.num_mats),
57  power_sa(), delay_sa(0),
58  leak_power_sense_amps_closed_page_state(0),
59  leak_power_sense_amps_open_page_state(0),
60  delay_subarray_out_drv(0),
61  delay_comparator(0), power_comparator(),
62  num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
63  num_subarrays_per_mat(dp.num_subarrays/dp.num_mats),
64  num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir)
65{
66  assert(num_subarrays_per_mat <= 4);
67  assert(num_subarrays_per_row <= 2);
68  is_fa = (dp.fully_assoc) ? true : false;
69  camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
70
71  if (is_fa || pure_cam)
72          num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat;
73
74  if (dp.use_inp_params == 1) {
75          RWP  = dp.num_rw_ports;
76          ERP  = dp.num_rd_ports;
77          EWP  = dp.num_wr_ports;
78          SCHP = dp.num_search_ports;
79  }
80  else {
81    RWP = g_ip->num_rw_ports;
82    ERP = g_ip->num_rd_ports;
83    EWP = g_ip->num_wr_ports;
84    SCHP = g_ip->num_search_ports;
85
86  }
87
88  double number_sa_subarray;
89
90  if (!is_fa && !pure_cam)
91  {
92          number_sa_subarray = subarray.num_cols / deg_bl_muxing;
93  }
94  else if (is_fa && !pure_cam)
95  {
96          number_sa_subarray =  (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
97  }
98
99  else
100  {
101          number_sa_subarray =  (subarray.num_cols_fa_cam) / deg_bl_muxing;
102  }
103
104  int    num_dec_signals           = subarray.num_rows;
105  double C_ld_bit_mux_dec_out      = 0;
106  double C_ld_sa_mux_lev_1_dec_out = 0;
107  double C_ld_sa_mux_lev_2_dec_out = 0;
108  double R_wire_wl_drv_out;
109
110  if (!is_fa && !pure_cam)
111    {
112            R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
113    }
114    else if (is_fa && !pure_cam)
115    {
116        R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
117    }
118    else
119    {
120        R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um;
121    }
122
123  double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
124  double R_wire_sa_mux_dec_out  = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
125
126  if (deg_bl_muxing > 1)
127  {
128    C_ld_bit_mux_dec_out =
129      (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) +  // 2 transistor per cell
130      num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
131  }
132
133  if (dp.Ndsam_lev_1 > 1)
134  {
135    C_ld_sa_mux_lev_1_dec_out =
136      (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
137      num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
138  }
139  if (dp.Ndsam_lev_2 > 1)
140  {
141    C_ld_sa_mux_lev_2_dec_out =
142      (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
143      num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
144  }
145
146  if (num_subarrays_per_row >= 2)
147  {
148    // wire heads for both right and left side of a mat, so half the resistance
149    R_wire_bit_mux_dec_out /= 2.0;
150    R_wire_sa_mux_dec_out  /= 2.0;
151  }
152
153
154  row_dec = new Decoder(
155      num_dec_signals,
156      false,
157      subarray.C_wl,
158      R_wire_wl_drv_out,
159      false/*is_fa*/,
160      is_dram,
161      true,
162      camFlag? cam_cell:cell);
163//  if (is_fa && (!dp.is_tag))
164//  {
165//    row_dec->exist = true;
166//  }
167  bit_mux_dec = new Decoder(
168      deg_bl_muxing,// This number is 1 for FA or CAM
169      false,
170      C_ld_bit_mux_dec_out,
171      R_wire_bit_mux_dec_out,
172      false/*is_fa*/,
173      is_dram,
174      false,
175      camFlag? cam_cell:cell);
176  sa_mux_lev_1_dec = new Decoder(
177      dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
178      dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
179      C_ld_sa_mux_lev_1_dec_out,
180      R_wire_sa_mux_dec_out,
181      false/*is_fa*/,
182      is_dram,
183      false,
184      camFlag? cam_cell:cell);
185  sa_mux_lev_2_dec = new Decoder(
186      dp.Ndsam_lev_2, // This number is 1 for FA or CAM
187      false,
188      C_ld_sa_mux_lev_2_dec_out,
189      R_wire_sa_mux_dec_out,
190      false/*is_fa*/,
191      is_dram,
192      false,
193      camFlag? cam_cell:cell);
194
195  double C_wire_predec_blk_out;
196  double R_wire_predec_blk_out;
197
198  if (!is_fa && !pure_cam)
199      {
200
201          C_wire_predec_blk_out  = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
202          R_wire_predec_blk_out  = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
203
204      }
205      else //for pre-decode block's load is same for both FA and CAM
206      {
207          C_wire_predec_blk_out  = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
208          R_wire_predec_blk_out  = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
209      }
210
211
212  if (is_fa||pure_cam)
213          num_dec_signals += _log2(num_subarrays_per_mat);
214
215  PredecBlk * r_predec_blk1 = new PredecBlk(
216      num_dec_signals,
217      row_dec,
218      C_wire_predec_blk_out,
219      R_wire_predec_blk_out,
220      num_subarrays_per_mat,
221      is_dram,
222      true);
223  PredecBlk * r_predec_blk2 = new PredecBlk(
224      num_dec_signals,
225      row_dec,
226      C_wire_predec_blk_out,
227      R_wire_predec_blk_out,
228      num_subarrays_per_mat,
229      is_dram,
230      false);
231  PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
232  PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
233  PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
234  PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
235  PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
236  PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
237  dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
238  dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
239
240  PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
241  PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
242  PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
243  PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
244  PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
245  PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
246  PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
247  PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
248  way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
249  dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
250
251  r_predec            = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
252  b_mux_predec        = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
253  sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
254  sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
255
256  subarray_out_wire   = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
257
258  double driver_c_gate_load;
259  double driver_c_wire_load;
260  double driver_r_wire_load;
261
262  if (is_fa || pure_cam)
263
264  {   //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
265          driver_c_gate_load =  (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
266          driver_c_wire_load =  subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
267          driver_r_wire_load =  subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
268          cam_bl_precharge_eq_drv = new Driver(
269                          driver_c_gate_load,
270                          driver_c_wire_load,
271                          driver_r_wire_load,
272                          is_dram);
273
274          if (!pure_cam)
275          {
276                  //This is only used for fully asso not pure CAM
277                  driver_c_gate_load =  (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
278                  driver_c_wire_load =  subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um;
279                  driver_r_wire_load =  subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um;
280                  bl_precharge_eq_drv = new Driver(
281                                  driver_c_gate_load,
282                                  driver_c_wire_load,
283                                  driver_r_wire_load,
284                                  is_dram);
285          }
286  }
287
288  else
289  {
290          driver_c_gate_load =  subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
291          driver_c_wire_load =  subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
292          driver_r_wire_load =  subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
293          bl_precharge_eq_drv = new Driver(
294                          driver_c_gate_load,
295                          driver_c_wire_load,
296                          driver_r_wire_load,
297                          is_dram);
298  }
299  double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
300  double w_row_decoder    = area_row_decoder / subarray.area.get_h();
301
302  double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
303    compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
304
305  double h_subarray_out_drv = subarray_out_wire->area.get_area() *
306    (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
307
308
309  h_subarray_out_drv *= (RWP + ERP + SCHP);
310
311  double h_comparators                = 0.0;
312  double w_row_predecode_output_wires = 0.0;
313  double h_bit_mux_dec_out_wires      = 0.0;
314  double h_senseamp_mux_dec_out_wires = 0.0;
315
316  if ((!is_fa)&&(dp.is_tag))
317  {
318    //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
319    h_comparators  = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
320    h_comparators *= (RWP + ERP);
321  }
322
323
324    int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits);
325    int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits);
326    w_row_predecode_output_wires   = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) *
327      g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
328
329
330  double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
331                           (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
332                            h_subarray_out_drv + h_comparators);
333
334  double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
335
336  if (deg_bl_muxing > 1)
337  {
338    h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
339  }
340  if (dp.Ndsam_lev_1 > 1)
341  {
342    h_senseamp_mux_dec_out_wires =  dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
343  }
344  if (dp.Ndsam_lev_2 > 1)
345  {
346    h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
347  }
348
349  double h_addr_datain_wires;
350  if (!g_ip->ver_htree_wires_over_array)
351  {
352    h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +
353                                  (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) *
354                                 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
355
356    if (is_fa || pure_cam)
357    {
358        h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +     //TODO: revisit
359                                      (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) *
360                                       g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
361                                       (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP;
362    }
363    //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
364    //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
365    h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
366                       h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
367                      h_addr_datain_wires +
368                      h_bit_mux_dec_out_wires +
369                      h_senseamp_mux_dec_out_wires;
370
371  }
372
373  // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
374  double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
375                                      b_mux_predec_blk_drv1->area.get_area() +
376                                      sa_mux_lev_1_predec_blk_drv1->area.get_area() +
377                                      sa_mux_lev_2_predec_blk_drv1->area.get_area() +
378                                      way_sel_drv1->area.get_area() +
379                                      r_predec_blk_drv2->area.get_area() +
380                                      b_mux_predec_blk_drv2->area.get_area() +
381                                      sa_mux_lev_1_predec_blk_drv2->area.get_area() +
382                                      sa_mux_lev_2_predec_blk_drv2->area.get_area() +
383                                      r_predec_blk1->area.get_area() +
384                                      b_mux_predec_blk1->area.get_area() +
385                                      sa_mux_lev_1_predec_blk1->area.get_area() +
386                                      sa_mux_lev_2_predec_blk1->area.get_area() +
387                                      r_predec_blk2->area.get_area() +
388                                      b_mux_predec_blk2->area.get_area() +
389                                      sa_mux_lev_1_predec_blk2->area.get_area() +
390                                      sa_mux_lev_2_predec_blk2->area.get_area() +
391                                      bit_mux_dec->area.get_area() +
392                                      sa_mux_lev_1_dec->area.get_area() +
393                                      sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
394
395  double area_efficiency_mat;
396
397//  if (!is_fa)
398//  {
399    assert(num_subarrays_per_mat/num_subarrays_per_row>0);
400    area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area;
401    area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
402    area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
403    area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area();
404
405//    cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl;
406//    cout<<"h_comparators"<<h_comparators<<endl;
407//    cout<<"h_subarray_out_drv"<<h_subarray_out_drv<<endl;
408//    cout<<"h_addr_datain_wires"<<h_addr_datain_wires<<endl;
409//    cout<<"h_bit_mux_dec_out_wires"<<h_bit_mux_dec_out_wires<<endl;
410//    cout<<"h_senseamp_mux_dec_out_wires"<<h_senseamp_mux_dec_out_wires<<endl;
411//    cout<<"h_non_cell_area"<<h_non_cell_area<<endl;
412//    cout<<"area.h =" << (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h<<endl;
413//    cout<<"w_non_cell_area"<<w_non_cell_area<<endl;
414//    cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl;
415
416    assert(area.h>0);
417    assert(area.w>0);
418//  }
419//  else
420//  {
421//    area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.get_h() + h_non_cell_area;
422//    area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
423//    area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
424//    area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area();
425//  }
426  }
427
428
429
430Mat::~Mat()
431{
432  delete row_dec;
433  delete bit_mux_dec;
434  delete sa_mux_lev_1_dec;
435  delete sa_mux_lev_2_dec;
436
437  delete r_predec->blk1;
438  delete r_predec->blk2;
439  delete b_mux_predec->blk1;
440  delete b_mux_predec->blk2;
441  delete sa_mux_lev_1_predec->blk1;
442  delete sa_mux_lev_1_predec->blk2;
443  delete sa_mux_lev_2_predec->blk1;
444  delete sa_mux_lev_2_predec->blk2;
445  delete dummy_way_sel_predec_blk1;
446  delete dummy_way_sel_predec_blk2;
447
448  delete r_predec->drv1;
449  delete r_predec->drv2;
450  delete b_mux_predec->drv1;
451  delete b_mux_predec->drv2;
452  delete sa_mux_lev_1_predec->drv1;
453  delete sa_mux_lev_1_predec->drv2;
454  delete sa_mux_lev_2_predec->drv1;
455  delete sa_mux_lev_2_predec->drv2;
456  delete way_sel_drv1;
457  delete dummy_way_sel_predec_blk_drv2;
458
459  delete r_predec;
460  delete b_mux_predec;
461  delete sa_mux_lev_1_predec;
462  delete sa_mux_lev_2_predec;
463
464  delete subarray_out_wire;
465  if (!pure_cam)
466    delete bl_precharge_eq_drv;
467
468  if (is_fa || pure_cam)
469  {
470    delete sl_precharge_eq_drv ;
471    delete sl_data_drv ;
472    delete cam_bl_precharge_eq_drv;
473    delete ml_precharge_drv;
474    delete ml_to_ram_wl_drv;
475  }
476}
477
478
479
480double Mat::compute_delays(double inrisetime)
481{
482        int k;
483        double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl;
484        double outrisetime_search, outrisetime, row_dec_outrisetime;
485        // delay calculation for tags of fully associative cache
486        if (is_fa || pure_cam)
487        {
488                //Compute search access time
489                outrisetime_search = compute_cam_delay(inrisetime);
490                if (is_fa)
491                {
492                        bl_precharge_eq_drv->compute_delay(0);
493                        k = ml_to_ram_wl_drv->number_gates - 1;
494                        rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
495                        C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
496                        drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
497                        C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load;
498                        tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
499                        delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
500
501                        R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
502                        r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
503                        R_bl = subarray.num_rows * r_b_metal;
504                        C_bl = subarray.C_bl;
505                        delay_bl_restore = bl_precharge_eq_drv->delay +
506                                 log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
507                                 (R_bl_precharge * C_bl + R_bl * C_bl / 2);
508
509
510                        outrisetime_search = compute_bitline_delay(outrisetime_search);
511                        outrisetime_search = compute_sa_delay(outrisetime_search);
512                }
513                        outrisetime_search = compute_subarray_out_drv(outrisetime_search);
514                        subarray_out_wire->set_in_rise_time(outrisetime_search);
515                        outrisetime_search = subarray_out_wire->signal_rise_time();
516                        delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
517
518
519                        //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
520                        outrisetime = r_predec->compute_delays(inrisetime);
521                        row_dec_outrisetime = row_dec->compute_delays(outrisetime);
522
523                        outrisetime = b_mux_predec->compute_delays(inrisetime);
524                        bit_mux_dec->compute_delays(outrisetime);
525
526                        outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
527                        sa_mux_lev_1_dec->compute_delays(outrisetime);
528
529                        outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
530                        sa_mux_lev_2_dec->compute_delays(outrisetime);
531
532                        if (pure_cam)
533                        {
534                          outrisetime = compute_bitline_delay(row_dec_outrisetime);
535                          outrisetime = compute_sa_delay(outrisetime);
536                        }
537                        return outrisetime_search;
538    }
539        else
540        {
541                bl_precharge_eq_drv->compute_delay(0);
542                if (row_dec->exist == true)
543                {
544                        int k = row_dec->num_gates - 1;
545                        double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
546                        // TODO: this 4*cell.h number must be revisited
547                        double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
548                        drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
549                        double C_ld = row_dec->C_ld_dec_out;
550                        double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
551                        delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
552                }
553                double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
554                double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
555                double R_bl = subarray.num_rows * r_b_metal;
556                double C_bl = subarray.C_bl;
557
558                if (is_dram)
559                {
560                        delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
561                }
562                else
563                {
564                        delay_bl_restore = bl_precharge_eq_drv->delay +
565                        log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
566                        (R_bl_precharge * C_bl + R_bl * C_bl / 2);
567                }
568  }
569
570
571
572  outrisetime = r_predec->compute_delays(inrisetime);
573  row_dec_outrisetime = row_dec->compute_delays(outrisetime);
574
575  outrisetime = b_mux_predec->compute_delays(inrisetime);
576  bit_mux_dec->compute_delays(outrisetime);
577
578  outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
579  sa_mux_lev_1_dec->compute_delays(outrisetime);
580
581  outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
582  sa_mux_lev_2_dec->compute_delays(outrisetime);
583
584  outrisetime = compute_bitline_delay(row_dec_outrisetime);
585  outrisetime = compute_sa_delay(outrisetime);
586  outrisetime = compute_subarray_out_drv(outrisetime);
587  subarray_out_wire->set_in_rise_time(outrisetime);
588  outrisetime = subarray_out_wire->signal_rise_time();
589
590  delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
591
592  if (dp.is_tag == true && dp.fully_assoc == false)
593  {
594    compute_comparator_delay(0);
595  }
596
597  if (row_dec->exist == false)
598    {
599      delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
600    }
601  return outrisetime;
602}
603
604
605
606double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h()
607{
608
609  double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) +
610    compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP));  // precharge circuitry
611
612  if (deg_bl_muxing > 1)
613  {
614    height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP)));  // col mux tr height
615    // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);  // bit mux dec out wires height
616  }
617
618  height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP));  // sense_amp_height
619
620  if (dp.Ndsam_lev_1 > 1)
621  {
622    height += compute_tr_width_after_folding(
623        g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP));  // sense_amp_mux_height
624    //height_senseamp_mux_decode_output_wires =  Ndsam * wire_inside_mat_pitch * (RWP + ERP);
625  }
626
627  if (dp.Ndsam_lev_2 > 1)
628  {
629    height += compute_tr_width_after_folding(
630        g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP));  // sense_amp_mux_height
631    //height_senseamp_mux_decode_output_wires =  Ndsam * wire_inside_mat_pitch * (RWP + ERP);
632
633    // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
634    height += 2 * compute_tr_width_after_folding(
635        pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
636    height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
637  }
638
639  // TODO: this should be uncommented...
640  /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
641    {
642  //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
643  double width_write_driver_write_mux  = width_write_driver_or_write_mux();
644  double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
645  cell.w *
646  // deg_bl_muxing *
647  dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
648  height += height_write_driver_write_mux;
649  }*/
650
651  return height;
652}
653
654
655
656double Mat::compute_cam_delay(double inrisetime)
657{
658
659  double out_time_ramp, this_delay;
660  double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
661
662
663  double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
664    Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp,
665    Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp,
666    Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p;
667
668  double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal,  dynSearchEng;
669  int Htagbits;
670
671  double driver_c_gate_load;
672  double driver_c_wire_load;
673  double driver_r_wire_load;
674  //double searchline_precharge_time;
675
676  double leak_power_cc_inverters_sram_cell         = 0;
677  double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
678  double leak_power_RD_port_sram_cell              = 0;
679  double leak_power_SCHP_port_sram_cell            = 0;
680  double leak_comparator_cam_cell                  =0;
681
682  double gate_leak_comparator_cam_cell          = 0;
683  double gate_leak_power_cc_inverters_sram_cell = 0;
684  double gate_leak_power_RD_port_sram_cell      = 0;
685  double gate_leak_power_SCHP_port_sram_cell    = 0;
686
687  c_matchline_metal   = cam_cell.get_w() * g_tp.wire_local.C_per_um;
688  c_searchline_metal  = cam_cell.get_h() * g_tp.wire_local.C_per_um;
689  r_matchline_metal   = cam_cell.get_w() * g_tp.wire_local.R_per_um;
690  r_searchline_metal  = cam_cell.get_h() * g_tp.wire_local.R_per_um;
691
692  dynSearchEng = 0.0;
693  delay_matchchline = 0.0;
694  double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
695  bool linear_scaling = false;
696
697  if (linear_scaling)
698  {
699          Wdecdrivep    =  450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
700          Wdecdriven    =  300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
701          Wfadriven     = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
702          Wfadrivep     =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
703          Wfadrive2n    =  250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
704          Wfadrive2p    =  500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
705          Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
706          Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
707          Wfadecdrive2n =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
708          Wfadecdrive2p =   50 * g_ip->F_sz_um;//this was  40 micron for the 0.8 micron process
709          Wfadecdriven  = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
710          Wfadecdrivep  =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
711          Wfaprechn     =  7.5 * g_ip->F_sz_um;//this was   6 micron for the 0.8 micron process
712          Wfainvn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
713          Wfainvp       =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
714          Wfanandn      =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
715          Wfanandp      = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
716          Wdecnandn     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
717          Wdecnandp     = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
718
719          Wfaprechp     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
720          Wdummyn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
721          Wdummyinvn    =   75 * g_ip->F_sz_um;//this was  60 micron for the 0.8 micron process
722          Wdummyinvp    =  100 * g_ip->F_sz_um;//this was  80 micron for the 0.8 micron process
723          Waddrnandn    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
724          Waddrnandp    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
725          Wfanorn       = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
726          Wfanorp       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
727          W_hit_miss_n    = Wdummyn;
728          W_hit_miss_p    = g_tp.min_w_nmos_*p_to_n_sizing_r;
729          //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
730  }
731  else
732  {
733          Wdecdrivep    =  450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
734          Wdecdriven    =  300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
735          Wfadriven     = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
736          Wfadrivep     =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
737          Wfadrive2n    =  250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
738          Wfadrive2p    =  500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
739          Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
740          Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
741          Wfadecdrive2n =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
742          Wfadecdrive2p =   50 * g_ip->F_sz_um;//this was  40 micron for the 0.8 micron process
743          Wfadecdriven  = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
744          Wfadecdrivep  =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
745          Wfaprechn     =  7.5 * g_ip->F_sz_um;//this was   6 micron for the 0.8 micron process
746          Wfainvn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
747          Wfainvp       =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
748          Wfanandn      =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
749          Wfanandp      = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
750          Wdecnandn     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
751          Wdecnandp     = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
752
753          Wfaprechp     = g_tp.w_pmos_bl_precharge;//this was  10 micron for the 0.8 micron process
754          Wdummyn       = g_tp.cam.cell_nmos_w;
755          Wdummyinvn    =   75 * g_ip->F_sz_um;//this was  60 micron for the 0.8 micron process
756          Wdummyinvp    =  100 * g_ip->F_sz_um;//this was  80 micron for the 0.8 micron process
757          Waddrnandn    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
758          Waddrnandp    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
759          Wfanorn       = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
760          Wfanorp       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
761          W_hit_miss_n    = Wdummyn;
762          W_hit_miss_p    = g_tp.min_w_nmos_*p_to_n_sizing_r;
763  }
764
765  Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
766
767  /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
768     search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
769     From the driver(am and an) to the comparators in all the rows including the dummy row,
770     Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
771
772  //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
773  //Searchline precharge routes horizontally
774  driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
775  driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
776  driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
777
778  sl_precharge_eq_drv = new Driver(
779      driver_c_gate_load,
780          driver_c_wire_load,
781      driver_r_wire_load,
782      is_dram);
783
784  //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
785  //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
786  driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
787  driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
788  driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
789  sl_data_drv = new Driver(
790      driver_c_gate_load,
791          driver_c_wire_load,
792      driver_r_wire_load,
793      is_dram);
794
795  sl_precharge_eq_drv->compute_delay(0);
796  double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
797  double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
798  double R_bl = (subarray.num_rows + 1) * r_b_metal;
799  double C_bl = subarray.C_bl_cam;
800  delay_cam_sl_restore = sl_precharge_eq_drv->delay
801                         + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2);
802
803  out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
804
805  //matchline ops delay
806  delay_matchchline += sl_data_drv->delay;
807
808  /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
809  //matchline delay, matchline power, matchline_reset for cycle time computation,
810
811  ////matchline precharge circuitry routes vertically
812  //There are two matchline precharge driver chains per subarray.
813  driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
814  driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
815  driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
816
817  ml_precharge_drv = new Driver(
818                                                  driver_c_gate_load,
819                              driver_c_wire_load,
820                          driver_r_wire_load,
821                          is_dram);
822
823  ml_precharge_drv->compute_delay(0);
824
825
826  rd =  tr_R_on(Wdummyn, NCH, 2, is_dram);
827  c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit
828                                  + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline
829
830  Cwire = c_matchline_metal * Htagbits;
831  Rwire = r_matchline_metal * Htagbits;
832  c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
833
834  double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
835  //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
836  double R_ml = Rwire;
837  double C_ml = Cwire + c_intrinsic;
838  delay_cam_ml_reset = ml_precharge_drv->delay
839                           + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too
840
841  //matchline ops delay
842  tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
843  this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
844  delay_matchchline += this_delay;
845  out_time_ramp = this_delay / VTHFA3;
846
847  dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise
848                                          * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves
849
850  /* third stage, from the NAND2 gates to the drivers in the dummy row */
851  rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
852  c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
853                drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2;
854  c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
855  tf = rd * (c_intrinsic + c_gate_load);
856  this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
857  out_time_ramp = this_delay / (1 - VTHFA4);
858  delay_matchchline += this_delay;
859
860  //only the dummy row has the extra inverter between NAND and NOR gates
861  dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//  * Ntbl;
862
863  /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
864  rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
865  c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
866  Cwire = c_matchline_metal * Htagbits +  c_searchline_metal * (subarray.num_rows+1)/2;
867  Rwire = r_matchline_metal * Htagbits +  r_searchline_metal * (subarray.num_rows+1)/2;
868  c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
869  tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
870  this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
871  out_time_ramp = this_delay / VTHFA5;
872  delay_matchchline += this_delay;
873
874  dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
875
876  /*final statge from the NOR gate to drive the wordline of the data portion */
877
878  //searchline data driver There are two matchline precharge driver chains per subarray.
879  driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
880  driver_c_wire_load = subarray.C_wl_ram;
881  driver_r_wire_load = subarray.R_wl_ram;
882
883  ml_to_ram_wl_drv = new Driver(
884                                                  driver_c_gate_load,
885                              driver_c_wire_load,
886                          driver_r_wire_load,
887                          is_dram);
888
889
890
891  rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
892  c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
893  c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
894  tf = rd * (c_intrinsic + c_gate_load);
895  this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
896  out_time_ramp = this_delay / (1-0.5);
897  delay_matchchline += this_delay;
898
899  out_time_ramp   = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
900
901  //c_gate_load energy is computed in ml_to_ram_wl_drv
902  dynSearchEng  += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
903
904
905  /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
906  /*Precharge the hitting logic */
907  c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
908  Cwire = c_searchline_metal * subarray.num_rows;
909  Rwire = r_searchline_metal * subarray.num_rows;
910  c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
911
912  rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
913  //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
914  double R_hit_miss = Rwire;
915  double C_hit_miss = Cwire + c_intrinsic;
916  delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
917  dynSearchEng  += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
918
919  /*hitting logic evaluation */
920  c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
921  Cwire = c_searchline_metal * subarray.num_rows;
922  Rwire = r_searchline_metal * subarray.num_rows;
923  c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
924
925  rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
926  tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
927
928  delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
929
930  if (is_fa)
931      delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
932
933  dynSearchEng  += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
934
935  /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
936
937  power_matchline.searchOp.dynamic = dynSearchEng;
938
939  //leakage in one subarray
940  double Iport     = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0,  1, nmos, false, true);//TODO: how much is the idle time? just by *2?
941  double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0,  2, nmos, false, true);
942  double Icell     = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
943  double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv
944
945  leak_power_cc_inverters_sram_cell         = Icell * g_tp.cam_cell.Vdd;
946  leak_comparator_cam_cell                  = Icell_comparator * g_tp.cam_cell.Vdd;
947  leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
948  leak_power_RD_port_sram_cell              = Iport_erp * g_tp.cam_cell.Vdd;
949  leak_power_SCHP_port_sram_cell            = 0;//search port and r/w port are sperate, therefore no access txs in search ports
950
951  power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
952    leak_comparator_cam_cell +
953    leak_power_acc_tr_RW_or_WR_port_sram_cell +
954    leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
955    leak_power_RD_port_sram_cell * ERP +
956    leak_power_SCHP_port_sram_cell*SCHP;
957//  power_matchline.searchOp.leakage += leak_comparator_cam_cell;
958  power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
959  power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
960  power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
961  power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd;
962  //In idle states, the hit/miss txs are closed (on) therefore no Isub
963  power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
964    // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
965
966  //in idle state, Ig_on only possibly exist in access transistors of read only ports
967  double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
968  double Ig_cell     = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
969  double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2;
970
971  gate_leak_comparator_cam_cell          = Ig_cell_comparator* g_tp.cam_cell.Vdd;
972  gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd;
973  gate_leak_power_RD_port_sram_cell      = Ig_port_erp*g_tp.sram_cell.Vdd;
974  gate_leak_power_SCHP_port_sram_cell    = 0;
975
976  //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
977
978  power_matchline.searchOp.gate_leakage += gate_leak_power_cc_inverters_sram_cell;
979  power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
980  power_matchline.searchOp.gate_leakage += gate_leak_power_SCHP_port_sram_cell*SCHP + gate_leak_power_RD_port_sram_cell * ERP;
981  power_matchline.searchOp.gate_leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
982  power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(0, Wfaprechp,1, pmos) * g_tp.cam_cell.Vdd;
983  power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
984  power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
985  power_matchline.searchOp.gate_leakage += subarray.num_rows * cmos_Ig_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
986                                       + cmos_Ig_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
987
988
989   return out_time_ramp;
990}
991
992
993double Mat::width_write_driver_or_write_mux()
994{
995  // calculate resistance of SRAM cell pull-up PMOS transistor
996  // cam and sram have same cell trasistor properties
997  double R_sram_cell_pull_up_tr  = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
998  double R_access_tr             = tr_R_on(g_tp.sram.cell_a_w,    NCH, 1, is_dram, true);
999  double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
1000  double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
1001
1002  return width_write_driver_nmos;
1003}
1004
1005
1006
1007double Mat::compute_comparators_height(
1008    int tagbits,
1009    int number_ways_in_mat,
1010    double subarray_mem_cell_area_width)
1011{
1012  double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
1013  double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
1014  return cumulative_area / subarray_mem_cell_area_width;
1015}
1016
1017
1018
1019double Mat::compute_bitline_delay(double inrisetime)
1020{
1021  double V_b_pre, v_th_mem_cell, V_wl;
1022  double tstep;
1023  double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
1024  double R_cell_pull_down=0.0, R_cell_acc =0.0, r_dev=0.0;
1025  int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
1026
1027  double R_b_metal = camFlag? cam_cell.h:cell.h * g_tp.wire_local.R_per_um;
1028  double R_bl      = subarray.num_rows * R_b_metal;
1029  double C_bl      = subarray.C_bl;
1030
1031  // TODO: no leakage for DRAMs?
1032  double leak_power_cc_inverters_sram_cell = 0;
1033  double gate_leak_power_cc_inverters_sram_cell = 0;
1034  double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
1035  double leak_power_RD_port_sram_cell = 0;
1036  double gate_leak_power_RD_port_sram_cell = 0;
1037
1038  if (is_dram == true)
1039  {
1040    V_b_pre = g_tp.dram.Vbitpre;
1041    v_th_mem_cell = g_tp.dram_acc.Vth;
1042    V_wl = g_tp.vpp;
1043    //The access transistor is not folded. So we just need to specify a threshold value for the
1044    //folding width that is equal to or greater than Wmemcella.
1045    R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
1046    r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
1047  }
1048  else
1049  { //SRAM
1050    V_b_pre = g_tp.sram.Vbitpre;
1051    v_th_mem_cell = g_tp.sram_cell.Vth;
1052    V_wl = g_tp.sram_cell.Vdd;
1053    R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
1054    R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
1055
1056    //Leakage current of an SRAM cell
1057    double Iport     = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0,  1, nmos,false, true);//TODO: how much is the idle time? just by *2?
1058    double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0,  2, nmos,false, true);
1059    double Icell     = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true)*2;//two invs per cell
1060
1061    leak_power_cc_inverters_sram_cell         = Icell * g_tp.sram_cell.Vdd;
1062    leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
1063    leak_power_RD_port_sram_cell              = Iport_erp * g_tp.sram_cell.Vdd;
1064
1065
1066    //in idle state, Ig_on only possibly exist in access transistors of read only ports
1067    double Ig_port_erp   = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);
1068    double Ig_cell   = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true);
1069
1070    gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd;
1071    gate_leak_power_RD_port_sram_cell      = Ig_port_erp*g_tp.sram_cell.Vdd;
1072  }
1073
1074
1075  double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram);
1076  double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
1077  double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1078  double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
1079  double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
1080    drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1081    drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1082  double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1083
1084  if (is_dram)
1085  {
1086    double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl));
1087    tstep = 2.3 * fraction * r_dev *
1088      (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) /
1089      (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux));
1090    delay_writeback = tstep;
1091    dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
1092      (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
1093    dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) *
1094      (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100;
1095    per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
1096      (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
1097  }
1098  else
1099  {
1100    double tau;
1101
1102    if (deg_bl_muxing > 1)
1103    {
1104      tau = (R_cell_pull_down + R_cell_acc) *
1105        (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
1106        R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
1107        R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
1108        R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
1109      dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /*
1110        subarray.num_cols * num_subarrays_per_mat*/;
1111      dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch +  C_drain_sense_amp_mux) *
1112        2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing);
1113      dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
1114          num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
1115      //Write Ops are differential for SRAM
1116    }
1117    else
1118    {
1119      tau = (R_cell_pull_down + R_cell_acc) *
1120        (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
1121        R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
1122      dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
1123        2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
1124      dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
1125          num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
1126
1127    }
1128    tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
1129    power_bitline.readOp.leakage =
1130      leak_power_cc_inverters_sram_cell +
1131      leak_power_acc_tr_RW_or_WR_port_sram_cell +
1132      leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
1133      leak_power_RD_port_sram_cell * ERP;
1134    power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
1135      gate_leak_power_RD_port_sram_cell * ERP;
1136
1137  }
1138
1139//  cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl;
1140//  cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
1141//  cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
1142//  cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl;
1143
1144
1145  /* take input rise time into account */
1146  double m = V_wl / inrisetime;
1147  if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m))
1148  {
1149    delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell)/ m);
1150  }
1151  else
1152  {
1153    delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
1154  }
1155
1156  bool is_fa = (dp.fully_assoc) ? true : false;
1157
1158  if (dp.is_tag == false || is_fa == false)
1159  {
1160    power_bitline.readOp.dynamic  = dynRdEnergy;
1161    power_bitline.writeOp.dynamic = dynWriteEnergy;
1162  }
1163
1164  double outrisetime = 0;
1165  return outrisetime;
1166}
1167
1168
1169
1170double Mat::compute_sa_delay(double inrisetime)
1171{
1172  //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
1173
1174  //Bitline circuitry leakage.
1175  double Iiso     = simplified_pmos_leakage(g_tp.w_iso, is_dram);
1176  double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
1177  double IsenseN  = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
1178  double IsenseP  = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
1179
1180  double lkgIdlePh  = IsenseEn;//+ 2*IoBufP;
1181  //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
1182  double lkgReadPh  = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
1183  //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
1184  //    lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
1185  double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
1186  leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
1187  leak_power_sense_amps_open_page_state   = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
1188
1189  // sense amplifier has to drive logic in "data out driver" and sense precharge load.
1190  // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
1191  //constant as well as the magnitude of input differential voltage.
1192  double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
1193    drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1194    drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1195    drain_C_(g_tp.w_iso,PCH,1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1196    drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1197  double tau = C_ld / g_tp.gm_sense_amp_latch;
1198  delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
1199  power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
1200                            num_subarrays_per_mat * num_act_mats_hor_dir*/;
1201  power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
1202
1203  double outrisetime = 0;
1204  return outrisetime;
1205}
1206
1207
1208
1209double Mat::compute_subarray_out_drv(double inrisetime)
1210{
1211  double C_ld, rd, tf, this_delay;
1212  double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
1213
1214  // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
1215  rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
1216  C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1217    gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
1218  tf = rd * C_ld;
1219  this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1220  delay_subarray_out_drv += this_delay;
1221  inrisetime = this_delay/(1.0 - 0.5);
1222  power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1223  power_subarray_out_drv.readOp.leakage += 0;  // for now, let leakage of the pass transistor be 0
1224  power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
1225  // delay of signal through inverter-buffer to second level of sense-amp mux.
1226  // internal delay of buffer
1227  rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
1228  C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
1229    drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1230    gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
1231  tf = rd * C_ld;
1232  this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1233  delay_subarray_out_drv += this_delay;
1234  inrisetime = this_delay/(1.0 - 0.5);
1235  power_subarray_out_drv.readOp.dynamic      += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1236  power_subarray_out_drv.readOp.leakage      += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram)* g_tp.peri_global.Vdd;
1237  power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
1238
1239  // inverter driving drain of pass transistor of second level of sense-amp mux.
1240  rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
1241  C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
1242    drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1243    drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram);
1244  tf = rd * C_ld;
1245  this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1246  delay_subarray_out_drv += this_delay;
1247  inrisetime = this_delay/(1.0 - 0.5);
1248  power_subarray_out_drv.readOp.dynamic      += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1249  power_subarray_out_drv.readOp.leakage      += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
1250  power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
1251
1252
1253  // delay of signal through pass-transistor to input of subarray output driver.
1254  rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
1255  C_ld = dp.Ndsam_lev_2 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram) +
1256    //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
1257    gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
1258  tf = rd * C_ld;
1259  this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1260  delay_subarray_out_drv += this_delay;
1261  inrisetime = this_delay/(1.0 - 0.5);
1262  power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1263  power_subarray_out_drv.readOp.leakage += 0;  // for now, let leakage of the pass transistor be 0
1264  power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
1265
1266
1267  return inrisetime;
1268}
1269
1270
1271
1272double Mat::compute_comparator_delay(double inrisetime)
1273{
1274  int A = g_ip->tag_assoc;
1275
1276  int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
1277  // a multiple of 4.
1278
1279  /* First Inverter */
1280  double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) +
1281               drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1282               drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1283  double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
1284  double tf  = Req*Ceq;
1285  double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL);
1286  double nextinputtime = st1del/VTHCOMPINV;
1287  power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1288
1289  //For each degree of associativity
1290  //there are 4 such quarter comparators
1291  double lkgCurrent   = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
1292  double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
1293  /* Second Inverter */
1294  Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) +
1295    drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1296    drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1297  Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
1298  tf = Req*Ceq;
1299  double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE);
1300  nextinputtime = st2del/(1.0-VTHCOMPINV);
1301  power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1302  lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
1303  gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
1304
1305  /* Third Inverter */
1306  Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) +
1307    drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1308    drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1309  Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
1310  tf = Req*Ceq;
1311  double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL);
1312  nextinputtime = st3del/(VTHEVALINV);
1313  power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1314  lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
1315  gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
1316
1317  /* Final Inverter (virtual ground driver) discharging compare part */
1318  double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram);
1319  double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */
1320  double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
1321                   drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
1322       drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
1323       drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram);
1324  double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
1325                          drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
1326    drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
1327    gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram);
1328  power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1329  power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *  (A - 1);
1330  lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
1331  lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;  // stack factor of 0.2
1332
1333  gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
1334  gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter
1335
1336  /* time to go to threshold of mux driver */
1337  double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND);
1338  /* take into account non-zero input rise time */
1339  double m = g_tp.peri_global.Vdd/nextinputtime;
1340  double Tcomparatorni;
1341
1342  if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m))
1343  {
1344    double a = m;
1345    double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
1346    double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
1347    Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a);
1348  }
1349  else
1350  {
1351    Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m;
1352  }
1353  delay_comparator = Tcomparatorni+st1del+st2del+st3del;
1354  power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
1355  power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
1356
1357  return Tcomparatorni / (1.0 - VTHMUXNAND);;
1358}
1359
1360
1361
1362void Mat::compute_power_energy()
1363{
1364        //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
1365    //when search all subarrays and all mats are fully active
1366        //when plain read/write only one subarray in a single mat is active.
1367
1368    // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
1369  power.readOp.dynamic += r_predec->power.readOp.dynamic +
1370                          b_mux_predec->power.readOp.dynamic +
1371                          sa_mux_lev_1_predec->power.readOp.dynamic +
1372                          sa_mux_lev_2_predec->power.readOp.dynamic;
1373
1374  // add energy consumed in decoders
1375  power_row_decoders.readOp.dynamic        = row_dec->power.readOp.dynamic;
1376  if (!(is_fa||pure_cam))
1377    power_row_decoders.readOp.dynamic        *= num_subarrays_per_mat;
1378
1379  // add energy consumed in bitline prechagers, SAs, and bitlines
1380  if (!(is_fa||pure_cam))
1381  {
1382          // add energy consumed in bitline prechagers
1383          power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
1384          power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
1385
1386          //Add sense amps energy
1387          num_sa_subarray = subarray.num_cols / deg_bl_muxing;
1388          power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ;
1389
1390          // add energy consumed in bitlines
1391          //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
1392          power_bitline.readOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
1393          power_bitline.writeOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
1394          //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
1395          //Add subarray output energy
1396          power_subarray_out_drv.readOp.dynamic =
1397                  (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1398
1399          power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1400                                  power_sa.readOp.dynamic +
1401                                  power_bitline.readOp.dynamic +
1402                                  power_subarray_out_drv.readOp.dynamic;
1403
1404          power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1405                                  bit_mux_dec->power.readOp.dynamic +
1406                                  sa_mux_lev_1_dec->power.readOp.dynamic +
1407                                  sa_mux_lev_2_dec->power.readOp.dynamic +
1408                                  power_comparator.readOp.dynamic;
1409  }
1410
1411  else if (is_fa)
1412  {
1413          //for plain read/write only one subarray in a mat is active
1414          // add energy consumed in bitline prechagers
1415          power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
1416                   + cam_bl_precharge_eq_drv->power.readOp.dynamic;
1417          power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
1418
1419          //Add sense amps energy
1420          num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing;
1421          num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing;
1422          power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search;
1423          power_sa.readOp.dynamic *= num_sa_subarray;
1424
1425
1426          // add energy consumed in bitlines
1427          power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
1428          power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
1429          power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
1430          power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
1431
1432          //Add subarray output energy
1433      power_subarray_out_drv.searchOp.dynamic =
1434                  (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
1435          power_subarray_out_drv.readOp.dynamic =
1436                  (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1437
1438
1439          power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1440                                  power_sa.readOp.dynamic +
1441                                  power_bitline.readOp.dynamic +
1442                                  power_subarray_out_drv.readOp.dynamic;
1443
1444          power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1445                                  bit_mux_dec->power.readOp.dynamic +
1446                                  sa_mux_lev_1_dec->power.readOp.dynamic +
1447                                  sa_mux_lev_2_dec->power.readOp.dynamic +
1448                                  power_comparator.readOp.dynamic;
1449
1450          //add energy consumed inside cam
1451          power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
1452          power_searchline_precharge = sl_precharge_eq_drv->power;
1453      power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1454      power_searchline = sl_data_drv->power;
1455      power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
1456      power_matchline_precharge  = ml_precharge_drv->power;
1457      power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
1458      power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
1459      power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
1460
1461          power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
1462          power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
1463          power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
1464          power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
1465
1466          power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
1467          //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1468
1469  }
1470  else
1471  {
1472          // add energy consumed in bitline prechagers
1473          power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
1474          //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
1475          //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
1476          //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
1477
1478          //Add sense amps energy
1479          num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing;
1480          power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
1481          power_sa.searchOp.dynamic = 0;
1482
1483          power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
1484          power_bitline.searchOp.dynamic = 0;
1485          power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
1486
1487          power_subarray_out_drv.searchOp.dynamic =
1488                  (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
1489          power_subarray_out_drv.readOp.dynamic =
1490                          (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1491
1492          power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1493                                  power_sa.readOp.dynamic +
1494                                  power_bitline.readOp.dynamic +
1495                                  power_subarray_out_drv.readOp.dynamic;
1496
1497          power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1498                                  bit_mux_dec->power.readOp.dynamic +
1499                                  sa_mux_lev_1_dec->power.readOp.dynamic +
1500                                  sa_mux_lev_2_dec->power.readOp.dynamic +
1501                                  power_comparator.readOp.dynamic;
1502
1503
1504          ////add energy consumed inside cam
1505          power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
1506          power_searchline_precharge = sl_precharge_eq_drv->power;
1507      power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1508      power_searchline = sl_data_drv->power;
1509      power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
1510      power_matchline_precharge  = ml_precharge_drv->power;
1511      power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
1512      power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
1513      power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
1514
1515          power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
1516          power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
1517          power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
1518          power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
1519
1520          power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
1521          //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1522
1523  }
1524
1525
1526
1527  // calculate leakage power
1528  if (!(is_fa || pure_cam))
1529  {
1530        int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1531
1532        power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1533    power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1534    power_sa.readOp.leakage                 *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
1535
1536    //num_sa_subarray             = subarray.num_cols / deg_bl_muxing;
1537    power_subarray_out_drv.readOp.leakage =
1538      (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1539      number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
1540
1541    power.readOp.leakage += power_bitline.readOp.leakage +
1542                            power_bl_precharge_eq_drv.readOp.leakage +
1543                            power_sa.readOp.leakage +
1544                            power_subarray_out_drv.readOp.leakage;
1545    //cout<<"leakage"<<power.readOp.leakage<<endl;
1546
1547    power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
1548    power.readOp.leakage += power_comparator.readOp.leakage;
1549
1550    //cout<<"leakage1"<<power.readOp.leakage<<endl;
1551
1552    // leakage power
1553    power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
1554    power_bit_mux_decoders.readOp.leakage      = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
1555    power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
1556    power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
1557
1558    power.readOp.leakage += r_predec->power.readOp.leakage +
1559                          b_mux_predec->power.readOp.leakage +
1560                          sa_mux_lev_1_predec->power.readOp.leakage +
1561                          sa_mux_lev_2_predec->power.readOp.leakage +
1562                          power_row_decoders.readOp.leakage +
1563                          power_bit_mux_decoders.readOp.leakage +
1564                          power_sa_mux_lev_1_decoders.readOp.leakage +
1565                          power_sa_mux_lev_2_decoders.readOp.leakage;
1566    //cout<<"leakage2"<<power.readOp.leakage<<endl;
1567
1568    //++++Below is gate leakage
1569        power_bitline.readOp.gate_leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1570    power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1571    power_sa.readOp.gate_leakage                 *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
1572
1573    //num_sa_subarray             = subarray.num_cols / deg_bl_muxing;
1574    power_subarray_out_drv.readOp.gate_leakage =
1575      (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1576      number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
1577
1578    power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
1579                            power_bl_precharge_eq_drv.readOp.gate_leakage +
1580                            power_sa.readOp.gate_leakage +
1581                            power_subarray_out_drv.readOp.gate_leakage;
1582    //cout<<"leakage"<<power.readOp.leakage<<endl;
1583
1584    power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
1585    power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
1586
1587    //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
1588
1589    // gate_leakage power
1590    power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
1591    power_bit_mux_decoders.readOp.gate_leakage      = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
1592    power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
1593    power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
1594
1595    power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1596                          b_mux_predec->power.readOp.gate_leakage +
1597                          sa_mux_lev_1_predec->power.readOp.gate_leakage +
1598                          sa_mux_lev_2_predec->power.readOp.gate_leakage +
1599                          power_row_decoders.readOp.gate_leakage +
1600                          power_bit_mux_decoders.readOp.gate_leakage +
1601                          power_sa_mux_lev_1_decoders.readOp.gate_leakage +
1602                          power_sa_mux_lev_2_decoders.readOp.gate_leakage;
1603  }
1604  else if (is_fa)
1605  {
1606          int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1607
1608          power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1609          power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1610          power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1611          power_sa.readOp.leakage                 *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
1612
1613          //cout<<"leakage3"<<power.readOp.leakage<<endl;
1614
1615
1616          power_subarray_out_drv.readOp.leakage =
1617                  (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1618                  number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1619
1620          power.readOp.leakage += power_bitline.readOp.leakage +
1621                                  power_bl_precharge_eq_drv.readOp.leakage +
1622                                  power_bl_precharge_eq_drv.searchOp.leakage +
1623                                  power_sa.readOp.leakage +
1624                                  power_subarray_out_drv.readOp.leakage;
1625
1626          //cout<<"leakage4"<<power.readOp.leakage<<endl;
1627
1628          // leakage power
1629          power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
1630          power.readOp.leakage += r_predec->power.readOp.leakage +
1631                                  power_row_decoders.readOp.leakage;
1632
1633          //cout<<"leakage5"<<power.readOp.leakage<<endl;
1634
1635          //inside cam
1636          power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
1637          power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
1638          power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
1639          power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
1640          power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
1641
1642          power.readOp.leakage += power_cam_all_active.searchOp.leakage;
1643
1644//	  cout<<"leakage6"<<power.readOp.leakage<<endl;
1645
1646          //+++Below is gate leakage
1647          power_bitline.readOp.gate_leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1648          power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1649          power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1650          power_sa.readOp.gate_leakage                 *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
1651
1652          //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
1653
1654
1655          power_subarray_out_drv.readOp.gate_leakage =
1656                  (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1657                  number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1658
1659          power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
1660          power_bl_precharge_eq_drv.readOp.gate_leakage +
1661          power_bl_precharge_eq_drv.searchOp.gate_leakage +
1662          power_sa.readOp.gate_leakage +
1663          power_subarray_out_drv.readOp.gate_leakage;
1664
1665          //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
1666
1667          // gate_leakage power
1668          power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
1669          power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1670          power_row_decoders.readOp.gate_leakage;
1671
1672          //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
1673
1674          //inside cam
1675          power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
1676          power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
1677          power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
1678          power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
1679          power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
1680
1681          power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
1682
1683  }
1684  else
1685  {
1686          int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1687
1688          //power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1689          //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1690          power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1691          power_sa.readOp.leakage                 *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
1692
1693
1694          power_subarray_out_drv.readOp.leakage =
1695                  (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1696                  number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1697
1698          power.readOp.leakage += //power_bitline.readOp.leakage +
1699                                  //power_bl_precharge_eq_drv.readOp.leakage +
1700                                  power_bl_precharge_eq_drv.searchOp.leakage +
1701                                  power_sa.readOp.leakage +
1702                                  power_subarray_out_drv.readOp.leakage;
1703
1704          // leakage power
1705          power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
1706          power.readOp.leakage += r_predec->power.readOp.leakage +
1707                                  power_row_decoders.readOp.leakage;
1708
1709          //inside cam
1710          power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
1711          power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
1712          power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
1713          power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
1714          power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
1715
1716          power.readOp.leakage += power_cam_all_active.searchOp.leakage;
1717
1718          //+++Below is gate leakage
1719          power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1720          power_sa.readOp.gate_leakage                 *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
1721
1722
1723          power_subarray_out_drv.readOp.gate_leakage =
1724                  (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1725                  number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1726
1727          power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
1728                                  //power_bl_precharge_eq_drv.readOp.gate_leakage +
1729                                  power_bl_precharge_eq_drv.searchOp.gate_leakage +
1730                                  power_sa.readOp.gate_leakage +
1731                                  power_subarray_out_drv.readOp.gate_leakage;
1732
1733          // gate_leakage power
1734          power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
1735          power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1736                                  power_row_decoders.readOp.gate_leakage;
1737
1738          //inside cam
1739          power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
1740          power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
1741          power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
1742          power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
1743          power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
1744
1745          power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
1746  }
1747}
1748
1749