mat.cc (10152:52c552138ba1) mat.cc (10234:5cb711fa6176)
1/*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
1/*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
5 * All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
6 * All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 ***************************************************************************/
31
32
33
34#include <cassert>
35
36#include "mat.h"
37
38Mat::Mat(const DynamicParameter & dyn_p)
30 *
31 ***************************************************************************/
32
33
34
35#include <cassert>
36
37#include "mat.h"
38
39Mat::Mat(const DynamicParameter & dyn_p)
39 :dp(dyn_p),
40 power_subarray_out_drv(),
41 delay_fa_tag(0), delay_cam(0),
42 delay_before_decoder(0), delay_bitline(0),
43 delay_wl_reset(0), delay_bl_restore(0),
44 delay_searchline(0), delay_matchchline(0),
45 delay_cam_sl_restore(0), delay_cam_ml_reset(0),
46 delay_fa_ram_wl(0),delay_hit_miss_reset(0),
47 delay_hit_miss(0),
48 subarray(dp, dp.fully_assoc),
49 power_bitline(), per_bitline_read_energy(0),
50 deg_bl_muxing(dp.deg_bl_muxing),
51 num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
52 delay_writeback(0),
53 cell(subarray.cell), cam_cell(subarray.cam_cell),
54 is_dram(dyn_p.is_dram),
55 pure_cam(dyn_p.pure_cam),
56 num_mats(dp.num_mats),
57 power_sa(), delay_sa(0),
58 leak_power_sense_amps_closed_page_state(0),
59 leak_power_sense_amps_open_page_state(0),
60 delay_subarray_out_drv(0),
61 delay_comparator(0), power_comparator(),
62 num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
63 num_subarrays_per_mat(dp.num_subarrays/dp.num_mats),
64 num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir)
65{
66 assert(num_subarrays_per_mat <= 4);
67 assert(num_subarrays_per_row <= 2);
68 is_fa = (dp.fully_assoc) ? true : false;
69 camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
40 : dp(dyn_p),
41 power_subarray_out_drv(),
42 delay_fa_tag(0), delay_cam(0),
43 delay_before_decoder(0), delay_bitline(0),
44 delay_wl_reset(0), delay_bl_restore(0),
45 delay_searchline(0), delay_matchchline(0),
46 delay_cam_sl_restore(0), delay_cam_ml_reset(0),
47 delay_fa_ram_wl(0), delay_hit_miss_reset(0),
48 delay_hit_miss(0),
49 subarray(dp, dp.fully_assoc),
50 power_bitline(), per_bitline_read_energy(0),
51 deg_bl_muxing(dp.deg_bl_muxing),
52 num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
53 delay_writeback(0),
54 cell(subarray.cell), cam_cell(subarray.cam_cell),
55 is_dram(dyn_p.is_dram),
56 pure_cam(dyn_p.pure_cam),
57 num_mats(dp.num_mats),
58 power_sa(), delay_sa(0),
59 leak_power_sense_amps_closed_page_state(0),
60 leak_power_sense_amps_open_page_state(0),
61 delay_subarray_out_drv(0),
62 delay_comparator(0), power_comparator(),
63 num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
64 num_subarrays_per_mat(dp.num_subarrays / dp.num_mats),
65 num_subarrays_per_row(dp.Ndwl / dp.num_mats_h_dir) {
66 assert(num_subarrays_per_mat <= 4);
67 assert(num_subarrays_per_row <= 2);
68 is_fa = (dp.fully_assoc) ? true : false;
69 camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
70
70
71 if (is_fa || pure_cam)
72 num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat;
71 if (is_fa || pure_cam) {
72 num_subarrays_per_row = num_subarrays_per_mat > 2 ?
73 num_subarrays_per_mat / 2 : num_subarrays_per_mat;
74 }
73
75
74 if (dp.use_inp_params == 1) {
75 RWP = dp.num_rw_ports;
76 ERP = dp.num_rd_ports;
77 EWP = dp.num_wr_ports;
78 SCHP = dp.num_search_ports;
79 }
80 else {
81 RWP = g_ip->num_rw_ports;
82 ERP = g_ip->num_rd_ports;
83 EWP = g_ip->num_wr_ports;
84 SCHP = g_ip->num_search_ports;
76 if (dp.use_inp_params == 1) {
77 RWP = dp.num_rw_ports;
78 ERP = dp.num_rd_ports;
79 EWP = dp.num_wr_ports;
80 SCHP = dp.num_search_ports;
81 } else {
82 RWP = g_ip->num_rw_ports;
83 ERP = g_ip->num_rd_ports;
84 EWP = g_ip->num_wr_ports;
85 SCHP = g_ip->num_search_ports;
85
86
86 }
87 }
87
88
88 double number_sa_subarray;
89 double number_sa_subarray;
89
90
90 if (!is_fa && !pure_cam)
91 {
92 number_sa_subarray = subarray.num_cols / deg_bl_muxing;
93 }
94 else if (is_fa && !pure_cam)
95 {
96 number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
97 }
91 if (!is_fa && !pure_cam) {
92 number_sa_subarray = subarray.num_cols / deg_bl_muxing;
93 } else if (is_fa && !pure_cam) {
94 number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
95 }
98
96
99 else
100 {
101 number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing;
102 }
97 else {
98 number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing;
99 }
103
100
104 int num_dec_signals = subarray.num_rows;
105 double C_ld_bit_mux_dec_out = 0;
106 double C_ld_sa_mux_lev_1_dec_out = 0;
107 double C_ld_sa_mux_lev_2_dec_out = 0;
108 double R_wire_wl_drv_out;
101 int num_dec_signals = subarray.num_rows;
102 double C_ld_bit_mux_dec_out = 0;
103 double C_ld_sa_mux_lev_1_dec_out = 0;
104 double C_ld_sa_mux_lev_2_dec_out = 0;
105 double R_wire_wl_drv_out;
109
106
110 if (!is_fa && !pure_cam)
111 {
112 R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
113 }
114 else if (is_fa && !pure_cam)
115 {
107 if (!is_fa && !pure_cam) {
108 R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
109 } else if (is_fa && !pure_cam) {
116 R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
110 R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
117 }
118 else
119 {
111 } else {
120 R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um;
121 }
122
112 R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um;
113 }
114
123 double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
124 double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
115 double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
116 double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
125
117
126 if (deg_bl_muxing > 1)
127 {
128 C_ld_bit_mux_dec_out =
129 (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell
130 num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
131 }
118 if (deg_bl_muxing > 1) {
119 C_ld_bit_mux_dec_out =
120 (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing) *
121 gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell
122 num_subarrays_per_row * subarray.num_cols *
123 g_tp.wire_inside_mat.C_per_um * cell.get_w();
124 }
132
125
133 if (dp.Ndsam_lev_1 > 1)
134 {
135 C_ld_sa_mux_lev_1_dec_out =
136 (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
137 num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
138 }
139 if (dp.Ndsam_lev_2 > 1)
140 {
141 C_ld_sa_mux_lev_2_dec_out =
142 (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
143 num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
144 }
126 if (dp.Ndsam_lev_1 > 1) {
127 C_ld_sa_mux_lev_1_dec_out =
128 (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1) *
129 gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
130 num_subarrays_per_row * subarray.num_cols *
131 g_tp.wire_inside_mat.C_per_um * cell.get_w();
132 }
133 if (dp.Ndsam_lev_2 > 1) {
134 C_ld_sa_mux_lev_2_dec_out =
135 (num_subarrays_per_mat * number_sa_subarray /
136 (dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) *
137 gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
138 num_subarrays_per_row * subarray.num_cols *
139 g_tp.wire_inside_mat.C_per_um * cell.get_w();
140 }
145
141
146 if (num_subarrays_per_row >= 2)
147 {
148 // wire heads for both right and left side of a mat, so half the resistance
149 R_wire_bit_mux_dec_out /= 2.0;
150 R_wire_sa_mux_dec_out /= 2.0;
151 }
142 if (num_subarrays_per_row >= 2) {
143 // wire heads for both right and left side of a mat, so half the resistance
144 R_wire_bit_mux_dec_out /= 2.0;
145 R_wire_sa_mux_dec_out /= 2.0;
146 }
152
153
147
148
154 row_dec = new Decoder(
155 num_dec_signals,
156 false,
157 subarray.C_wl,
158 R_wire_wl_drv_out,
159 false/*is_fa*/,
160 is_dram,
161 true,
162 camFlag? cam_cell:cell);
149 row_dec = new Decoder(
150 num_dec_signals,
151 false,
152 subarray.C_wl,
153 R_wire_wl_drv_out,
154 false/*is_fa*/,
155 is_dram,
156 true,
157 camFlag ? cam_cell : cell);
163// if (is_fa && (!dp.is_tag))
164// {
165// row_dec->exist = true;
166// }
158// if (is_fa && (!dp.is_tag))
159// {
160// row_dec->exist = true;
161// }
167 bit_mux_dec = new Decoder(
168 deg_bl_muxing,// This number is 1 for FA or CAM
169 false,
170 C_ld_bit_mux_dec_out,
171 R_wire_bit_mux_dec_out,
172 false/*is_fa*/,
173 is_dram,
174 false,
175 camFlag? cam_cell:cell);
176 sa_mux_lev_1_dec = new Decoder(
177 dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
178 dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
179 C_ld_sa_mux_lev_1_dec_out,
180 R_wire_sa_mux_dec_out,
181 false/*is_fa*/,
182 is_dram,
183 false,
184 camFlag? cam_cell:cell);
185 sa_mux_lev_2_dec = new Decoder(
186 dp.Ndsam_lev_2, // This number is 1 for FA or CAM
187 false,
188 C_ld_sa_mux_lev_2_dec_out,
189 R_wire_sa_mux_dec_out,
190 false/*is_fa*/,
191 is_dram,
192 false,
193 camFlag? cam_cell:cell);
162 bit_mux_dec = new Decoder(
163 deg_bl_muxing,// This number is 1 for FA or CAM
164 false,
165 C_ld_bit_mux_dec_out,
166 R_wire_bit_mux_dec_out,
167 false/*is_fa*/,
168 is_dram,
169 false,
170 camFlag ? cam_cell : cell);
171 sa_mux_lev_1_dec = new Decoder(
172 dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
173 dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
174 C_ld_sa_mux_lev_1_dec_out,
175 R_wire_sa_mux_dec_out,
176 false/*is_fa*/,
177 is_dram,
178 false,
179 camFlag ? cam_cell : cell);
180 sa_mux_lev_2_dec = new Decoder(
181 dp.Ndsam_lev_2, // This number is 1 for FA or CAM
182 false,
183 C_ld_sa_mux_lev_2_dec_out,
184 R_wire_sa_mux_dec_out,
185 false/*is_fa*/,
186 is_dram,
187 false,
188 camFlag ? cam_cell : cell);
194
189
195 double C_wire_predec_blk_out;
196 double R_wire_predec_blk_out;
190 double C_wire_predec_blk_out;
191 double R_wire_predec_blk_out;
197
192
198 if (!is_fa && !pure_cam)
199 {
193 if (!is_fa && !pure_cam) {
200
194
201 C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
202 R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
195 C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
196 R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
203
197
204 }
205 else //for pre-decode block's load is same for both FA and CAM
206 {
207 C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
208 R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
209 }
198 } else { //for pre-decode block's load is same for both FA and CAM
199 C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
200 R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
201 }
210
211
202
203
212 if (is_fa||pure_cam)
213 num_dec_signals += _log2(num_subarrays_per_mat);
204 if (is_fa || pure_cam)
205 num_dec_signals += _log2(num_subarrays_per_mat);
214
206
215 PredecBlk * r_predec_blk1 = new PredecBlk(
216 num_dec_signals,
217 row_dec,
218 C_wire_predec_blk_out,
219 R_wire_predec_blk_out,
220 num_subarrays_per_mat,
221 is_dram,
222 true);
223 PredecBlk * r_predec_blk2 = new PredecBlk(
224 num_dec_signals,
225 row_dec,
226 C_wire_predec_blk_out,
227 R_wire_predec_blk_out,
228 num_subarrays_per_mat,
229 is_dram,
230 false);
231 PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
232 PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
233 PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
234 PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
235 PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
236 PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
237 dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
238 dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
207 PredecBlk * r_predec_blk1 = new PredecBlk(
208 num_dec_signals,
209 row_dec,
210 C_wire_predec_blk_out,
211 R_wire_predec_blk_out,
212 num_subarrays_per_mat,
213 is_dram,
214 true);
215 PredecBlk * r_predec_blk2 = new PredecBlk(
216 num_dec_signals,
217 row_dec,
218 C_wire_predec_blk_out,
219 R_wire_predec_blk_out,
220 num_subarrays_per_mat,
221 is_dram,
222 false);
223 PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
224 PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
225 PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
226 PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
227 PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
228 PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
229 dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
230 dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
239
231
240 PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
241 PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
242 PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
243 PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
244 PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
245 PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
246 PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
247 PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
248 way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
249 dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
232 PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
233 PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
234 PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
235 PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
236 PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
237 PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
238 PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
239 PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
240 way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
241 dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
250
242
251 r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
252 b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
253 sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
254 sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
243 r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
244 b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
245 sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
246 sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
255
247
256 subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
248 subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
257
249
258 double driver_c_gate_load;
259 double driver_c_wire_load;
260 double driver_r_wire_load;
250 double driver_c_gate_load;
251 double driver_c_wire_load;
252 double driver_r_wire_load;
261
253
262 if (is_fa || pure_cam)
254 if (is_fa || pure_cam)
263
255
264 { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
265 driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
266 driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
267 driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
268 cam_bl_precharge_eq_drv = new Driver(
269 driver_c_gate_load,
270 driver_c_wire_load,
271 driver_r_wire_load,
272 is_dram);
256 { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
257 driver_c_gate_load = (subarray.num_cols_fa_cam ) *
258 gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
259 is_dram, false, false);
260 driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
261 g_tp.wire_outside_mat.C_per_um;
262 driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
263 g_tp.wire_outside_mat.R_per_um;
264 cam_bl_precharge_eq_drv = new Driver(
265 driver_c_gate_load,
266 driver_c_wire_load,
267 driver_r_wire_load,
268 is_dram);
273
269
274 if (!pure_cam)
275 {
276 //This is only used for fully asso not pure CAM
277 driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
278 driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um;
279 driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um;
280 bl_precharge_eq_drv = new Driver(
281 driver_c_gate_load,
282 driver_c_wire_load,
283 driver_r_wire_load,
284 is_dram);
285 }
286 }
270 if (!pure_cam) {
271 //This is only used for fully asso not pure CAM
272 driver_c_gate_load = (subarray.num_cols_fa_ram ) *
273 gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
274 is_dram, false, false);
275 driver_c_wire_load = subarray.num_cols_fa_ram * cell.w *
276 g_tp.wire_outside_mat.C_per_um;
277 driver_r_wire_load = subarray.num_cols_fa_ram * cell.w *
278 g_tp.wire_outside_mat.R_per_um;
279 bl_precharge_eq_drv = new Driver(
280 driver_c_gate_load,
281 driver_c_wire_load,
282 driver_r_wire_load,
283 is_dram);
284 }
285 }
287
286
288 else
289 {
290 driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
291 driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
292 driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
293 bl_precharge_eq_drv = new Driver(
294 driver_c_gate_load,
295 driver_c_wire_load,
296 driver_r_wire_load,
297 is_dram);
298 }
299 double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
300 double w_row_decoder = area_row_decoder / subarray.area.get_h();
287 else {
288 driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
289 driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
290 driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
291 bl_precharge_eq_drv = new Driver(
292 driver_c_gate_load,
293 driver_c_wire_load,
294 driver_r_wire_load,
295 is_dram);
296 }
297 double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
298 double w_row_decoder = area_row_decoder / subarray.area.get_h();
301
299
302 double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
303 compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
300 double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
301 compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
304
302
305 double h_subarray_out_drv = subarray_out_wire->area.get_area() *
306 (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
303 double h_subarray_out_drv = subarray_out_wire->area.get_area() *
304 (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
307
308
305
306
309 h_subarray_out_drv *= (RWP + ERP + SCHP);
307 h_subarray_out_drv *= (RWP + ERP + SCHP);
310
308
311 double h_comparators = 0.0;
312 double w_row_predecode_output_wires = 0.0;
313 double h_bit_mux_dec_out_wires = 0.0;
314 double h_senseamp_mux_dec_out_wires = 0.0;
309 double h_comparators = 0.0;
310 double w_row_predecode_output_wires = 0.0;
311 double h_bit_mux_dec_out_wires = 0.0;
312 double h_senseamp_mux_dec_out_wires = 0.0;
315
313
316 if ((!is_fa)&&(dp.is_tag))
317 {
318 //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
319 h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
320 h_comparators *= (RWP + ERP);
321 }
314 if ((!is_fa) && (dp.is_tag)) {
315 //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
316 h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
317 h_comparators *= (RWP + ERP);
318 }
322
323
324 int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits);
325 int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits);
326 w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) *
319
320
321 int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits);
322 int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits);
323 w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) *
327 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
324 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
328
329
325
326
330 double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
331 (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
332 h_subarray_out_drv + h_comparators);
327 double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
328 (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
329 h_subarray_out_drv + h_comparators);
333
330
334 double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
331 double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
335
332
336 if (deg_bl_muxing > 1)
337 {
338 h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
339 }
340 if (dp.Ndsam_lev_1 > 1)
341 {
342 h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
343 }
344 if (dp.Ndsam_lev_2 > 1)
345 {
346 h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
347 }
333 if (deg_bl_muxing > 1) {
334 h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
335 }
336 if (dp.Ndsam_lev_1 > 1) {
337 h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
338 }
339 if (dp.Ndsam_lev_2 > 1) {
340 h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
341 }
348
342
349 double h_addr_datain_wires;
350 if (!g_ip->ver_htree_wires_over_array)
351 {
352 h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +
353 (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) *
354 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
343 double h_addr_datain_wires;
344 if (!g_ip->ver_htree_wires_over_array) {
345 h_addr_datain_wires = (dp.number_addr_bits_mat +
346 dp.number_way_select_signals_mat +
347 (dp.num_di_b_mat + dp.num_do_b_mat) /
348 num_subarrays_per_row) *
349 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
355
350
356 if (is_fa || pure_cam)
357 {
358 h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit
359 (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) *
360 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
361 (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP;
351 if (is_fa || pure_cam) {
352 h_addr_datain_wires =
353 (dp.number_addr_bits_mat +
354 dp.number_way_select_signals_mat + //TODO: revisit
355 (dp.num_di_b_mat + dp.num_do_b_mat ) / num_subarrays_per_row) *
356 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
357 (dp.num_si_b_mat + dp.num_so_b_mat ) / num_subarrays_per_row *
358 g_tp.wire_inside_mat.pitch * SCHP;
359 }
360 //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
361 //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
362 h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
363 h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
364 h_addr_datain_wires +
365 h_bit_mux_dec_out_wires +
366 h_senseamp_mux_dec_out_wires;
367
362 }
368 }
363 //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
364 //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
365 h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
366 h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
367 h_addr_datain_wires +
368 h_bit_mux_dec_out_wires +
369 h_senseamp_mux_dec_out_wires;
370
369
371 }
370 // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
371 double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
372 b_mux_predec_blk_drv1->area.get_area() +
373 sa_mux_lev_1_predec_blk_drv1->area.get_area() +
374 sa_mux_lev_2_predec_blk_drv1->area.get_area() +
375 way_sel_drv1->area.get_area() +
376 r_predec_blk_drv2->area.get_area() +
377 b_mux_predec_blk_drv2->area.get_area() +
378 sa_mux_lev_1_predec_blk_drv2->area.get_area() +
379 sa_mux_lev_2_predec_blk_drv2->area.get_area() +
380 r_predec_blk1->area.get_area() +
381 b_mux_predec_blk1->area.get_area() +
382 sa_mux_lev_1_predec_blk1->area.get_area() +
383 sa_mux_lev_2_predec_blk1->area.get_area() +
384 r_predec_blk2->area.get_area() +
385 b_mux_predec_blk2->area.get_area() +
386 sa_mux_lev_1_predec_blk2->area.get_area() +
387 sa_mux_lev_2_predec_blk2->area.get_area() +
388 bit_mux_dec->area.get_area() +
389 sa_mux_lev_1_dec->area.get_area() +
390 sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
372
391
373 // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
374 double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
375 b_mux_predec_blk_drv1->area.get_area() +
376 sa_mux_lev_1_predec_blk_drv1->area.get_area() +
377 sa_mux_lev_2_predec_blk_drv1->area.get_area() +
378 way_sel_drv1->area.get_area() +
379 r_predec_blk_drv2->area.get_area() +
380 b_mux_predec_blk_drv2->area.get_area() +
381 sa_mux_lev_1_predec_blk_drv2->area.get_area() +
382 sa_mux_lev_2_predec_blk_drv2->area.get_area() +
383 r_predec_blk1->area.get_area() +
384 b_mux_predec_blk1->area.get_area() +
385 sa_mux_lev_1_predec_blk1->area.get_area() +
386 sa_mux_lev_2_predec_blk1->area.get_area() +
387 r_predec_blk2->area.get_area() +
388 b_mux_predec_blk2->area.get_area() +
389 sa_mux_lev_1_predec_blk2->area.get_area() +
390 sa_mux_lev_2_predec_blk2->area.get_area() +
391 bit_mux_dec->area.get_area() +
392 sa_mux_lev_1_dec->area.get_area() +
393 sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
392 double area_efficiency_mat;
394
393
395 double area_efficiency_mat;
396
397// if (!is_fa)
398// {
394// if (!is_fa)
395// {
399 assert(num_subarrays_per_mat/num_subarrays_per_row>0);
400 area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area;
396 assert(num_subarrays_per_mat / num_subarrays_per_row > 0);
397 area.h = (num_subarrays_per_mat / num_subarrays_per_row) *
398 subarray.area.h + h_non_cell_area;
401 area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
399 area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
402 area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
403 area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area();
400 area.w = (area.h * area.w + area_mat_center_circuitry) / area.h;
401 area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat *
402 100.0 / area.get_area();
404
405// cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl;
406// cout<<"h_comparators"<<h_comparators<<endl;
407// cout<<"h_subarray_out_drv"<<h_subarray_out_drv<<endl;
408// cout<<"h_addr_datain_wires"<<h_addr_datain_wires<<endl;
409// cout<<"h_bit_mux_dec_out_wires"<<h_bit_mux_dec_out_wires<<endl;
410// cout<<"h_senseamp_mux_dec_out_wires"<<h_senseamp_mux_dec_out_wires<<endl;
411// cout<<"h_non_cell_area"<<h_non_cell_area<<endl;
412// cout<<"area.h =" << (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h<<endl;
413// cout<<"w_non_cell_area"<<w_non_cell_area<<endl;
414// cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl;
415
403
404// cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl;
405// cout<<"h_comparators"<<h_comparators<<endl;
406// cout<<"h_subarray_out_drv"<<h_subarray_out_drv<<endl;
407// cout<<"h_addr_datain_wires"<<h_addr_datain_wires<<endl;
408// cout<<"h_bit_mux_dec_out_wires"<<h_bit_mux_dec_out_wires<<endl;
409// cout<<"h_senseamp_mux_dec_out_wires"<<h_senseamp_mux_dec_out_wires<<endl;
410// cout<<"h_non_cell_area"<<h_non_cell_area<<endl;
411// cout<<"area.h =" << (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h<<endl;
412// cout<<"w_non_cell_area"<<w_non_cell_area<<endl;
413// cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl;
414
416 assert(area.h>0);
417 assert(area.w>0);
415 assert(area.h > 0);
416 assert(area.w > 0);
418// }
419// else
420// {
421// area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.get_h() + h_non_cell_area;
422// area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
423// area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
424// area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area();
425// }
417// }
418// else
419// {
420// area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.get_h() + h_non_cell_area;
421// area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
422// area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
423// area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area();
424// }
426 }
425}
427
428
429
426
427
428
430Mat::~Mat()
431{
432 delete row_dec;
433 delete bit_mux_dec;
434 delete sa_mux_lev_1_dec;
435 delete sa_mux_lev_2_dec;
429Mat::~Mat() {
430 delete row_dec;
431 delete bit_mux_dec;
432 delete sa_mux_lev_1_dec;
433 delete sa_mux_lev_2_dec;
436
434
437 delete r_predec->blk1;
438 delete r_predec->blk2;
439 delete b_mux_predec->blk1;
440 delete b_mux_predec->blk2;
441 delete sa_mux_lev_1_predec->blk1;
442 delete sa_mux_lev_1_predec->blk2;
443 delete sa_mux_lev_2_predec->blk1;
444 delete sa_mux_lev_2_predec->blk2;
445 delete dummy_way_sel_predec_blk1;
446 delete dummy_way_sel_predec_blk2;
435 delete r_predec->blk1;
436 delete r_predec->blk2;
437 delete b_mux_predec->blk1;
438 delete b_mux_predec->blk2;
439 delete sa_mux_lev_1_predec->blk1;
440 delete sa_mux_lev_1_predec->blk2;
441 delete sa_mux_lev_2_predec->blk1;
442 delete sa_mux_lev_2_predec->blk2;
443 delete dummy_way_sel_predec_blk1;
444 delete dummy_way_sel_predec_blk2;
447
445
448 delete r_predec->drv1;
449 delete r_predec->drv2;
450 delete b_mux_predec->drv1;
451 delete b_mux_predec->drv2;
452 delete sa_mux_lev_1_predec->drv1;
453 delete sa_mux_lev_1_predec->drv2;
454 delete sa_mux_lev_2_predec->drv1;
455 delete sa_mux_lev_2_predec->drv2;
456 delete way_sel_drv1;
457 delete dummy_way_sel_predec_blk_drv2;
446 delete r_predec->drv1;
447 delete r_predec->drv2;
448 delete b_mux_predec->drv1;
449 delete b_mux_predec->drv2;
450 delete sa_mux_lev_1_predec->drv1;
451 delete sa_mux_lev_1_predec->drv2;
452 delete sa_mux_lev_2_predec->drv1;
453 delete sa_mux_lev_2_predec->drv2;
454 delete way_sel_drv1;
455 delete dummy_way_sel_predec_blk_drv2;
458
456
459 delete r_predec;
460 delete b_mux_predec;
461 delete sa_mux_lev_1_predec;
462 delete sa_mux_lev_2_predec;
457 delete r_predec;
458 delete b_mux_predec;
459 delete sa_mux_lev_1_predec;
460 delete sa_mux_lev_2_predec;
463
461
464 delete subarray_out_wire;
465 if (!pure_cam)
466 delete bl_precharge_eq_drv;
462 delete subarray_out_wire;
463 if (!pure_cam)
464 delete bl_precharge_eq_drv;
467
465
468 if (is_fa || pure_cam)
469 {
470 delete sl_precharge_eq_drv ;
471 delete sl_data_drv ;
472 delete cam_bl_precharge_eq_drv;
473 delete ml_precharge_drv;
474 delete ml_to_ram_wl_drv;
475 }
466 if (is_fa || pure_cam) {
467 delete sl_precharge_eq_drv ;
468 delete sl_data_drv ;
469 delete cam_bl_precharge_eq_drv;
470 delete ml_precharge_drv;
471 delete ml_to_ram_wl_drv;
472 }
476}
477
478
479
473}
474
475
476
480double Mat::compute_delays(double inrisetime)
481{
482 int k;
483 double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl;
484 double outrisetime_search, outrisetime, row_dec_outrisetime;
485 // delay calculation for tags of fully associative cache
486 if (is_fa || pure_cam)
487 {
488 //Compute search access time
489 outrisetime_search = compute_cam_delay(inrisetime);
490 if (is_fa)
491 {
492 bl_precharge_eq_drv->compute_delay(0);
493 k = ml_to_ram_wl_drv->number_gates - 1;
494 rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
495 C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
496 drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
497 C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load;
498 tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
499 delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
477double Mat::compute_delays(double inrisetime) {
478 int k;
479 double rd, C_intrinsic, C_ld, tf, R_bl_precharge, r_b_metal, R_bl, C_bl;
480 double outrisetime_search, outrisetime, row_dec_outrisetime;
481 // delay calculation for tags of fully associative cache
482 if (is_fa || pure_cam) {
483 //Compute search access time
484 outrisetime_search = compute_cam_delay(inrisetime);
485 if (is_fa) {
486 bl_precharge_eq_drv->compute_delay(0);
487 k = ml_to_ram_wl_drv->number_gates - 1;
488 rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
489 C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4 *
490 cell.h, is_dram, false, true) +
491 drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4 * cell.h,
492 is_dram, false, true);
493 C_ld = ml_to_ram_wl_drv->c_gate_load +
494 ml_to_ram_wl_drv->c_wire_load;
495 tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
496 delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
500
497
501 R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
502 r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
503 R_bl = subarray.num_rows * r_b_metal;
504 C_bl = subarray.C_bl;
505 delay_bl_restore = bl_precharge_eq_drv->delay +
506 log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
507 (R_bl_precharge * C_bl + R_bl * C_bl / 2);
498 R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
499 r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
500 R_bl = subarray.num_rows * r_b_metal;
501 C_bl = subarray.C_bl;
502 delay_bl_restore = bl_precharge_eq_drv->delay +
503 log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
504 (g_tp.sram.Vbitpre - dp.V_b_sense)) *
505 (R_bl_precharge * C_bl + R_bl * C_bl / 2);
508
509
506
507
510 outrisetime_search = compute_bitline_delay(outrisetime_search);
511 outrisetime_search = compute_sa_delay(outrisetime_search);
512 }
513 outrisetime_search = compute_subarray_out_drv(outrisetime_search);
514 subarray_out_wire->set_in_rise_time(outrisetime_search);
515 outrisetime_search = subarray_out_wire->signal_rise_time();
516 delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
508 outrisetime_search = compute_bitline_delay(outrisetime_search);
509 outrisetime_search = compute_sa_delay(outrisetime_search);
510 }
511 outrisetime_search = compute_subarray_out_drv(outrisetime_search);
512 subarray_out_wire->set_in_rise_time(outrisetime_search);
513 outrisetime_search = subarray_out_wire->signal_rise_time();
514 delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
517
518
515
516
519 //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
520 outrisetime = r_predec->compute_delays(inrisetime);
521 row_dec_outrisetime = row_dec->compute_delays(outrisetime);
517 //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
518 outrisetime = r_predec->compute_delays(inrisetime);
519 row_dec_outrisetime = row_dec->compute_delays(outrisetime);
522
520
523 outrisetime = b_mux_predec->compute_delays(inrisetime);
524 bit_mux_dec->compute_delays(outrisetime);
521 outrisetime = b_mux_predec->compute_delays(inrisetime);
522 bit_mux_dec->compute_delays(outrisetime);
525
523
526 outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
527 sa_mux_lev_1_dec->compute_delays(outrisetime);
524 outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
525 sa_mux_lev_1_dec->compute_delays(outrisetime);
528
526
529 outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
530 sa_mux_lev_2_dec->compute_delays(outrisetime);
527 outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
528 sa_mux_lev_2_dec->compute_delays(outrisetime);
531
529
532 if (pure_cam)
533 {
534 outrisetime = compute_bitline_delay(row_dec_outrisetime);
535 outrisetime = compute_sa_delay(outrisetime);
536 }
537 return outrisetime_search;
530 if (pure_cam) {
531 outrisetime = compute_bitline_delay(row_dec_outrisetime);
532 outrisetime = compute_sa_delay(outrisetime);
533 }
534 return outrisetime_search;
535 } else {
536 bl_precharge_eq_drv->compute_delay(0);
537 if (row_dec->exist == true) {
538 int k = row_dec->num_gates - 1;
539 double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
540 // TODO: this 4*cell.h number must be revisited
541 double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 *
542 cell.h, is_dram, false, true) +
543 drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, is_dram,
544 false, true);
545 double C_ld = row_dec->C_ld_dec_out;
546 double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
547 delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
548 }
549 double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
550 double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
551 double R_bl = subarray.num_rows * r_b_metal;
552 double C_bl = subarray.C_bl;
553
554 if (is_dram) {
555 delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
556 } else {
557 delay_bl_restore = bl_precharge_eq_drv->delay +
558 log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
559 (g_tp.sram.Vbitpre - dp.V_b_sense)) *
560 (R_bl_precharge * C_bl + R_bl * C_bl / 2);
561 }
538 }
562 }
539 else
540 {
541 bl_precharge_eq_drv->compute_delay(0);
542 if (row_dec->exist == true)
543 {
544 int k = row_dec->num_gates - 1;
545 double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
546 // TODO: this 4*cell.h number must be revisited
547 double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
548 drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
549 double C_ld = row_dec->C_ld_dec_out;
550 double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
551 delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
552 }
553 double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
554 double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
555 double R_bl = subarray.num_rows * r_b_metal;
556 double C_bl = subarray.C_bl;
557
563
558 if (is_dram)
559 {
560 delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
561 }
562 else
563 {
564 delay_bl_restore = bl_precharge_eq_drv->delay +
565 log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
566 (R_bl_precharge * C_bl + R_bl * C_bl / 2);
567 }
568 }
569
570
564
565
566 outrisetime = r_predec->compute_delays(inrisetime);
567 row_dec_outrisetime = row_dec->compute_delays(outrisetime);
571
568
572 outrisetime = r_predec->compute_delays(inrisetime);
573 row_dec_outrisetime = row_dec->compute_delays(outrisetime);
569 outrisetime = b_mux_predec->compute_delays(inrisetime);
570 bit_mux_dec->compute_delays(outrisetime);
574
571
575 outrisetime = b_mux_predec->compute_delays(inrisetime);
576 bit_mux_dec->compute_delays(outrisetime);
572 outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
573 sa_mux_lev_1_dec->compute_delays(outrisetime);
577
574
578 outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
579 sa_mux_lev_1_dec->compute_delays(outrisetime);
575 outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
576 sa_mux_lev_2_dec->compute_delays(outrisetime);
580
577
581 outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
582 sa_mux_lev_2_dec->compute_delays(outrisetime);
578 outrisetime = compute_bitline_delay(row_dec_outrisetime);
579 outrisetime = compute_sa_delay(outrisetime);
580 outrisetime = compute_subarray_out_drv(outrisetime);
581 subarray_out_wire->set_in_rise_time(outrisetime);
582 outrisetime = subarray_out_wire->signal_rise_time();
583
583
584 outrisetime = compute_bitline_delay(row_dec_outrisetime);
585 outrisetime = compute_sa_delay(outrisetime);
586 outrisetime = compute_subarray_out_drv(outrisetime);
587 subarray_out_wire->set_in_rise_time(outrisetime);
588 outrisetime = subarray_out_wire->signal_rise_time();
584 delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
589
585
590 delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
586 if (dp.is_tag == true && dp.fully_assoc == false) {
587 compute_comparator_delay(0);
588 }
591
589
592 if (dp.is_tag == true && dp.fully_assoc == false)
593 {
594 compute_comparator_delay(0);
595 }
596
597 if (row_dec->exist == false)
598 {
599 delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
590 if (row_dec->exist == false) {
591 delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
600 }
592 }
601 return outrisetime;
593 return outrisetime;
602}
603
604
605
594}
595
596
597
606double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h()
607{
598double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() {
608
599
609 double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) +
610 compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP)); // precharge circuitry
600 double height =
601 compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge,
602 camFlag ? cam_cell.w :
603 cell.w / (2 * (RWP + ERP + SCHP))) +
604 // precharge circuitry
605 compute_tr_width_after_folding(g_tp.w_pmos_bl_eq,
606 camFlag ? cam_cell.w :
607 cell.w / (RWP + ERP + SCHP));
611
608
612 if (deg_bl_muxing > 1)
613 {
614 height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height
615 // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
616 }
609 if (deg_bl_muxing > 1) {
610 // col mux tr height
611 height +=
612 compute_tr_width_after_folding(g_tp.w_nmos_b_mux,
613 cell.w / (2 * (RWP + ERP)));
614 // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
615 }
617
616
618 height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
617 height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
619
618
620 if (dp.Ndsam_lev_1 > 1)
621 {
622 height += compute_tr_width_after_folding(
623 g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
624 //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
625 }
619 if (dp.Ndsam_lev_1 > 1) {
620 height += compute_tr_width_after_folding(
621 g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
622 //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
623 }
626
624
627 if (dp.Ndsam_lev_2 > 1)
628 {
629 height += compute_tr_width_after_folding(
630 g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
631 //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
625 if (dp.Ndsam_lev_2 > 1) {
626 height += compute_tr_width_after_folding(
627 g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
628 //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
632
629
633 // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
634 height += 2 * compute_tr_width_after_folding(
635 pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
636 height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
637 }
630 // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
631 height += 2 * compute_tr_width_after_folding(
632 pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
633 height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
634 }
638
635
639 // TODO: this should be uncommented...
640 /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
641 {
642 //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
643 double width_write_driver_write_mux = width_write_driver_or_write_mux();
644 double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
645 cell.w *
646 // deg_bl_muxing *
647 dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
648 height += height_write_driver_write_mux;
649 }*/
636 // TODO: this should be uncommented...
637 /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
638 {
639 //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
640 double width_write_driver_write_mux = width_write_driver_or_write_mux();
641 double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
642 cell.w *
643 // deg_bl_muxing *
644 dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
645 height += height_write_driver_write_mux;
646 }*/
650
647
651 return height;
648 return height;
652}
653
654
655
649}
650
651
652
656double Mat::compute_cam_delay(double inrisetime)
657{
653double Mat::compute_cam_delay(double inrisetime) {
658
654
659 double out_time_ramp, this_delay;
660 double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
655 double out_time_ramp, this_delay;
656 double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
661
662
657
658
663 double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
659 double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
664 Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp,
665 Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp,
666 Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p;
667
660 Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp,
661 Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp,
662 Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p;
663
668 double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
669 int Htagbits;
664 double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
665 int Htagbits;
670
666
671 double driver_c_gate_load;
672 double driver_c_wire_load;
673 double driver_r_wire_load;
674 //double searchline_precharge_time;
667 double driver_c_gate_load;
668 double driver_c_wire_load;
669 double driver_r_wire_load;
670 //double searchline_precharge_time;
675
671
676 double leak_power_cc_inverters_sram_cell = 0;
677 double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
678 double leak_power_RD_port_sram_cell = 0;
679 double leak_power_SCHP_port_sram_cell = 0;
680 double leak_comparator_cam_cell =0;
672 double leak_power_cc_inverters_sram_cell = 0;
673 double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
674 double leak_power_RD_port_sram_cell = 0;
675 double leak_power_SCHP_port_sram_cell = 0;
676 double leak_comparator_cam_cell =0;
681
677
682 double gate_leak_comparator_cam_cell = 0;
683 double gate_leak_power_cc_inverters_sram_cell = 0;
684 double gate_leak_power_RD_port_sram_cell = 0;
685 double gate_leak_power_SCHP_port_sram_cell = 0;
678 double gate_leak_comparator_cam_cell = 0;
679 double gate_leak_power_cc_inverters_sram_cell = 0;
680 double gate_leak_power_RD_port_sram_cell = 0;
681 double gate_leak_power_SCHP_port_sram_cell = 0;
686
682
687 c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
688 c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
689 r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
690 r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
683 c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
684 c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
685 r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
686 r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
691
687
692 dynSearchEng = 0.0;
693 delay_matchchline = 0.0;
694 double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
695 bool linear_scaling = false;
688 dynSearchEng = 0.0;
689 delay_matchchline = 0.0;
690 double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
691 bool linear_scaling = false;
696
692
697 if (linear_scaling)
698 {
699 Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
700 Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
701 Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
702 Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
703 Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
704 Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
705 Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
706 Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
707 Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
708 Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
709 Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
710 Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
711 Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
712 Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
713 Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
714 Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
715 Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
716 Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
717 Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
693 if (linear_scaling) {
694 Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
695 Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
696 Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
697 Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
698 Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
699 Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
700 Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
701 Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
702 Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
703 Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
704 Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
705 Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
706 Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
707 Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
708 Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
709 Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
710 Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
711 Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
712 Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
718
713
719 Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
720 Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
721 Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
722 Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
723 Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
724 Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
725 Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
726 Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
727 W_hit_miss_n = Wdummyn;
728 W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
729 //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
730 }
731 else
732 {
733 Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
734 Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
735 Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
736 Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
737 Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
738 Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
739 Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
740 Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
741 Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
742 Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
743 Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
744 Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
745 Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
746 Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
747 Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
748 Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
749 Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
750 Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
751 Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
714 Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
715 Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
716 Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
717 Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
718 Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
719 Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
720 Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
721 Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
722 W_hit_miss_n = Wdummyn;
723 W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
724 //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
725 } else {
726 Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
727 Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
728 Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
729 Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
730 Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
731 Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
732 Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
733 Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
734 Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
735 Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
736 Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
737 Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
738 Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
739 Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
740 Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
741 Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
742 Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
743 Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
744 Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
752
745
753 Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
754 Wdummyn = g_tp.cam.cell_nmos_w;
755 Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
756 Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
757 Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
758 Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
759 Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
760 Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
761 W_hit_miss_n = Wdummyn;
762 W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
763 }
746 Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
747 Wdummyn = g_tp.cam.cell_nmos_w;
748 Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
749 Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
750 Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
751 Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
752 Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
753 Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
754 W_hit_miss_n = Wdummyn;
755 W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
756 }
764
757
765 Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
758 Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
766
759
767 /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
768 search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
769 From the driver(am and an) to the comparators in all the rows including the dummy row,
770 Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
760 /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
761 search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
762 From the driver(am and an) to the comparators in all the rows including the dummy row,
763 Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
771
764
772 //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
773 //Searchline precharge routes horizontally
774 driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
775 driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
776 driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
765 //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
766 //Searchline precharge routes horizontally
767 driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
768 driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
769 driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
777
770
778 sl_precharge_eq_drv = new Driver(
779 driver_c_gate_load,
780 driver_c_wire_load,
781 driver_r_wire_load,
782 is_dram);
771 sl_precharge_eq_drv = new Driver(
772 driver_c_gate_load,
773 driver_c_wire_load,
774 driver_r_wire_load,
775 is_dram);
783
776
784 //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
785 //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
786 driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
787 driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
788 driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
789 sl_data_drv = new Driver(
790 driver_c_gate_load,
791 driver_c_wire_load,
792 driver_r_wire_load,
793 is_dram);
777 //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
778 //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
779 driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
780 driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
781 driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
782 sl_data_drv = new Driver(
783 driver_c_gate_load,
784 driver_c_wire_load,
785 driver_r_wire_load,
786 is_dram);
794
787
795 sl_precharge_eq_drv->compute_delay(0);
796 double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
797 double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
798 double R_bl = (subarray.num_rows + 1) * r_b_metal;
799 double C_bl = subarray.C_bl_cam;
800 delay_cam_sl_restore = sl_precharge_eq_drv->delay
801 + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2);
788 sl_precharge_eq_drv->compute_delay(0);
789 double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
790 double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
791 double R_bl = (subarray.num_rows + 1) * r_b_metal;
792 double C_bl = subarray.C_bl_cam;
793 delay_cam_sl_restore = sl_precharge_eq_drv->delay
794 + log(g_tp.cam.Vbitpre) * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
802
795
803 out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
796 out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
804
797
805 //matchline ops delay
806 delay_matchchline += sl_data_drv->delay;
798 //matchline ops delay
799 delay_matchchline += sl_data_drv->delay;
807
800
808 /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
809 //matchline delay, matchline power, matchline_reset for cycle time computation,
801 /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
802 //matchline delay, matchline power, matchline_reset for cycle time computation,
810
803
811 ////matchline precharge circuitry routes vertically
812 //There are two matchline precharge driver chains per subarray.
813 driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
814 driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
815 driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
804 ////matchline precharge circuitry routes vertically
805 //There are two matchline precharge driver chains per subarray.
806 driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
807 driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
808 driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
816
809
817 ml_precharge_drv = new Driver(
818 driver_c_gate_load,
819 driver_c_wire_load,
820 driver_r_wire_load,
821 is_dram);
810 ml_precharge_drv = new Driver(
811 driver_c_gate_load,
812 driver_c_wire_load,
813 driver_r_wire_load,
814 is_dram);
822
815
823 ml_precharge_drv->compute_delay(0);
816 ml_precharge_drv->compute_delay(0);
824
825
817
818
826 rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
827 c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit
828 + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline
819 rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
820 c_intrinsic = Htagbits *
821 (2 * drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def,
822 is_dram)//TODO: the cell_h_def should be revisit
823 + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram) /
824 Htagbits);//since each halve only has one precharge tx per matchline
829
825
830 Cwire = c_matchline_metal * Htagbits;
831 Rwire = r_matchline_metal * Htagbits;
832 c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
826 Cwire = c_matchline_metal * Htagbits;
827 Rwire = r_matchline_metal * Htagbits;
828 c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
833
829
834 double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
835 //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
836 double R_ml = Rwire;
837 double C_ml = Cwire + c_intrinsic;
838 delay_cam_ml_reset = ml_precharge_drv->delay
839 + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too
830 double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
831 //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
832 double R_ml = Rwire;
833 double C_ml = Cwire + c_intrinsic;
834 //TODO: latest CAM has sense amps on matchlines too
835 delay_cam_ml_reset = ml_precharge_drv->delay
836 + log(g_tp.cam.Vbitpre) * (R_ml_precharge * C_ml + R_ml * C_ml / 2);
840
837
841 //matchline ops delay
842 tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
843 this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
844 delay_matchchline += this_delay;
845 out_time_ramp = this_delay / VTHFA3;
838 //matchline ops delay
839 tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
840 this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
841 delay_matchchline += this_delay;
842 out_time_ramp = this_delay / VTHFA3;
846
843
847 dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise
848 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves
844 dynSearchEng += ((c_intrinsic + Cwire + c_gate_load) *
845 (subarray.num_rows + 1)) //TODO: need to be precise
846 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *
847 2;//each subarry has two halves
849
848
850 /* third stage, from the NAND2 gates to the drivers in the dummy row */
851 rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
852 c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
853 drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2;
854 c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
855 tf = rd * (c_intrinsic + c_gate_load);
856 this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
857 out_time_ramp = this_delay / (1 - VTHFA4);
858 delay_matchchline += this_delay;
849 /* third stage, from the NAND2 gates to the drivers in the dummy row */
850 rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
851 c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
852 drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram) * 2;
853 c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
854 tf = rd * (c_intrinsic + c_gate_load);
855 this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
856 out_time_ramp = this_delay / (1 - VTHFA4);
857 delay_matchchline += this_delay;
859
858
860 //only the dummy row has the extra inverter between NAND and NOR gates
861 dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
859 //only the dummy row has the extra inverter between NAND and NOR gates
860 dynSearchEng += (c_intrinsic * (subarray.num_rows + 1) + c_gate_load * 2) *
861 g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
862
862
863 /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
864 rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
865 c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
866 Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2;
867 Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2;
868 c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
869 tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
870 this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
871 out_time_ramp = this_delay / VTHFA5;
872 delay_matchchline += this_delay;
863 /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
864 rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
865 c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
866 Cwire = c_matchline_metal * Htagbits + c_searchline_metal *
867 (subarray.num_rows + 1) / 2;
868 Rwire = r_matchline_metal * Htagbits + r_searchline_metal *
869 (subarray.num_rows + 1) / 2;
870 c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
871 tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
872 this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
873 out_time_ramp = this_delay / VTHFA5;
874 delay_matchchline += this_delay;
873
875
874 dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
876 dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows * c_gate_load) *
877 g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
875
878
876 /*final statge from the NOR gate to drive the wordline of the data portion */
879 /*final statge from the NOR gate to drive the wordline of the data portion */
877
880
878 //searchline data driver There are two matchline precharge driver chains per subarray.
879 driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
880 driver_c_wire_load = subarray.C_wl_ram;
881 driver_r_wire_load = subarray.R_wl_ram;
881 //searchline data driver There are two matchline precharge driver chains per subarray.
882 driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
883 driver_c_wire_load = subarray.C_wl_ram;
884 driver_r_wire_load = subarray.R_wl_ram;
882
885
883 ml_to_ram_wl_drv = new Driver(
884 driver_c_gate_load,
885 driver_c_wire_load,
886 driver_r_wire_load,
887 is_dram);
886 ml_to_ram_wl_drv = new Driver(
887 driver_c_gate_load,
888 driver_c_wire_load,
889 driver_r_wire_load,
890 is_dram);
888
889
890
891
892
893
891 rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
892 c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
893 c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
894 tf = rd * (c_intrinsic + c_gate_load);
895 this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
896 out_time_ramp = this_delay / (1-0.5);
897 delay_matchchline += this_delay;
894 rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
895 c_intrinsic = 2 * drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
896 drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
897 c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
898 tf = rd * (c_intrinsic + c_gate_load);
899 this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
900 out_time_ramp = this_delay / (1 - 0.5);
901 delay_matchchline += this_delay;
898
902
899 out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
903 out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
900
904
901 //c_gate_load energy is computed in ml_to_ram_wl_drv
902 dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
905 //c_gate_load energy is computed in ml_to_ram_wl_drv
906 dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
903
904
907
908
905 /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
906 /*Precharge the hitting logic */
907 c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
908 Cwire = c_searchline_metal * subarray.num_rows;
909 Rwire = r_searchline_metal * subarray.num_rows;
910 c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
909 /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
910 /*Precharge the hitting logic */
911 c_intrinsic = 2 *
912 drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
913 Cwire = c_searchline_metal * subarray.num_rows;
914 Rwire = r_searchline_metal * subarray.num_rows;
915 c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
916 subarray.num_rows;
911
917
912 rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
913 //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
914 double R_hit_miss = Rwire;
915 double C_hit_miss = Cwire + c_intrinsic;
916 delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
917 dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
918 rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
919 //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
920 double R_hit_miss = Rwire;
921 double C_hit_miss = Cwire + c_intrinsic;
922 delay_hit_miss_reset = log(g_tp.cam.Vbitpre) *
923 (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
924 dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
918
925
919 /*hitting logic evaluation */
920 c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
921 Cwire = c_searchline_metal * subarray.num_rows;
922 Rwire = r_searchline_metal * subarray.num_rows;
923 c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
926 /*hitting logic evaluation */
927 c_intrinsic = 2 *
928 drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
929 Cwire = c_searchline_metal * subarray.num_rows;
930 Rwire = r_searchline_metal * subarray.num_rows;
931 c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
932 subarray.num_rows;
924
933
925 rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
926 tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
934 rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
935 tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
927
936
928 delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
937 delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
929
938
930 if (is_fa)
931 delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
939 if (is_fa)
940 delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
932
941
933 dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
942 dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
934
943
935 /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
944 /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
936
945
937 power_matchline.searchOp.dynamic = dynSearchEng;
946 power_matchline.searchOp.dynamic = dynSearchEng;
938
947
939 //leakage in one subarray
940 double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
941 double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
942 double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
943 double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv
948 //leakage in one subarray
949 double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
950 double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
951 double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
952 1, inv, false, true) * 2;
953 //approx XOR with Inv
954 double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv,
955 false, true) * 2;
944
956
945 leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
946 leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
947 leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
948 leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
949 leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
957 leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
958 leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
959 leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
960 leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
961 leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
950
962
951 power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
952 leak_comparator_cam_cell +
953 leak_power_acc_tr_RW_or_WR_port_sram_cell +
954 leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
955 leak_power_RD_port_sram_cell * ERP +
956 leak_power_SCHP_port_sram_cell*SCHP;
963 power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
964 leak_comparator_cam_cell +
965 leak_power_acc_tr_RW_or_WR_port_sram_cell +
966 leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
967 leak_power_RD_port_sram_cell * ERP +
968 leak_power_SCHP_port_sram_cell * SCHP;
957// power_matchline.searchOp.leakage += leak_comparator_cam_cell;
969// power_matchline.searchOp.leakage += leak_comparator_cam_cell;
958 power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
959 power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
960 power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
961 power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd;
962 //In idle states, the hit/miss txs are closed (on) therefore no Isub
963 power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
970 power_matchline.searchOp.leakage *= (subarray.num_rows + 1) *
971 subarray.num_cols_fa_cam;//TODO:dumy line precise
972 power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
973 cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
974 power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
975 cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
976 power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
977 cmos_Isub_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
978 //In idle states, the hit/miss txs are closed (on) therefore no Isub
979 power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
964 // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
965
980 // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
981
966 //in idle state, Ig_on only possibly exist in access transistors of read only ports
967 double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
968 double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
969 double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2;
982 //in idle state, Ig_on only possibly exist in access transistors of read only ports
983 double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
984 double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
985 1, inv, false, true) * 2;
986 double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv,
987 false, true) * 2;
970
988
971 gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd;
972 gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd;
973 gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
974 gate_leak_power_SCHP_port_sram_cell = 0;
989 gate_leak_comparator_cam_cell = Ig_cell_comparator * g_tp.cam_cell.Vdd;
990 gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.cam_cell.Vdd;
991 gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
992 gate_leak_power_SCHP_port_sram_cell = 0;
975
993
976 //cout<<"power_matchline.searchOp.leakage"<994 //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
977
995
978 power_matchline.searchOp.gate_leakage += gate_leak_power_cc_inverters_sram_cell;
979 power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
980 power_matchline.searchOp.gate_leakage += gate_leak_power_SCHP_port_sram_cell*SCHP + gate_leak_power_RD_port_sram_cell * ERP;
981 power_matchline.searchOp.gate_leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
982 power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(0, Wfaprechp,1, pmos) * g_tp.cam_cell.Vdd;
983 power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
984 power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
985 power_matchline.searchOp.gate_leakage += subarray.num_rows * cmos_Ig_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
986 + cmos_Ig_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
996 power_matchline.searchOp.gate_leakage +=
997 gate_leak_power_cc_inverters_sram_cell;
998 power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
999 power_matchline.searchOp.gate_leakage +=
1000 gate_leak_power_SCHP_port_sram_cell * SCHP +
1001 gate_leak_power_RD_port_sram_cell * ERP;
1002 power_matchline.searchOp.gate_leakage *= (subarray.num_rows + 1) *
1003 subarray.num_cols_fa_cam;//TODO:dumy line precise
1004 power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
1005 cmos_Ig_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
1006 power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
1007 cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
1008 power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
1009 cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
1010 power_matchline.searchOp.gate_leakage += subarray.num_rows *
1011 cmos_Ig_leakage(W_hit_miss_n, 0, 1, nmos) * g_tp.cam_cell.Vdd +
1012 + cmos_Ig_leakage(0, W_hit_miss_p, 1, pmos) * g_tp.cam_cell.Vdd;
987
988
1013
1014
989 return out_time_ramp;
1015 return out_time_ramp;
990}
991
992
1016}
1017
1018
993double Mat::width_write_driver_or_write_mux()
994{
995 // calculate resistance of SRAM cell pull-up PMOS transistor
996 // cam and sram have same cell trasistor properties
997 double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
998 double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true);
999 double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
1000 double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
1019double Mat::width_write_driver_or_write_mux() {
1020 // calculate resistance of SRAM cell pull-up PMOS transistor
1021 // cam and sram have same cell trasistor properties
1022 double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
1023 double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true);
1024 double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
1025 double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
1001
1026
1002 return width_write_driver_nmos;
1027 return width_write_driver_nmos;
1003}
1004
1005
1006
1007double Mat::compute_comparators_height(
1008 int tagbits,
1009 int number_ways_in_mat,
1028}
1029
1030
1031
1032double Mat::compute_comparators_height(
1033 int tagbits,
1034 int number_ways_in_mat,
1010 double subarray_mem_cell_area_width)
1011{
1012 double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
1013 double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
1014 return cumulative_area / subarray_mem_cell_area_width;
1035 double subarray_mem_cell_area_width) {
1036 double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
1037 double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
1038 return cumulative_area / subarray_mem_cell_area_width;
1015}
1016
1017
1018
1039}
1040
1041
1042
1019double Mat::compute_bitline_delay(double inrisetime)
1020{
1021 double V_b_pre, v_th_mem_cell, V_wl;
1022 double tstep;
1023 double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
1024 double R_cell_pull_down=0.0, R_cell_acc =0.0, r_dev=0.0;
1025 int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
1043double Mat::compute_bitline_delay(double inrisetime) {
1044 double V_b_pre, v_th_mem_cell, V_wl;
1045 double tstep;
1046 double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
1047 double R_cell_pull_down = 0.0, R_cell_acc = 0.0, r_dev = 0.0;
1048 int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
1026
1049
1027 double R_b_metal = camFlag? cam_cell.h:cell.h * g_tp.wire_local.R_per_um;
1028 double R_bl = subarray.num_rows * R_b_metal;
1029 double C_bl = subarray.C_bl;
1050 double R_b_metal = camFlag ? cam_cell.h : cell.h * g_tp.wire_local.R_per_um;
1051 double R_bl = subarray.num_rows * R_b_metal;
1052 double C_bl = subarray.C_bl;
1030
1053
1031 // TODO: no leakage for DRAMs?
1032 double leak_power_cc_inverters_sram_cell = 0;
1033 double gate_leak_power_cc_inverters_sram_cell = 0;
1034 double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
1035 double leak_power_RD_port_sram_cell = 0;
1036 double gate_leak_power_RD_port_sram_cell = 0;
1054 // TODO: no leakage for DRAMs?
1055 double leak_power_cc_inverters_sram_cell = 0;
1056 double gate_leak_power_cc_inverters_sram_cell = 0;
1057 double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
1058 double leak_power_RD_port_sram_cell = 0;
1059 double gate_leak_power_RD_port_sram_cell = 0;
1037
1060
1038 if (is_dram == true)
1039 {
1040 V_b_pre = g_tp.dram.Vbitpre;
1041 v_th_mem_cell = g_tp.dram_acc.Vth;
1042 V_wl = g_tp.vpp;
1043 //The access transistor is not folded. So we just need to specify a threshold value for the
1044 //folding width that is equal to or greater than Wmemcella.
1045 R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
1046 r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
1047 }
1048 else
1049 { //SRAM
1050 V_b_pre = g_tp.sram.Vbitpre;
1051 v_th_mem_cell = g_tp.sram_cell.Vth;
1052 V_wl = g_tp.sram_cell.Vdd;
1053 R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
1054 R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
1061 if (is_dram == true) {
1062 V_b_pre = g_tp.dram.Vbitpre;
1063 v_th_mem_cell = g_tp.dram_acc.Vth;
1064 V_wl = g_tp.vpp;
1065 //The access transistor is not folded. So we just need to specify a
1066 // threshold value for the folding width that is equal to or greater
1067 // than Wmemcella.
1068 R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
1069 r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
1070 } else { //SRAM
1071 V_b_pre = g_tp.sram.Vbitpre;
1072 v_th_mem_cell = g_tp.sram_cell.Vth;
1073 V_wl = g_tp.sram_cell.Vdd;
1074 R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
1075 R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
1055
1076
1056 //Leakage current of an SRAM cell
1057 double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);//TODO: how much is the idle time? just by *2?
1058 double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,false, true);
1059 double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true)*2;//two invs per cell
1077 //Leakage current of an SRAM cell
1078 //TODO: how much is the idle time? just by *2?
1079 double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,
1080 false, true);
1081 double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,
1082 false, true);
1083 double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w,
1084 g_tp.sram.cell_pmos_w, 1, inv, false,
1085 true) * 2;//two invs per cell
1060
1086
1061 leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd;
1062 leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
1063 leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd;
1087 leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd;
1088 leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
1089 leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd;
1064
1065
1090
1091
1066 //in idle state, Ig_on only possibly exist in access transistors of read only ports
1067 double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);
1068 double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true);
1092 //in idle state, Ig_on only possibly exist in access transistors of read only ports
1093 double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,
1094 false, true);
1095 double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w,
1096 g_tp.sram.cell_pmos_w, 1, inv, false,
1097 true);
1069
1098
1070 gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd;
1071 gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
1072 }
1099 gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.sram_cell.Vdd;
1100 gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
1101 }
1073
1074
1102
1103
1075 double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram);
1076 double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
1077 double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1078 double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
1079 double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
1080 drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1081 drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1082 double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1104 double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0,
1105 camFlag ? cam_cell.w : cell.w /
1106 (2 * (RWP + ERP + SCHP)), is_dram);
1107 double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
1108 double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0,
1109 camFlag ? cam_cell.w :
1110 cell.w * deg_bl_muxing /
1111 (RWP + ERP + SCHP), is_dram);
1112 double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
1113 double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0,
1114 is_dram) +
1115 drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag ? cam_cell.w :
1116 cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1117 drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? cam_cell.w :
1118 cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1119 double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
1120 camFlag ? cam_cell.w :
1121 cell.w * deg_bl_muxing /
1122 (RWP + ERP + SCHP), is_dram);
1083
1123
1084 if (is_dram)
1085 {
1086 double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl));
1087 tstep = 2.3 * fraction * r_dev *
1088 (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) /
1089 (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux));
1090 delay_writeback = tstep;
1091 dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
1092 (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
1093 dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) *
1094 (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100;
1095 per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
1096 (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
1097 }
1098 else
1099 {
1100 double tau;
1124 if (is_dram) {
1125 double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd / 2) *
1126 g_tp.dram_cell_C /
1127 (g_tp.dram_cell_C + C_bl));
1128 tstep = 2.3 * fraction * r_dev *
1129 (g_tp.dram_cell_C * (C_bl + 2 * C_drain_sense_amp_iso +
1130 C_sense_amp_latch + C_drain_sense_amp_mux)) /
1131 (g_tp.dram_cell_C + (C_bl + 2 * C_drain_sense_amp_iso +
1132 C_sense_amp_latch + C_drain_sense_amp_mux));
1133 delay_writeback = tstep;
1134 dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch +
1135 C_drain_sense_amp_mux) *
1136 (g_tp.dram_cell_Vdd / 2) *
1137 g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
1138 dynWriteEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch) *
1139 (g_tp.dram_cell_Vdd / 2) *
1140 g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ *
1141 num_act_mats_hor_dir * 100;
1142 per_bitline_read_energy = (C_bl + 2 * C_drain_sense_amp_iso +
1143 C_sense_amp_latch + C_drain_sense_amp_mux) *
1144 (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
1145 } else {
1146 double tau;
1101
1147
1102 if (deg_bl_muxing > 1)
1103 {
1104 tau = (R_cell_pull_down + R_cell_acc) *
1105 (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
1106 R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
1107 R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
1108 R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
1109 dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /*
1110 subarray.num_cols * num_subarrays_per_mat*/;
1111 dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
1112 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing);
1113 dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
1114 num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
1115 //Write Ops are differential for SRAM
1116 }
1117 else
1118 {
1119 tau = (R_cell_pull_down + R_cell_acc) *
1120 (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
1121 R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
1122 dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
1123 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
1124 dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
1125 num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
1148 if (deg_bl_muxing > 1) {
1149 tau = (R_cell_pull_down + R_cell_acc) *
1150 (C_bl + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
1151 C_sense_amp_latch + C_drain_sense_amp_mux) +
1152 R_bl * (C_bl / 2 + 2 * C_drain_bit_mux + 2 *
1153 C_drain_sense_amp_iso + C_sense_amp_latch +
1154 C_drain_sense_amp_mux) +
1155 R_bit_mux * (C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
1156 C_sense_amp_latch + C_drain_sense_amp_mux) +
1157 R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch +
1158 C_drain_sense_amp_mux);
1159 dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense *
1160 g_tp.sram_cell.Vdd;
1161 dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch +
1162 C_drain_sense_amp_mux) *
1163 2 * dp.V_b_sense * g_tp.sram_cell.Vdd *
1164 (1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
1165 deg_bl_muxing);
1166 dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ /
1167 deg_bl_muxing) / deg_senseamp_muxing) *
1168 num_act_mats_hor_dir * (C_bl + 2 * C_drain_bit_mux) *
1169 g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
1170 //Write Ops are differential for SRAM
1171 } else {
1172 tau = (R_cell_pull_down + R_cell_acc) *
1173 (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
1174 R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
1175 dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
1176 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
1177 dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
1178 deg_bl_muxing) / deg_senseamp_muxing) *
1179 num_act_mats_hor_dir * C_bl) *
1180 g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
1126
1181
1182 }
1183 tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
1184 power_bitline.readOp.leakage =
1185 leak_power_cc_inverters_sram_cell +
1186 leak_power_acc_tr_RW_or_WR_port_sram_cell +
1187 leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
1188 leak_power_RD_port_sram_cell * ERP;
1189 power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
1190 gate_leak_power_RD_port_sram_cell * ERP;
1191
1127 }
1192 }
1128 tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
1129 power_bitline.readOp.leakage =
1130 leak_power_cc_inverters_sram_cell +
1131 leak_power_acc_tr_RW_or_WR_port_sram_cell +
1132 leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
1133 leak_power_RD_port_sram_cell * ERP;
1134 power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
1135 gate_leak_power_RD_port_sram_cell * ERP;
1136
1193
1137 }
1138
1139// cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl;
1140// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
1141// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
1142// cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl;
1143
1144
1194// cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl;
1195// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
1196// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
1197// cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl;
1198
1199
1145 /* take input rise time into account */
1146 double m = V_wl / inrisetime;
1147 if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m))
1148 {
1149 delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell)/ m);
1150 }
1151 else
1152 {
1153 delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
1154 }
1200 /* take input rise time into account */
1201 double m = V_wl / inrisetime;
1202 if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m)) {
1203 delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell) / m);
1204 } else {
1205 delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
1206 }
1155
1207
1156 bool is_fa = (dp.fully_assoc) ? true : false;
1208 bool is_fa = (dp.fully_assoc) ? true : false;
1157
1209
1158 if (dp.is_tag == false || is_fa == false)
1159 {
1160 power_bitline.readOp.dynamic = dynRdEnergy;
1161 power_bitline.writeOp.dynamic = dynWriteEnergy;
1162 }
1210 if (dp.is_tag == false || is_fa == false) {
1211 power_bitline.readOp.dynamic = dynRdEnergy;
1212 power_bitline.writeOp.dynamic = dynWriteEnergy;
1213 }
1163
1214
1164 double outrisetime = 0;
1165 return outrisetime;
1215 double outrisetime = 0;
1216 return outrisetime;
1166}
1167
1168
1169
1217}
1218
1219
1220
1170double Mat::compute_sa_delay(double inrisetime)
1171{
1172 //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
1221double Mat::compute_sa_delay(double inrisetime) {
1222 //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
1173
1223
1174 //Bitline circuitry leakage.
1175 double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram);
1176 double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
1177 double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
1178 double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
1224 //Bitline circuitry leakage.
1225 double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram);
1226 double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
1227 double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
1228 double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
1179
1229
1180 double lkgIdlePh = IsenseEn;//+ 2*IoBufP;
1181 //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
1182 double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
1183 //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
1184 // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
1185 double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
1186 leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
1187 leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
1230 double lkgIdlePh = IsenseEn;//+ 2*IoBufP;
1231 //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
1232 double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
1233 //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
1234 // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
1235 double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
1236 leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
1237 leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
1188
1238
1189 // sense amplifier has to drive logic in "data out driver" and sense precharge load.
1190 // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
1191 //constant as well as the magnitude of input differential voltage.
1192 double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
1193 drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1194 drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1195 drain_C_(g_tp.w_iso,PCH,1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1196 drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1197 double tau = C_ld / g_tp.gm_sense_amp_latch;
1198 delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
1199 power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
1239 // sense amplifier has to drive logic in "data out driver" and sense precharge load.
1240 // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
1241 //constant as well as the magnitude of input differential voltage.
1242 double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
1243 drain_C_(g_tp.w_sense_n, NCH, 1, 0,
1244 camFlag ? cam_cell.w : cell.w * deg_bl_muxing /
1245 (RWP + ERP + SCHP), is_dram) +
1246 drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ?
1247 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
1248 is_dram) +
1249 drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag ?
1250 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
1251 is_dram) +
1252 drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
1253 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
1254 is_dram);
1255 double tau = C_ld / g_tp.gm_sense_amp_latch;
1256 delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
1257 power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
1200 num_subarrays_per_mat * num_act_mats_hor_dir*/;
1258 num_subarrays_per_mat * num_act_mats_hor_dir*/;
1201 power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
1259 power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
1202
1260
1203 double outrisetime = 0;
1204 return outrisetime;
1261 double outrisetime = 0;
1262 return outrisetime;
1205}
1206
1207
1208
1263}
1264
1265
1266
1209double Mat::compute_subarray_out_drv(double inrisetime)
1210{
1211 double C_ld, rd, tf, this_delay;
1212 double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
1267double Mat::compute_subarray_out_drv(double inrisetime) {
1268 double C_ld, rd, tf, this_delay;
1269 double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
1213
1270
1214 // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
1215 rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
1216 C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1217 gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
1218 tf = rd * C_ld;
1219 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1220 delay_subarray_out_drv += this_delay;
1221 inrisetime = this_delay/(1.0 - 0.5);
1222 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1223 power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
1224 power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
1225 // delay of signal through inverter-buffer to second level of sense-amp mux.
1226 // internal delay of buffer
1227 rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
1228 C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
1229 drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1230 gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
1231 tf = rd * C_ld;
1232 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1233 delay_subarray_out_drv += this_delay;
1234 inrisetime = this_delay/(1.0 - 0.5);
1235 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1236 power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram)* g_tp.peri_global.Vdd;
1237 power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
1271 // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
1272 rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
1273 C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
1274 camFlag ? cam_cell.w : cell.w *
1275 deg_bl_muxing / (RWP + ERP + SCHP),
1276 is_dram) +
1277 gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
1278 tf = rd * C_ld;
1279 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1280 delay_subarray_out_drv += this_delay;
1281 inrisetime = this_delay / (1.0 - 0.5);
1282 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1283 power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
1284 power_subarray_out_drv.readOp.gate_leakage +=
1285 cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
1286 // delay of signal through inverter-buffer to second level of sense-amp mux.
1287 // internal delay of buffer
1288 rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
1289 C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
1290 drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1291 gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
1292 tf = rd * C_ld;
1293 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1294 delay_subarray_out_drv += this_delay;
1295 inrisetime = this_delay / (1.0 - 0.5);
1296 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1297 power_subarray_out_drv.readOp.leakage +=
1298 cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
1299 inv, is_dram) * g_tp.peri_global.Vdd;
1300 power_subarray_out_drv.readOp.gate_leakage +=
1301 cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
1302 inv) * g_tp.peri_global.Vdd;
1238
1303
1239 // inverter driving drain of pass transistor of second level of sense-amp mux.
1240 rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
1241 C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
1242 drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1243 drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram);
1244 tf = rd * C_ld;
1245 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1246 delay_subarray_out_drv += this_delay;
1247 inrisetime = this_delay/(1.0 - 0.5);
1248 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1249 power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
1250 power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
1304 // inverter driving drain of pass transistor of second level of sense-amp mux.
1305 rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
1306 C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
1307 drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def,
1308 is_dram) +
1309 drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
1310 cam_cell.w : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 /
1311 (RWP + ERP + SCHP), is_dram);
1312 tf = rd * C_ld;
1313 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1314 delay_subarray_out_drv += this_delay;
1315 inrisetime = this_delay / (1.0 - 0.5);
1316 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1317 power_subarray_out_drv.readOp.leakage +=
1318 cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
1319 inv) * g_tp.peri_global.Vdd;
1320 power_subarray_out_drv.readOp.gate_leakage +=
1321 cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
1322 inv) * g_tp.peri_global.Vdd;
1251
1252
1323
1324
1253 // delay of signal through pass-transistor to input of subarray output driver.
1254 rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
1255 C_ld = dp.Ndsam_lev_2 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram) +
1256 //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
1257 gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
1258 tf = rd * C_ld;
1259 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1260 delay_subarray_out_drv += this_delay;
1261 inrisetime = this_delay/(1.0 - 0.5);
1262 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1263 power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
1264 power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
1325 // delay of signal through pass-transistor to input of subarray output driver.
1326 rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
1327 C_ld = dp.Ndsam_lev_2 *
1328 drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? cam_cell.w :
1329 cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP),
1330 is_dram) +
1331 //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
1332 gate_C(subarray_out_wire->repeater_size *
1333 (subarray_out_wire->wire_length /
1334 subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ *
1335 (1 + p_to_n_sz_r), 0.0, is_dram);
1336 tf = rd * C_ld;
1337 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1338 delay_subarray_out_drv += this_delay;
1339 inrisetime = this_delay / (1.0 - 0.5);
1340 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1341 power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
1342 power_subarray_out_drv.readOp.gate_leakage +=
1343 cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
1265
1266
1344
1345
1267 return inrisetime;
1346 return inrisetime;
1268}
1269
1270
1271
1347}
1348
1349
1350
1272double Mat::compute_comparator_delay(double inrisetime)
1273{
1274 int A = g_ip->tag_assoc;
1351double Mat::compute_comparator_delay(double inrisetime) {
1352 int A = g_ip->tag_assoc;
1275
1353
1276 int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
1277 // a multiple of 4.
1354 int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
1355 // a multiple of 4.
1278
1356
1279 /* First Inverter */
1280 double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) +
1281 drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1282 drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1283 double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
1284 double tf = Req*Ceq;
1285 double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL);
1286 double nextinputtime = st1del/VTHCOMPINV;
1287 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1357 /* First Inverter */
1358 double Ceq = gate_C(g_tp.w_comp_inv_n2 + g_tp.w_comp_inv_p2, 0, is_dram) +
1359 drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1360 drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1361 double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
1362 double tf = Req * Ceq;
1363 double st1del = horowitz(inrisetime, tf, VTHCOMPINV, VTHCOMPINV, FALL);
1364 double nextinputtime = st1del / VTHCOMPINV;
1365 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1288
1366
1289 //For each degree of associativity
1290 //there are 4 such quarter comparators
1291 double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
1292 double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
1293 /* Second Inverter */
1294 Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) +
1295 drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1296 drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1297 Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
1298 tf = Req*Ceq;
1299 double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE);
1300 nextinputtime = st2del/(1.0-VTHCOMPINV);
1301 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1302 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
1303 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
1367 //For each degree of associativity
1368 //there are 4 such quarter comparators
1369 double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1,
1370 g_tp.w_comp_inv_p1, 1, inv,
1371 is_dram) * 4 * A;
1372 double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1,
1373 g_tp.w_comp_inv_p1, 1, inv,
1374 is_dram) * 4 * A;
1375 /* Second Inverter */
1376 Ceq = gate_C(g_tp.w_comp_inv_n3 + g_tp.w_comp_inv_p3, 0, is_dram) +
1377 drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1378 drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1379 Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
1380 tf = Req * Ceq;
1381 double st2del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHCOMPINV, RISE);
1382 nextinputtime = st2del / (1.0 - VTHCOMPINV);
1383 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1384 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
1385 inv, is_dram) * 4 * A;
1386 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
1387 inv, is_dram) * 4 * A;
1304
1388
1305 /* Third Inverter */
1306 Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) +
1307 drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1308 drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1309 Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
1310 tf = Req*Ceq;
1311 double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL);
1312 nextinputtime = st3del/(VTHEVALINV);
1313 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1314 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
1315 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
1389 /* Third Inverter */
1390 Ceq = gate_C(g_tp.w_eval_inv_n + g_tp.w_eval_inv_p, 0, is_dram) +
1391 drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1392 drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1393 Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
1394 tf = Req * Ceq;
1395 double st3del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHEVALINV, FALL);
1396 nextinputtime = st3del / (VTHEVALINV);
1397 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1398 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1,
1399 inv, is_dram) * 4 * A;
1400 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3,
1401 1, inv, is_dram) * 4 * A;
1316
1402
1317 /* Final Inverter (virtual ground driver) discharging compare part */
1318 double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram);
1319 double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */
1320 double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
1321 drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
1322 drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
1323 drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram);
1324 double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
1325 drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
1326 drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
1327 gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram);
1328 power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1329 power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
1330 lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
1331 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2
1403 /* Final Inverter (virtual ground driver) discharging compare part */
1404 double r1 = tr_R_on(g_tp.w_comp_n, NCH, 2, is_dram);
1405 double r2 = tr_R_on(g_tp.w_eval_inv_n, NCH, 1, is_dram); /* was switch */
1406 double c2 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
1407 g_tp.cell_h_def, is_dram) +
1408 drain_C_(g_tp.w_comp_n, NCH, 2, 1,
1409 g_tp.cell_h_def, is_dram)) +
1410 drain_C_(g_tp.w_eval_inv_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1411 drain_C_(g_tp.w_eval_inv_n, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1412 double c1 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
1413 g_tp.cell_h_def, is_dram) +
1414 drain_C_(g_tp.w_comp_n, NCH, 2, 1,
1415 g_tp.cell_h_def, is_dram)) +
1416 drain_C_(g_tp.w_comp_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1417 gate_C(WmuxdrvNANDn + WmuxdrvNANDp, 0, is_dram);
1418 power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1419 power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
1420 lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
1421 inv, is_dram) * 4 * A;
1422 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
1423 is_dram) * 4 * A; // stack factor of 0.2
1332
1424
1333 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
1334 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter
1425 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
1426 inv, is_dram) * 4 * A;
1427 //for gate leakage this equals to a inverter
1428 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
1429 is_dram) * 4 * A;
1335
1430
1336 /* time to go to threshold of mux driver */
1337 double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND);
1338 /* take into account non-zero input rise time */
1339 double m = g_tp.peri_global.Vdd/nextinputtime;
1340 double Tcomparatorni;
1431 /* time to go to threshold of mux driver */
1432 double tstep = (r2 * c2 + (r1 + r2) * c1) * log(1.0 / VTHMUXNAND);
1433 /* take into account non-zero input rise time */
1434 double m = g_tp.peri_global.Vdd / nextinputtime;
1435 double Tcomparatorni;
1341
1436
1342 if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m))
1343 {
1344 double a = m;
1345 double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
1346 double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
1347 Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a);
1348 }
1349 else
1350 {
1351 Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m;
1352 }
1353 delay_comparator = Tcomparatorni+st1del+st2del+st3del;
1354 power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
1355 power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
1437 if ((tstep) <= (0.5*(g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m)) {
1438 double a = m;
1439 double b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) -
1440 g_tp.peri_global.Vth);
1441 double c = -2 * (tstep) * (g_tp.peri_global.Vdd -
1442 g_tp.peri_global.Vth) + 1 / m *
1443 ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) *
1444 ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth);
1445 Tcomparatorni = (-b + sqrt(b * b - 4 * a * c)) / (2 * a);
1446 } else {
1447 Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd +
1448 g_tp.peri_global.Vth) / (2 * m) -
1449 (g_tp.peri_global.Vdd * VTHEVALINV) / m;
1450 }
1451 delay_comparator = Tcomparatorni + st1del + st2del + st3del;
1452 power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
1453 power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
1356
1454
1357 return Tcomparatorni / (1.0 - VTHMUXNAND);;
1455 return Tcomparatorni / (1.0 - VTHMUXNAND);;
1358}
1359
1360
1361
1456}
1457
1458
1459
1362void Mat::compute_power_energy()
1363{
1364 //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
1460void Mat::compute_power_energy() {
1461 //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
1365 //when search all subarrays and all mats are fully active
1462 //when search all subarrays and all mats are fully active
1366 //when plain read/write only one subarray in a single mat is active.
1463 //when plain read/write only one subarray in a single mat is active.
1367
1368 // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
1464
1465 // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
1369 power.readOp.dynamic += r_predec->power.readOp.dynamic +
1370 b_mux_predec->power.readOp.dynamic +
1371 sa_mux_lev_1_predec->power.readOp.dynamic +
1372 sa_mux_lev_2_predec->power.readOp.dynamic;
1466 power.readOp.dynamic += r_predec->power.readOp.dynamic +
1467 b_mux_predec->power.readOp.dynamic +
1468 sa_mux_lev_1_predec->power.readOp.dynamic +
1469 sa_mux_lev_2_predec->power.readOp.dynamic;
1373
1470
1374 // add energy consumed in decoders
1375 power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
1376 if (!(is_fa||pure_cam))
1377 power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
1471 // add energy consumed in decoders
1472 power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
1473 if (!(is_fa || pure_cam))
1474 power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
1378
1475
1379 // add energy consumed in bitline prechagers, SAs, and bitlines
1380 if (!(is_fa||pure_cam))
1381 {
1382 // add energy consumed in bitline prechagers
1383 power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
1384 power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
1476 // add energy consumed in bitline prechagers, SAs, and bitlines
1477 if (!(is_fa || pure_cam)) {
1478 // add energy consumed in bitline prechagers
1479 power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
1480 power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
1385
1481
1386 //Add sense amps energy
1387 num_sa_subarray = subarray.num_cols / deg_bl_muxing;
1388 power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ;
1482 //Add sense amps energy
1483 num_sa_subarray = subarray.num_cols / deg_bl_muxing;
1484 power_sa.readOp.dynamic *= num_sa_subarray * num_subarrays_per_mat ;
1389
1485
1390 // add energy consumed in bitlines
1391 //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
1392 power_bitline.readOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
1393 power_bitline.writeOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
1394 //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
1395 //Add subarray output energy
1396 power_subarray_out_drv.readOp.dynamic =
1397 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1486 // add energy consumed in bitlines
1487 //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
1488 power_bitline.readOp.dynamic *= num_subarrays_per_mat *
1489 subarray.num_cols;
1490 power_bitline.writeOp.dynamic *= num_subarrays_per_mat *
1491 subarray.num_cols;
1492 //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
1493 //Add subarray output energy
1494 power_subarray_out_drv.readOp.dynamic =
1495 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1398
1496
1399 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1400 power_sa.readOp.dynamic +
1401 power_bitline.readOp.dynamic +
1402 power_subarray_out_drv.readOp.dynamic;
1497 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1498 power_sa.readOp.dynamic +
1499 power_bitline.readOp.dynamic +
1500 power_subarray_out_drv.readOp.dynamic;
1403
1501
1404 power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1405 bit_mux_dec->power.readOp.dynamic +
1406 sa_mux_lev_1_dec->power.readOp.dynamic +
1407 sa_mux_lev_2_dec->power.readOp.dynamic +
1408 power_comparator.readOp.dynamic;
1409 }
1502 power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1503 bit_mux_dec->power.readOp.dynamic +
1504 sa_mux_lev_1_dec->power.readOp.dynamic +
1505 sa_mux_lev_2_dec->power.readOp.dynamic +
1506 power_comparator.readOp.dynamic;
1507 }
1410
1508
1411 else if (is_fa)
1412 {
1413 //for plain read/write only one subarray in a mat is active
1414 // add energy consumed in bitline prechagers
1415 power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
1416 + cam_bl_precharge_eq_drv->power.readOp.dynamic;
1417 power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
1509 else if (is_fa) {
1510 //for plain read/write only one subarray in a mat is active
1511 // add energy consumed in bitline prechagers
1512 power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
1513 + cam_bl_precharge_eq_drv->power.readOp.dynamic;
1514 power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
1418
1515
1419 //Add sense amps energy
1420 num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing;
1421 num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing;
1422 power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search;
1423 power_sa.readOp.dynamic *= num_sa_subarray;
1516 //Add sense amps energy
1517 num_sa_subarray = (subarray.num_cols_fa_cam +
1518 subarray.num_cols_fa_ram) / deg_bl_muxing;
1519 num_sa_subarray_search = subarray.num_cols_fa_ram / deg_bl_muxing;
1520 power_sa.searchOp.dynamic = power_sa.readOp.dynamic *
1521 num_sa_subarray_search;
1522 power_sa.readOp.dynamic *= num_sa_subarray;
1424
1425
1523
1524
1426 // add energy consumed in bitlines
1427 power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
1428 power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
1429 power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
1430 power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
1525 // add energy consumed in bitlines
1526 power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
1527 power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam +
1528 subarray.num_cols_fa_ram);
1529 power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam +
1530 subarray.num_cols_fa_ram);
1531 power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
1431
1532
1432 //Add subarray output energy
1433 power_subarray_out_drv.searchOp.dynamic =
1434 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
1435 power_subarray_out_drv.readOp.dynamic =
1436 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1533 //Add subarray output energy
1534 power_subarray_out_drv.searchOp.dynamic =
1535 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
1536 power_subarray_out_drv.readOp.dynamic =
1537 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1437
1438
1538
1539
1439 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1440 power_sa.readOp.dynamic +
1441 power_bitline.readOp.dynamic +
1442 power_subarray_out_drv.readOp.dynamic;
1540 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1541 power_sa.readOp.dynamic +
1542 power_bitline.readOp.dynamic +
1543 power_subarray_out_drv.readOp.dynamic;
1443
1544
1444 power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1445 bit_mux_dec->power.readOp.dynamic +
1446 sa_mux_lev_1_dec->power.readOp.dynamic +
1447 sa_mux_lev_2_dec->power.readOp.dynamic +
1448 power_comparator.readOp.dynamic;
1545 power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1546 bit_mux_dec->power.readOp.dynamic +
1547 sa_mux_lev_1_dec->power.readOp.dynamic +
1548 sa_mux_lev_2_dec->power.readOp.dynamic +
1549 power_comparator.readOp.dynamic;
1449
1550
1450 //add energy consumed inside cam
1451 power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
1452 power_searchline_precharge = sl_precharge_eq_drv->power;
1453 power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1454 power_searchline = sl_data_drv->power;
1455 power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
1456 power_matchline_precharge = ml_precharge_drv->power;
1457 power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
1458 power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
1459 power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
1551 //add energy consumed inside cam
1552 power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
1553 power_searchline_precharge = sl_precharge_eq_drv->power;
1554 power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1555 power_searchline = sl_data_drv->power;
1556 power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
1557 subarray.num_cols_fa_cam * num_subarrays_per_mat;;
1558 power_matchline_precharge = ml_precharge_drv->power;
1559 power_matchline_precharge.searchOp.dynamic =
1560 power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1561 power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
1562 power_ml_to_ram_wl_drv.searchOp.dynamic =
1563 ml_to_ram_wl_drv->power.readOp.dynamic;
1460
1564
1461 power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
1462 power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
1463 power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
1464 power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
1565 power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
1566 power_cam_all_active.searchOp.dynamic +=
1567 power_searchline_precharge.searchOp.dynamic;
1568 power_cam_all_active.searchOp.dynamic +=
1569 power_searchline.searchOp.dynamic;
1570 power_cam_all_active.searchOp.dynamic +=
1571 power_matchline_precharge.searchOp.dynamic;
1465
1572
1466 power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
1467 //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1573 power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
1574 //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1468
1575
1469 }
1470 else
1471 {
1472 // add energy consumed in bitline prechagers
1473 power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
1474 //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
1475 //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
1476 //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
1576 } else {
1577 // add energy consumed in bitline prechagers
1578 power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
1579 //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
1580 //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
1581 //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
1477
1582
1478 //Add sense amps energy
1479 num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing;
1480 power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
1481 power_sa.searchOp.dynamic = 0;
1583 //Add sense amps energy
1584 num_sa_subarray = subarray.num_cols_fa_cam / deg_bl_muxing;
1585 power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
1586 power_sa.searchOp.dynamic = 0;
1482
1587
1483 power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
1484 power_bitline.searchOp.dynamic = 0;
1485 power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
1588 power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
1589 power_bitline.searchOp.dynamic = 0;
1590 power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
1486
1591
1487 power_subarray_out_drv.searchOp.dynamic =
1488 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
1489 power_subarray_out_drv.readOp.dynamic =
1490 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1592 power_subarray_out_drv.searchOp.dynamic =
1593 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
1594 power_subarray_out_drv.readOp.dynamic =
1595 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1491
1596
1492 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1493 power_sa.readOp.dynamic +
1494 power_bitline.readOp.dynamic +
1495 power_subarray_out_drv.readOp.dynamic;
1597 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1598 power_sa.readOp.dynamic +
1599 power_bitline.readOp.dynamic +
1600 power_subarray_out_drv.readOp.dynamic;
1496
1601
1497 power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1498 bit_mux_dec->power.readOp.dynamic +
1499 sa_mux_lev_1_dec->power.readOp.dynamic +
1500 sa_mux_lev_2_dec->power.readOp.dynamic +
1501 power_comparator.readOp.dynamic;
1602 power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1603 bit_mux_dec->power.readOp.dynamic +
1604 sa_mux_lev_1_dec->power.readOp.dynamic +
1605 sa_mux_lev_2_dec->power.readOp.dynamic +
1606 power_comparator.readOp.dynamic;
1502
1503
1607
1608
1504 ////add energy consumed inside cam
1505 power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
1506 power_searchline_precharge = sl_precharge_eq_drv->power;
1507 power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1508 power_searchline = sl_data_drv->power;
1509 power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
1510 power_matchline_precharge = ml_precharge_drv->power;
1511 power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
1512 power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
1513 power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
1609 ////add energy consumed inside cam
1610 power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
1611 power_searchline_precharge = sl_precharge_eq_drv->power;
1612 power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1613 power_searchline = sl_data_drv->power;
1614 power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
1615 subarray.num_cols_fa_cam * num_subarrays_per_mat;;
1616 power_matchline_precharge = ml_precharge_drv->power;
1617 power_matchline_precharge.searchOp.dynamic =
1618 power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1619 power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
1620 power_ml_to_ram_wl_drv.searchOp.dynamic =
1621 ml_to_ram_wl_drv->power.readOp.dynamic;
1514
1622
1515 power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
1516 power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
1517 power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
1518 power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
1623 power_cam_all_active.searchOp.dynamic =
1624 power_matchline.searchOp.dynamic;
1625 power_cam_all_active.searchOp.dynamic +=
1626 power_searchline_precharge.searchOp.dynamic;
1627 power_cam_all_active.searchOp.dynamic +=
1628 power_searchline.searchOp.dynamic;
1629 power_cam_all_active.searchOp.dynamic +=
1630 power_matchline_precharge.searchOp.dynamic;
1519
1631
1520 power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
1521 //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1632 power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
1633 //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1522
1634
1523 }
1635 }
1524
1525
1526
1636
1637
1638
1527 // calculate leakage power
1528 if (!(is_fa || pure_cam))
1529 {
1639 // calculate leakage power
1640 if (!(is_fa || pure_cam)) {
1530 int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1531
1532 power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1641 int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1642
1643 power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1533 power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1534 power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
1644 power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1645 power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
1646 (RWP + ERP);
1535
1647
1536 //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
1537 power_subarray_out_drv.readOp.leakage =
1538 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1539 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
1648 //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
1649 power_subarray_out_drv.readOp.leakage =
1650 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1651 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
1540
1652
1541 power.readOp.leakage += power_bitline.readOp.leakage +
1542 power_bl_precharge_eq_drv.readOp.leakage +
1543 power_sa.readOp.leakage +
1544 power_subarray_out_drv.readOp.leakage;
1545 //cout<<"leakage"<<power.readOp.leakage<<endl;
1653 power.readOp.leakage += power_bitline.readOp.leakage +
1654 power_bl_precharge_eq_drv.readOp.leakage +
1655 power_sa.readOp.leakage +
1656 power_subarray_out_drv.readOp.leakage;
1657 //cout<<"leakage"<<power.readOp.leakage<<endl;
1546
1658
1547 power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
1548 power.readOp.leakage += power_comparator.readOp.leakage;
1659 power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
1660 power.readOp.leakage += power_comparator.readOp.leakage;
1549
1661
1550 //cout<<"leakage1"<<power.readOp.leakage<<endl;
1662 //cout<<"leakage1"<<power.readOp.leakage<<endl;
1551
1663
1552 // leakage power
1553 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
1554 power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
1555 power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
1556 power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
1664 // leakage power
1665 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
1666 power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
1667 power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
1668 power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
1557
1669
1558 power.readOp.leakage += r_predec->power.readOp.leakage +
1559 b_mux_predec->power.readOp.leakage +
1560 sa_mux_lev_1_predec->power.readOp.leakage +
1561 sa_mux_lev_2_predec->power.readOp.leakage +
1562 power_row_decoders.readOp.leakage +
1563 power_bit_mux_decoders.readOp.leakage +
1564 power_sa_mux_lev_1_decoders.readOp.leakage +
1565 power_sa_mux_lev_2_decoders.readOp.leakage;
1566 //cout<<"leakage2"<<power.readOp.leakage<<endl;
1670 power.readOp.leakage += r_predec->power.readOp.leakage +
1671 b_mux_predec->power.readOp.leakage +
1672 sa_mux_lev_1_predec->power.readOp.leakage +
1673 sa_mux_lev_2_predec->power.readOp.leakage +
1674 power_row_decoders.readOp.leakage +
1675 power_bit_mux_decoders.readOp.leakage +
1676 power_sa_mux_lev_1_decoders.readOp.leakage +
1677 power_sa_mux_lev_2_decoders.readOp.leakage;
1678 //cout<<"leakage2"<<power.readOp.leakage<<endl;
1567
1679
1568 //++++Below is gate leakage
1680 //++++Below is gate leakage
1569 power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1681 power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1570 power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1571 power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
1682 power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1683 power_sa.readOp.gate_leakage *= num_sa_subarray *
1684 num_subarrays_per_mat * (RWP + ERP);
1572
1685
1573 //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
1574 power_subarray_out_drv.readOp.gate_leakage =
1575 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1576 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
1686 //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
1687 power_subarray_out_drv.readOp.gate_leakage =
1688 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1689 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
1577
1690
1578 power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
1579 power_bl_precharge_eq_drv.readOp.gate_leakage +
1580 power_sa.readOp.gate_leakage +
1581 power_subarray_out_drv.readOp.gate_leakage;
1582 //cout<<"leakage"<<power.readOp.leakage<<endl;
1691 power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
1692 power_bl_precharge_eq_drv.readOp.gate_leakage +
1693 power_sa.readOp.gate_leakage +
1694 power_subarray_out_drv.readOp.gate_leakage;
1695 //cout<<"leakage"<<power.readOp.leakage<<endl;
1583
1696
1584 power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
1585 power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
1697 power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
1698 power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
1586
1699
1587 //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
1700 //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
1588
1701
1589 // gate_leakage power
1590 power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
1591 power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
1592 power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
1593 power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
1702 // gate_leakage power
1703 power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
1704 power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
1705 power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
1706 power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
1594
1707
1595 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1596 b_mux_predec->power.readOp.gate_leakage +
1597 sa_mux_lev_1_predec->power.readOp.gate_leakage +
1598 sa_mux_lev_2_predec->power.readOp.gate_leakage +
1599 power_row_decoders.readOp.gate_leakage +
1600 power_bit_mux_decoders.readOp.gate_leakage +
1601 power_sa_mux_lev_1_decoders.readOp.gate_leakage +
1602 power_sa_mux_lev_2_decoders.readOp.gate_leakage;
1603 }
1604 else if (is_fa)
1605 {
1606 int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1708 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1709 b_mux_predec->power.readOp.gate_leakage +
1710 sa_mux_lev_1_predec->power.readOp.gate_leakage +
1711 sa_mux_lev_2_predec->power.readOp.gate_leakage +
1712 power_row_decoders.readOp.gate_leakage +
1713 power_bit_mux_decoders.readOp.gate_leakage +
1714 power_sa_mux_lev_1_decoders.readOp.gate_leakage +
1715 power_sa_mux_lev_2_decoders.readOp.gate_leakage;
1716 } else if (is_fa) {
1717 int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1607
1718
1608 power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1609 power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1610 power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1611 power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
1719 power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1720 power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1721 power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1722 power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
1723 (RWP + ERP + SCHP);
1612
1724
1613 //cout<<"leakage3"<<power.readOp.leakage<<endl;
1725 //cout<<"leakage3"<<power.readOp.leakage<<endl;
1614
1615
1726
1727
1616 power_subarray_out_drv.readOp.leakage =
1617 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1618 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1728 power_subarray_out_drv.readOp.leakage =
1729 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1730 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1619
1731
1620 power.readOp.leakage += power_bitline.readOp.leakage +
1621 power_bl_precharge_eq_drv.readOp.leakage +
1622 power_bl_precharge_eq_drv.searchOp.leakage +
1623 power_sa.readOp.leakage +
1624 power_subarray_out_drv.readOp.leakage;
1732 power.readOp.leakage += power_bitline.readOp.leakage +
1733 power_bl_precharge_eq_drv.readOp.leakage +
1734 power_bl_precharge_eq_drv.searchOp.leakage +
1735 power_sa.readOp.leakage +
1736 power_subarray_out_drv.readOp.leakage;
1625
1737
1626 //cout<<"leakage4"<<power.readOp.leakage<<endl;
1738 //cout<<"leakage4"<<power.readOp.leakage<<endl;
1627
1739
1628 // leakage power
1629 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
1630 power.readOp.leakage += r_predec->power.readOp.leakage +
1631 power_row_decoders.readOp.leakage;
1740 // leakage power
1741 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
1742 power.readOp.leakage += r_predec->power.readOp.leakage +
1743 power_row_decoders.readOp.leakage;
1632
1744
1633 //cout<<"leakage5"<<power.readOp.leakage<<endl;
1745 //cout<<"leakage5"<<power.readOp.leakage<<endl;
1634
1746
1635 //inside cam
1636 power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
1637 power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
1638 power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
1639 power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
1640 power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
1747 //inside cam
1748 power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
1749 power_cam_all_active.searchOp.leakage +=
1750 sl_precharge_eq_drv->power.readOp.leakage;
1751 power_cam_all_active.searchOp.leakage +=
1752 sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
1753 power_cam_all_active.searchOp.leakage +=
1754 ml_precharge_drv->power.readOp.dynamic;
1755 power_cam_all_active.searchOp.leakage *=
1756 num_subarrays_per_mat;
1641
1757
1642 power.readOp.leakage += power_cam_all_active.searchOp.leakage;
1758 power.readOp.leakage += power_cam_all_active.searchOp.leakage;
1643
1644// cout<<"leakage6"<<power.readOp.leakage<<endl;
1645
1759
1760// cout<<"leakage6"<<power.readOp.leakage<<endl;
1761
1646 //+++Below is gate leakage
1647 power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1648 power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1649 power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1650 power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
1762 //+++Below is gate leakage
1763 power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1764 power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1765 power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1766 power_sa.readOp.gate_leakage *= num_sa_subarray *
1767 num_subarrays_per_mat * (RWP + ERP + SCHP);
1651
1768
1652 //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
1769 //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
1653
1654
1770
1771
1655 power_subarray_out_drv.readOp.gate_leakage =
1656 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1657 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1772 power_subarray_out_drv.readOp.gate_leakage =
1773 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1774 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1658
1775
1659 power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
1660 power_bl_precharge_eq_drv.readOp.gate_leakage +
1661 power_bl_precharge_eq_drv.searchOp.gate_leakage +
1662 power_sa.readOp.gate_leakage +
1663 power_subarray_out_drv.readOp.gate_leakage;
1776 power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
1777 power_bl_precharge_eq_drv.readOp.gate_leakage +
1778 power_bl_precharge_eq_drv.searchOp.gate_leakage +
1779 power_sa.readOp.gate_leakage +
1780 power_subarray_out_drv.readOp.gate_leakage;
1664
1781
1665 //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
1782 //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
1666
1783
1667 // gate_leakage power
1668 power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
1669 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1670 power_row_decoders.readOp.gate_leakage;
1784 // gate_leakage power
1785 power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
1786 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1787 power_row_decoders.readOp.gate_leakage;
1671
1788
1672 //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
1789 //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
1673
1790
1674 //inside cam
1675 power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
1676 power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
1677 power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
1678 power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
1679 power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
1791 //inside cam
1792 power_cam_all_active.searchOp.gate_leakage =
1793 power_matchline.searchOp.gate_leakage;
1794 power_cam_all_active.searchOp.gate_leakage +=
1795 sl_precharge_eq_drv->power.readOp.gate_leakage;
1796 power_cam_all_active.searchOp.gate_leakage +=
1797 sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
1798 power_cam_all_active.searchOp.gate_leakage +=
1799 ml_precharge_drv->power.readOp.dynamic;
1800 power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
1680
1801
1681 power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
1802 power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
1682
1803
1683 }
1684 else
1685 {
1686 int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1804 } else {
1805 int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1687
1806
1688 //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1689 //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1690 power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1691 power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
1807 //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1808 //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1809 power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1810 power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
1811 (RWP + ERP + SCHP);
1692
1693
1812
1813
1694 power_subarray_out_drv.readOp.leakage =
1695 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1696 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1814 power_subarray_out_drv.readOp.leakage =
1815 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1816 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1697
1817
1698 power.readOp.leakage += //power_bitline.readOp.leakage +
1699 //power_bl_precharge_eq_drv.readOp.leakage +
1700 power_bl_precharge_eq_drv.searchOp.leakage +
1701 power_sa.readOp.leakage +
1702 power_subarray_out_drv.readOp.leakage;
1818 power.readOp.leakage += //power_bitline.readOp.leakage +
1819 //power_bl_precharge_eq_drv.readOp.leakage +
1820 power_bl_precharge_eq_drv.searchOp.leakage +
1821 power_sa.readOp.leakage +
1822 power_subarray_out_drv.readOp.leakage;
1703
1823
1704 // leakage power
1705 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
1706 power.readOp.leakage += r_predec->power.readOp.leakage +
1707 power_row_decoders.readOp.leakage;
1824 // leakage power
1825 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage *
1826 subarray.num_rows * num_subarrays_per_mat * (RWP + ERP + EWP);
1827 power.readOp.leakage += r_predec->power.readOp.leakage +
1828 power_row_decoders.readOp.leakage;
1708
1829
1709 //inside cam
1710 power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
1711 power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
1712 power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
1713 power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
1714 power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
1830 //inside cam
1831 power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
1832 power_cam_all_active.searchOp.leakage +=
1833 sl_precharge_eq_drv->power.readOp.leakage;
1834 power_cam_all_active.searchOp.leakage +=
1835 sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
1836 power_cam_all_active.searchOp.leakage +=
1837 ml_precharge_drv->power.readOp.dynamic;
1838 power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
1715
1839
1716 power.readOp.leakage += power_cam_all_active.searchOp.leakage;
1840 power.readOp.leakage += power_cam_all_active.searchOp.leakage;
1717
1841
1718 //+++Below is gate leakage
1719 power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1720 power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
1842 //+++Below is gate leakage
1843 power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1844 power_sa.readOp.gate_leakage *= num_sa_subarray *
1845 num_subarrays_per_mat * (RWP + ERP + SCHP);
1721
1722
1846
1847
1723 power_subarray_out_drv.readOp.gate_leakage =
1724 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1725 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1848 power_subarray_out_drv.readOp.gate_leakage =
1849 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1850 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1726
1851
1727 power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
1728 //power_bl_precharge_eq_drv.readOp.gate_leakage +
1729 power_bl_precharge_eq_drv.searchOp.gate_leakage +
1730 power_sa.readOp.gate_leakage +
1731 power_subarray_out_drv.readOp.gate_leakage;
1852 power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
1853 //power_bl_precharge_eq_drv.readOp.gate_leakage +
1854 power_bl_precharge_eq_drv.searchOp.gate_leakage +
1855 power_sa.readOp.gate_leakage +
1856 power_subarray_out_drv.readOp.gate_leakage;
1732
1857
1733 // gate_leakage power
1734 power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
1735 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1736 power_row_decoders.readOp.gate_leakage;
1858 // gate_leakage power
1859 power_row_decoders.readOp.gate_leakage =
1860 row_dec->power.readOp.gate_leakage * subarray.num_rows *
1861 num_subarrays_per_mat * (RWP + ERP + EWP);
1862 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1863 power_row_decoders.readOp.gate_leakage;
1737
1864
1738 //inside cam
1739 power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
1740 power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
1741 power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
1742 power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
1743 power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
1865 //inside cam
1866 power_cam_all_active.searchOp.gate_leakage =
1867 power_matchline.searchOp.gate_leakage;
1868 power_cam_all_active.searchOp.gate_leakage +=
1869 sl_precharge_eq_drv->power.readOp.gate_leakage;
1870 power_cam_all_active.searchOp.gate_leakage +=
1871 sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
1872 power_cam_all_active.searchOp.gate_leakage +=
1873 ml_precharge_drv->power.readOp.dynamic;
1874 power_cam_all_active.searchOp.gate_leakage *=
1875 num_subarrays_per_mat;
1744
1876
1745 power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
1746 }
1877 power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
1878 }
1747}
1748
1879}
1880