decoder.cc (10152:52c552138ba1) decoder.cc (10234:5cb711fa6176)
1/*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
1/*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
5 * All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
6 * All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 ***************************************************************************/
31
32
33
34#include <cassert>
35#include <cmath>
36#include <iostream>

--- 9 unchanged lines hidden (view full) ---

46 int _num_dec_signals,
47 bool flag_way_select,
48 double _C_ld_dec_out,
49 double _R_wire_dec_out,
50 bool fully_assoc_,
51 bool is_dram_,
52 bool is_wl_tr_,
53 const Area & cell_)
30 *
31 ***************************************************************************/
32
33
34
35#include <cassert>
36#include <cmath>
37#include <iostream>

--- 9 unchanged lines hidden (view full) ---

47 int _num_dec_signals,
48 bool flag_way_select,
49 double _C_ld_dec_out,
50 double _R_wire_dec_out,
51 bool fully_assoc_,
52 bool is_dram_,
53 bool is_wl_tr_,
54 const Area & cell_)
54:exist(false),
55 C_ld_dec_out(_C_ld_dec_out),
56 R_wire_dec_out(_R_wire_dec_out),
57 num_gates(0), num_gates_min(2),
58 delay(0),
59 //power(),
60 fully_assoc(fully_assoc_), is_dram(is_dram_),
61 is_wl_tr(is_wl_tr_), cell(cell_)
62{
55 : exist(false),
56 C_ld_dec_out(_C_ld_dec_out),
57 R_wire_dec_out(_R_wire_dec_out),
58 num_gates(0), num_gates_min(2),
59 delay(0),
60 //power(),
61 fully_assoc(fully_assoc_), is_dram(is_dram_),
62 is_wl_tr(is_wl_tr_), cell(cell_) {
63
63
64 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
65 {
66 w_dec_n[i] = 0;
67 w_dec_p[i] = 0;
68 }
64 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
65 w_dec_n[i] = 0;
66 w_dec_p[i] = 0;
67 }
69
68
70 /*
71 * _num_dec_signals is the number of decoded signal as output
72 * num_addr_bits_dec is the number of signal to be decoded
73 * as the decoders input.
74 */
75 int num_addr_bits_dec = _log2(_num_dec_signals);
69 /*
70 * _num_dec_signals is the number of decoded signal as output
71 * num_addr_bits_dec is the number of signal to be decoded
72 * as the decoders input.
73 */
74 int num_addr_bits_dec = _log2(_num_dec_signals);
76
75
77 if (num_addr_bits_dec < 4)
78 {
79 if (flag_way_select)
80 {
81 exist = true;
82 num_in_signals = 2;
83 }
84 else
85 {
86 num_in_signals = 0;
87 }
88 }
89 else
90 {
91 exist = true;
76 if (num_addr_bits_dec < 4) {
77 if (flag_way_select) {
78 exist = true;
79 num_in_signals = 2;
80 } else {
81 num_in_signals = 0;
82 }
83 } else {
84 exist = true;
92
85
93 if (flag_way_select)
94 {
95 num_in_signals = 3;
86 if (flag_way_select) {
87 num_in_signals = 3;
88 } else {
89 num_in_signals = 2;
90 }
96 }
91 }
97 else
98 {
99 num_in_signals = 2;
100 }
101 }
102
92
103 assert(cell.h>0);
104 assert(cell.w>0);
105 // the height of a row-decoder-driver cell is fixed to be 4 * cell.h;
106 //area.h = 4 * cell.h;
107 area.h = g_tp.h_dec * cell.h;
93 assert(cell.h > 0);
94 assert(cell.w > 0);
95 // the height of a row-decoder-driver cell is fixed to be 4 * cell.h;
96 //area.h = 4 * cell.h;
97 area.h = g_tp.h_dec * cell.h;
108
98
109 compute_widths();
110 compute_area();
99 compute_widths();
100 compute_area();
111}
112
113
114
101}
102
103
104
115void Decoder::compute_widths()
116{
117 double F;
118 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
119 double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
120 double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
105void Decoder::compute_widths() {
106 double F;
107 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
108 double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
109 double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
121
110
122 if (exist)
123 {
124 if (num_in_signals == 2 || fully_assoc)
125 {
126 w_dec_n[0] = 2 * g_tp.min_w_nmos_;
127 w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
128 F = gnand2;
129 }
130 else
131 {
132 w_dec_n[0] = 3 * g_tp.min_w_nmos_;
133 w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
134 F = gnand3;
135 }
111 if (exist) {
112 if (num_in_signals == 2 || fully_assoc) {
113 w_dec_n[0] = 2 * g_tp.min_w_nmos_;
114 w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
115 F = gnand2;
116 } else {
117 w_dec_n[0] = 3 * g_tp.min_w_nmos_;
118 w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
119 F = gnand3;
120 }
136
121
137 F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
138 gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));
139 num_gates = logical_effort(
140 num_gates_min,
141 num_in_signals == 2 ? gnand2 : gnand3,
142 F,
143 w_dec_n,
144 w_dec_p,
145 C_ld_dec_out,
146 p_to_n_sz_ratio,
147 is_dram,
148 is_wl_tr,
149 g_tp.max_w_nmos_dec);
150 }
122 F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
123 gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));
124 num_gates = logical_effort(
125 num_gates_min,
126 num_in_signals == 2 ? gnand2 : gnand3,
127 F,
128 w_dec_n,
129 w_dec_p,
130 C_ld_dec_out,
131 p_to_n_sz_ratio,
132 is_dram,
133 is_wl_tr,
134 g_tp.max_w_nmos_dec);
135 }
151}
152
153
154
136}
137
138
139
155void Decoder::compute_area()
156{
157 double cumulative_area = 0;
158 double cumulative_curr = 0; // cumulative leakage current
159 double cumulative_curr_Ig = 0; // cumulative leakage current
140void Decoder::compute_area() {
141 double cumulative_area = 0;
142 double cumulative_curr = 0; // cumulative leakage current
143 double cumulative_curr_Ig = 0; // cumulative leakage current
160
144
161 if (exist)
162 { // First check if this decoder exists
163 if (num_in_signals == 2)
164 {
165 cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
166 cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
167 cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
168 }
169 else if (num_in_signals == 3)
170 {
171 cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
172 cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
173 cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
174 }
145 if (exist) { // First check if this decoder exists
146 if (num_in_signals == 2) {
147 cumulative_area =
148 compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
149 cumulative_curr =
150 cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
151 cumulative_curr_Ig =
152 cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
153 } else if (num_in_signals == 3) {
154 cumulative_area =
155 compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
156 cumulative_curr =
157 cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
158 cumulative_curr_Ig =
159 cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
160 }
175
161
176 for (int i = 1; i < num_gates; i++)
177 {
178 cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
179 cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
180 cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
181 }
182 power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
183 power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
162 for (int i = 1; i < num_gates; i++) {
163 cumulative_area +=
164 compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
165 cumulative_curr +=
166 cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
167 cumulative_curr_Ig =
168 cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
169 }
170 power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
171 power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
184
172
185 area.w = (cumulative_area / area.h);
186 }
173 area.w = (cumulative_area / area.h);
174 }
187}
188
189
190
175}
176
177
178
191double Decoder::compute_delays(double inrisetime)
192{
193 if (exist)
194 {
195 double ret_val = 0; // outrisetime
196 int i;
197 double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
198 double Vdd = g_tp.peri_global.Vdd;
179double Decoder::compute_delays(double inrisetime) {
180 if (exist) {
181 double ret_val = 0; // outrisetime
182 int i;
183 double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
184 double Vdd = g_tp.peri_global.Vdd;
199
185
200 if ((is_wl_tr) && (is_dram))
201 {
202 Vpp = g_tp.vpp;
203 }
204 else if (is_wl_tr)
205 {
206 Vpp = g_tp.sram_cell.Vdd;
207 }
208 else
209 {
210 Vpp = g_tp.peri_global.Vdd;
211 }
186 if ((is_wl_tr) && (is_dram)) {
187 Vpp = g_tp.vpp;
188 } else if (is_wl_tr) {
189 Vpp = g_tp.sram_cell.Vdd;
190 } else {
191 Vpp = g_tp.peri_global.Vdd;
192 }
212
193
213 // first check whether a decoder is required at all
214 rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
215 c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
216 c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
217 drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
218 tf = rd * (c_intrinsic + c_load);
219 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
220 delay += this_delay;
221 inrisetime = this_delay / (1.0 - 0.5);
222 power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
194 // first check whether a decoder is required at all
195 rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
196 c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
197 c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
198 drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
199 tf = rd * (c_intrinsic + c_load);
200 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
201 delay += this_delay;
202 inrisetime = this_delay / (1.0 - 0.5);
203 power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
223
204
224 for (i = 1; i < num_gates - 1; ++i)
225 {
226 rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
227 c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
228 c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
229 drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
230 tf = rd * (c_intrinsic + c_load);
231 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
232 delay += this_delay;
233 inrisetime = this_delay / (1.0 - 0.5);
234 power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
235 }
205 for (i = 1; i < num_gates - 1; ++i) {
206 rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
207 c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
208 c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
209 drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
210 tf = rd * (c_intrinsic + c_load);
211 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
212 delay += this_delay;
213 inrisetime = this_delay / (1.0 - 0.5);
214 power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
215 }
236
216
237 // add delay of final inverter that drives the wordline
238 i = num_gates - 1;
239 c_load = C_ld_dec_out;
240 rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
241 c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
242 drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
243 tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2;
244 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
245 delay += this_delay;
246 ret_val = this_delay / (1.0 - 0.5);
247 power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
217 // add delay of final inverter that drives the wordline
218 i = num_gates - 1;
219 c_load = C_ld_dec_out;
220 rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
221 c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
222 drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
223 tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2;
224 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
225 delay += this_delay;
226 ret_val = this_delay / (1.0 - 0.5);
227 power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
248
228
249 return ret_val;
250 }
251 else
252 {
253 return 0.0;
254 }
229 return ret_val;
230 } else {
231 return 0.0;
232 }
255}
256
257void Decoder::leakage_feedback(double temperature)
258{
259 double cumulative_curr = 0; // cumulative leakage current
260 double cumulative_curr_Ig = 0; // cumulative leakage current
261
262 if (exist)

--- 23 unchanged lines hidden (view full) ---

286PredecBlk::PredecBlk(
287 int num_dec_signals,
288 Decoder * dec_,
289 double C_wire_predec_blk_out,
290 double R_wire_predec_blk_out_,
291 int num_dec_per_predec,
292 bool is_dram,
293 bool is_blk1)
233}
234
235void Decoder::leakage_feedback(double temperature)
236{
237 double cumulative_curr = 0; // cumulative leakage current
238 double cumulative_curr_Ig = 0; // cumulative leakage current
239
240 if (exist)

--- 23 unchanged lines hidden (view full) ---

264PredecBlk::PredecBlk(
265 int num_dec_signals,
266 Decoder * dec_,
267 double C_wire_predec_blk_out,
268 double R_wire_predec_blk_out_,
269 int num_dec_per_predec,
270 bool is_dram,
271 bool is_blk1)
294 :dec(dec_),
295 exist(false),
296 number_input_addr_bits(0),
297 C_ld_predec_blk_out(0),
298 R_wire_predec_blk_out(0),
299 branch_effort_nand2_gate_output(1),
300 branch_effort_nand3_gate_output(1),
301 flag_two_unique_paths(false),
302 flag_L2_gate(0),
303 number_inputs_L1_gate(0),
304 number_gates_L1_nand2_path(0),
305 number_gates_L1_nand3_path(0),
306 number_gates_L2(0),
307 min_number_gates_L1(2),
308 min_number_gates_L2(2),
309 num_L1_active_nand2_path(0),
310 num_L1_active_nand3_path(0),
311 delay_nand2_path(0),
312 delay_nand3_path(0),
313 power_nand2_path(),
314 power_nand3_path(),
315 power_L2(),
316 is_dram_(is_dram)
317{
318 int branch_effort_predec_out;
319 double C_ld_dec_gate;
320 int num_addr_bits_dec = _log2(num_dec_signals);
321 int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
322 int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
272 : dec(dec_),
273 exist(false),
274 number_input_addr_bits(0),
275 C_ld_predec_blk_out(0),
276 R_wire_predec_blk_out(0),
277 branch_effort_nand2_gate_output(1),
278 branch_effort_nand3_gate_output(1),
279 flag_two_unique_paths(false),
280 flag_L2_gate(0),
281 number_inputs_L1_gate(0),
282 number_gates_L1_nand2_path(0),
283 number_gates_L1_nand3_path(0),
284 number_gates_L2(0),
285 min_number_gates_L1(2),
286 min_number_gates_L2(2),
287 num_L1_active_nand2_path(0),
288 num_L1_active_nand3_path(0),
289 delay_nand2_path(0),
290 delay_nand3_path(0),
291 power_nand2_path(),
292 power_nand3_path(),
293 power_L2(),
294 is_dram_(is_dram) {
295 int branch_effort_predec_out;
296 double C_ld_dec_gate;
297 int num_addr_bits_dec = _log2(num_dec_signals);
298 int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
299 int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
323
300
324 w_L1_nand2_n[0] = 0;
325 w_L1_nand2_p[0] = 0;
326 w_L1_nand3_n[0] = 0;
327 w_L1_nand3_p[0] = 0;
301 w_L1_nand2_n[0] = 0;
302 w_L1_nand2_p[0] = 0;
303 w_L1_nand3_n[0] = 0;
304 w_L1_nand3_p[0] = 0;
328
305
329 if (is_blk1 == true)
330 {
331 if (num_addr_bits_dec <= 0)
332 {
333 return;
306 if (is_blk1 == true) {
307 if (num_addr_bits_dec <= 0) {
308 return;
309 } else if (num_addr_bits_dec < 4) {
310 // Just one predecoder block is required with NAND2 gates. No decoder required.
311 // The first level of predecoding directly drives the decoder output load
312 exist = true;
313 number_input_addr_bits = num_addr_bits_dec;
314 R_wire_predec_blk_out = dec->R_wire_dec_out;
315 C_ld_predec_blk_out = dec->C_ld_dec_out;
316 } else {
317 exist = true;
318 number_input_addr_bits = blk1_num_input_addr_bits;
319 branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
320 C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
321 R_wire_predec_blk_out = R_wire_predec_blk_out_;
322 C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
323 }
324 } else {
325 if (num_addr_bits_dec >= 4) {
326 exist = true;
327 number_input_addr_bits = blk2_num_input_addr_bits;
328 branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
329 C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
330 R_wire_predec_blk_out = R_wire_predec_blk_out_;
331 C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
332 }
334 }
333 }
335 else if (num_addr_bits_dec < 4)
336 {
337 // Just one predecoder block is required with NAND2 gates. No decoder required.
338 // The first level of predecoding directly drives the decoder output load
339 exist = true;
340 number_input_addr_bits = num_addr_bits_dec;
341 R_wire_predec_blk_out = dec->R_wire_dec_out;
342 C_ld_predec_blk_out = dec->C_ld_dec_out;
343 }
344 else
345 {
346 exist = true;
347 number_input_addr_bits = blk1_num_input_addr_bits;
348 branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
349 C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
350 R_wire_predec_blk_out = R_wire_predec_blk_out_;
351 C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
352 }
353 }
354 else
355 {
356 if (num_addr_bits_dec >= 4)
357 {
358 exist = true;
359 number_input_addr_bits = blk2_num_input_addr_bits;
360 branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
361 C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
362 R_wire_predec_blk_out = R_wire_predec_blk_out_;
363 C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
364 }
365 }
366
334
367 compute_widths();
368 compute_area();
335 compute_widths();
336 compute_area();
369}
370
371
372
337}
338
339
340
373void PredecBlk::compute_widths()
374{
375 double F, c_load_nand3_path, c_load_nand2_path;
376 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
377 double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
378 double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
341void PredecBlk::compute_widths() {
342 double F, c_load_nand3_path, c_load_nand2_path;
343 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
344 double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
345 double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
379
346
380 if (exist == false) return;
347 if (exist == false) return;
381
382
348
349
383 switch (number_input_addr_bits)
384 {
350 switch (number_input_addr_bits) {
385 case 1:
351 case 1:
386 flag_two_unique_paths = false;
387 number_inputs_L1_gate = 2;
388 flag_L2_gate = 0;
389 break;
352 flag_two_unique_paths = false;
353 number_inputs_L1_gate = 2;
354 flag_L2_gate = 0;
355 break;
390 case 2:
356 case 2:
391 flag_two_unique_paths = false;
392 number_inputs_L1_gate = 2;
393 flag_L2_gate = 0;
394 break;
357 flag_two_unique_paths = false;
358 number_inputs_L1_gate = 2;
359 flag_L2_gate = 0;
360 break;
395 case 3:
361 case 3:
396 flag_two_unique_paths = false;
397 number_inputs_L1_gate = 3;
398 flag_L2_gate = 0;
399 break;
362 flag_two_unique_paths = false;
363 number_inputs_L1_gate = 3;
364 flag_L2_gate = 0;
365 break;
400 case 4:
366 case 4:
401 flag_two_unique_paths = false;
402 number_inputs_L1_gate = 2;
403 flag_L2_gate = 2;
404 branch_effort_nand2_gate_output = 4;
405 break;
367 flag_two_unique_paths = false;
368 number_inputs_L1_gate = 2;
369 flag_L2_gate = 2;
370 branch_effort_nand2_gate_output = 4;
371 break;
406 case 5:
372 case 5:
407 flag_two_unique_paths = true;
408 flag_L2_gate = 2;
409 branch_effort_nand2_gate_output = 8;
410 branch_effort_nand3_gate_output = 4;
411 break;
373 flag_two_unique_paths = true;
374 flag_L2_gate = 2;
375 branch_effort_nand2_gate_output = 8;
376 branch_effort_nand3_gate_output = 4;
377 break;
412 case 6:
378 case 6:
413 flag_two_unique_paths = false;
414 number_inputs_L1_gate = 3;
415 flag_L2_gate = 2;
416 branch_effort_nand3_gate_output = 8;
417 break;
379 flag_two_unique_paths = false;
380 number_inputs_L1_gate = 3;
381 flag_L2_gate = 2;
382 branch_effort_nand3_gate_output = 8;
383 break;
418 case 7:
384 case 7:
419 flag_two_unique_paths = true;
420 flag_L2_gate = 3;
421 branch_effort_nand2_gate_output = 32;
422 branch_effort_nand3_gate_output = 16;
423 break;
385 flag_two_unique_paths = true;
386 flag_L2_gate = 3;
387 branch_effort_nand2_gate_output = 32;
388 branch_effort_nand3_gate_output = 16;
389 break;
424 case 8:
390 case 8:
425 flag_two_unique_paths = true;
426 flag_L2_gate = 3;
427 branch_effort_nand2_gate_output = 64;
428 branch_effort_nand3_gate_output = 32;
429 break;
391 flag_two_unique_paths = true;
392 flag_L2_gate = 3;
393 branch_effort_nand2_gate_output = 64;
394 branch_effort_nand3_gate_output = 32;
395 break;
430 case 9:
396 case 9:
431 flag_two_unique_paths = false;
432 number_inputs_L1_gate = 3;
433 flag_L2_gate = 3;
434 branch_effort_nand3_gate_output = 64;
435 break;
397 flag_two_unique_paths = false;
398 number_inputs_L1_gate = 3;
399 flag_L2_gate = 3;
400 branch_effort_nand3_gate_output = 64;
401 break;
436 default:
402 default:
437 assert(0);
438 break;
439 }
440
441 // find the number of gates and sizing in second level of predecoder (if there is a second level)
442 if (flag_L2_gate)
443 {
444 if (flag_L2_gate == 2)
445 { // 2nd level is a NAND2 gate
446 w_L2_n[0] = 2 * g_tp.min_w_nmos_;
447 F = gnand2;
403 assert(0);
404 break;
448 }
405 }
449 else
450 { // 2nd level is a NAND3 gate
451 w_L2_n[0] = 3 * g_tp.min_w_nmos_;
452 F = gnand3;
453 }
454 w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
455 F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
456 number_gates_L2 = logical_effort(
457 min_number_gates_L2,
458 flag_L2_gate == 2 ? gnand2 : gnand3,
459 F,
460 w_L2_n,
461 w_L2_p,
462 C_ld_predec_blk_out,
463 p_to_n_sz_ratio,
464 is_dram_, false,
465 g_tp.max_w_nmos_);
466
406
467 // Now find the number of gates and widths in first level of predecoder
468 if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2))
469 { // Whenever flag_two_unique_paths is true, it means first level of decoder employs
470 // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, it means
471 // a NAND2 gate is used in the first level of the predecoder
472 c_load_nand2_path = branch_effort_nand2_gate_output *
473 (gate_C(w_L2_n[0], 0, is_dram_) +
474 gate_C(w_L2_p[0], 0, is_dram_));
475 w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
476 w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
477 F = gnand2 * c_load_nand2_path /
478 (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
479 gate_C(w_L1_nand2_p[0], 0, is_dram_));
480 number_gates_L1_nand2_path = logical_effort(
481 min_number_gates_L1,
482 gnand2,
483 F,
484 w_L1_nand2_n,
485 w_L1_nand2_p,
486 c_load_nand2_path,
487 p_to_n_sz_ratio,
488 is_dram_, false,
489 g_tp.max_w_nmos_);
490 }
407 // find the number of gates and sizing in second level of predecoder (if there is a second level)
408 if (flag_L2_gate) {
409 if (flag_L2_gate == 2) { // 2nd level is a NAND2 gate
410 w_L2_n[0] = 2 * g_tp.min_w_nmos_;
411 F = gnand2;
412 } else { // 2nd level is a NAND3 gate
413 w_L2_n[0] = 3 * g_tp.min_w_nmos_;
414 F = gnand3;
415 }
416 w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
417 F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
418 number_gates_L2 = logical_effort(
419 min_number_gates_L2,
420 flag_L2_gate == 2 ? gnand2 : gnand3,
421 F,
422 w_L2_n,
423 w_L2_p,
424 C_ld_predec_blk_out,
425 p_to_n_sz_ratio,
426 is_dram_, false,
427 g_tp.max_w_nmos_);
491
428
492 //Now find widths of gates along path in which first gate is a NAND3
493 if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3))
494 { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
495 // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means
496 // a NAND3 gate is used in the first level of the predecoder
497 c_load_nand3_path = branch_effort_nand3_gate_output *
498 (gate_C(w_L2_n[0], 0, is_dram_) +
499 gate_C(w_L2_p[0], 0, is_dram_));
500 w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
501 w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
502 F = gnand3 * c_load_nand3_path /
503 (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
504 gate_C(w_L1_nand3_p[0], 0, is_dram_));
505 number_gates_L1_nand3_path = logical_effort(
506 min_number_gates_L1,
507 gnand3,
508 F,
509 w_L1_nand3_n,
510 w_L1_nand3_p,
511 c_load_nand3_path,
512 p_to_n_sz_ratio,
513 is_dram_, false,
514 g_tp.max_w_nmos_);
429 // Now find the number of gates and widths in first level of predecoder
430 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
431 // Whenever flag_two_unique_paths is true, it means first level of
432 // decoder employs
433 // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2,
434 // it means
435 // a NAND2 gate is used in the first level of the predecoder
436 c_load_nand2_path = branch_effort_nand2_gate_output *
437 (gate_C(w_L2_n[0], 0, is_dram_) +
438 gate_C(w_L2_p[0], 0, is_dram_));
439 w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
440 w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
441 F = gnand2 * c_load_nand2_path /
442 (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
443 gate_C(w_L1_nand2_p[0], 0, is_dram_));
444 number_gates_L1_nand2_path = logical_effort(
445 min_number_gates_L1,
446 gnand2,
447 F,
448 w_L1_nand2_n,
449 w_L1_nand2_p,
450 c_load_nand2_path,
451 p_to_n_sz_ratio,
452 is_dram_, false,
453 g_tp.max_w_nmos_);
454 }
455
456 //Now find widths of gates along path in which first gate is a NAND3
457 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
458 // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means
459 // a NAND3 gate is used in the first level of the predecoder
460 c_load_nand3_path = branch_effort_nand3_gate_output *
461 (gate_C(w_L2_n[0], 0, is_dram_) +
462 gate_C(w_L2_p[0], 0, is_dram_));
463 w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
464 w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
465 F = gnand3 * c_load_nand3_path /
466 (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
467 gate_C(w_L1_nand3_p[0], 0, is_dram_));
468 number_gates_L1_nand3_path = logical_effort(
469 min_number_gates_L1,
470 gnand3,
471 F,
472 w_L1_nand3_n,
473 w_L1_nand3_p,
474 c_load_nand3_path,
475 p_to_n_sz_ratio,
476 is_dram_, false,
477 g_tp.max_w_nmos_);
478 }
479 } else { // find number of gates and widths in first level of predecoder block when there is no second level
480 if (number_inputs_L1_gate == 2) {
481 w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
482 w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
483 F = gnand2 * C_ld_predec_blk_out /
484 (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
485 gate_C(w_L1_nand2_p[0], 0, is_dram_));
486 number_gates_L1_nand2_path = logical_effort(
487 min_number_gates_L1,
488 gnand2,
489 F,
490 w_L1_nand2_n,
491 w_L1_nand2_p,
492 C_ld_predec_blk_out,
493 p_to_n_sz_ratio,
494 is_dram_, false,
495 g_tp.max_w_nmos_);
496 } else if (number_inputs_L1_gate == 3) {
497 w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
498 w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
499 F = gnand3 * C_ld_predec_blk_out /
500 (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
501 gate_C(w_L1_nand3_p[0], 0, is_dram_));
502 number_gates_L1_nand3_path = logical_effort(
503 min_number_gates_L1,
504 gnand3,
505 F,
506 w_L1_nand3_n,
507 w_L1_nand3_p,
508 C_ld_predec_blk_out,
509 p_to_n_sz_ratio,
510 is_dram_, false,
511 g_tp.max_w_nmos_);
512 }
515 }
513 }
516 }
517 else
518 { // find number of gates and widths in first level of predecoder block when there is no second level
519 if (number_inputs_L1_gate == 2)
520 {
521 w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
522 w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
523 F = gnand2*C_ld_predec_blk_out /
524 (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
525 gate_C(w_L1_nand2_p[0], 0, is_dram_));
526 number_gates_L1_nand2_path = logical_effort(
527 min_number_gates_L1,
528 gnand2,
529 F,
530 w_L1_nand2_n,
531 w_L1_nand2_p,
532 C_ld_predec_blk_out,
533 p_to_n_sz_ratio,
534 is_dram_, false,
535 g_tp.max_w_nmos_);
536 }
537 else if (number_inputs_L1_gate == 3)
538 {
539 w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
540 w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
541 F = gnand3*C_ld_predec_blk_out /
542 (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
543 gate_C(w_L1_nand3_p[0], 0, is_dram_));
544 number_gates_L1_nand3_path = logical_effort(
545 min_number_gates_L1,
546 gnand3,
547 F,
548 w_L1_nand3_n,
549 w_L1_nand3_p,
550 C_ld_predec_blk_out,
551 p_to_n_sz_ratio,
552 is_dram_, false,
553 g_tp.max_w_nmos_);
554 }
555 }
556}
557
558
559
514}
515
516
517
560void PredecBlk::compute_area()
561{
562 if (exist)
563 { // First check whether a predecoder block is needed
564 int num_L1_nand2 = 0;
565 int num_L1_nand3 = 0;
566 int num_L2 = 0;
567 double tot_area_L1_nand3 =0;
568 double leak_L1_nand3 =0;
569 double gate_leak_L1_nand3 =0;
518void PredecBlk::compute_area() {
519 if (exist) { // First check whether a predecoder block is needed
520 int num_L1_nand2 = 0;
521 int num_L1_nand3 = 0;
522 int num_L2 = 0;
523 double tot_area_L1_nand3 = 0;
524 double leak_L1_nand3 = 0;
525 double gate_leak_L1_nand3 = 0;
570
526
571 double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
572 double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
573 double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
574 if (number_inputs_L1_gate != 3) {
575 tot_area_L1_nand3 = 0;
576 leak_L1_nand3 = 0;
577 gate_leak_L1_nand3 =0;
578 }
579 else {
580 tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
581 leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
582 gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
583 }
527 double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
528 double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
529 double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
530 if (number_inputs_L1_gate != 3) {
531 tot_area_L1_nand3 = 0;
532 leak_L1_nand3 = 0;
533 gate_leak_L1_nand3 = 0;
534 } else {
535 tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
536 leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
537 gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
538 }
584
539
585 switch (number_input_addr_bits)
586 {
587 case 1: //2 NAND2 gates
588 num_L1_nand2 = 2;
589 num_L2 = 0;
590 num_L1_active_nand2_path =1;
591 num_L1_active_nand3_path =0;
592 break;
593 case 2: //4 NAND2 gates
594 num_L1_nand2 = 4;
595 num_L2 = 0;
596 num_L1_active_nand2_path =1;
597 num_L1_active_nand3_path =0;
598 break;
599 case 3: //8 NAND3 gates
600 num_L1_nand3 = 8;
601 num_L2 = 0;
602 num_L1_active_nand2_path =0;
603 num_L1_active_nand3_path =1;
604 break;
605 case 4: //4 + 4 NAND2 gates
606 num_L1_nand2 = 8;
607 num_L2 = 16;
608 num_L1_active_nand2_path =2;
609 num_L1_active_nand3_path =0;
610 break;
611 case 5: //4 NAND2 gates, 8 NAND3 gates
612 num_L1_nand2 = 4;
613 num_L1_nand3 = 8;
614 num_L2 = 32;
615 num_L1_active_nand2_path =1;
616 num_L1_active_nand3_path =1;
617 break;
618 case 6: //8 + 8 NAND3 gates
619 num_L1_nand3 = 16;
620 num_L2 = 64;
621 num_L1_active_nand2_path =0;
622 num_L1_active_nand3_path =2;
623 break;
624 case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
625 num_L1_nand2 = 8;
626 num_L1_nand3 = 8;
627 num_L2 = 128;
628 num_L1_active_nand2_path =2;
629 num_L1_active_nand3_path =1;
630 break;
631 case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
632 num_L1_nand2 = 4;
633 num_L1_nand3 = 16;
634 num_L2 = 256;
635 num_L1_active_nand2_path =2;
636 num_L1_active_nand3_path =2;
637 break;
638 case 9: //8 + 8 + 8 NAND3 gates
639 num_L1_nand3 = 24;
640 num_L2 = 512;
641 num_L1_active_nand2_path =0;
642 num_L1_active_nand3_path =3;
643 break;
644 default:
645 break;
646 }
540 switch (number_input_addr_bits) {
541 case 1: //2 NAND2 gates
542 num_L1_nand2 = 2;
543 num_L2 = 0;
544 num_L1_active_nand2_path = 1;
545 num_L1_active_nand3_path = 0;
546 break;
547 case 2: //4 NAND2 gates
548 num_L1_nand2 = 4;
549 num_L2 = 0;
550 num_L1_active_nand2_path = 1;
551 num_L1_active_nand3_path = 0;
552 break;
553 case 3: //8 NAND3 gates
554 num_L1_nand3 = 8;
555 num_L2 = 0;
556 num_L1_active_nand2_path = 0;
557 num_L1_active_nand3_path = 1;
558 break;
559 case 4: //4 + 4 NAND2 gates
560 num_L1_nand2 = 8;
561 num_L2 = 16;
562 num_L1_active_nand2_path = 2;
563 num_L1_active_nand3_path = 0;
564 break;
565 case 5: //4 NAND2 gates, 8 NAND3 gates
566 num_L1_nand2 = 4;
567 num_L1_nand3 = 8;
568 num_L2 = 32;
569 num_L1_active_nand2_path = 1;
570 num_L1_active_nand3_path = 1;
571 break;
572 case 6: //8 + 8 NAND3 gates
573 num_L1_nand3 = 16;
574 num_L2 = 64;
575 num_L1_active_nand2_path = 0;
576 num_L1_active_nand3_path = 2;
577 break;
578 case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
579 num_L1_nand2 = 8;
580 num_L1_nand3 = 8;
581 num_L2 = 128;
582 num_L1_active_nand2_path = 2;
583 num_L1_active_nand3_path = 1;
584 break;
585 case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
586 num_L1_nand2 = 4;
587 num_L1_nand3 = 16;
588 num_L2 = 256;
589 num_L1_active_nand2_path = 2;
590 num_L1_active_nand3_path = 2;
591 break;
592 case 9: //8 + 8 + 8 NAND3 gates
593 num_L1_nand3 = 24;
594 num_L2 = 512;
595 num_L1_active_nand2_path = 0;
596 num_L1_active_nand3_path = 3;
597 break;
598 default:
599 break;
600 }
647
601
648 for (int i = 1; i < number_gates_L1_nand2_path; ++i)
649 {
650 tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
651 leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
652 gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
653 }
654 tot_area_L1_nand2 *= num_L1_nand2;
655 leak_L1_nand2 *= num_L1_nand2;
656 gate_leak_L1_nand2 *= num_L1_nand2;
602 for (int i = 1; i < number_gates_L1_nand2_path; ++i) {
603 tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
604 leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
605 gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
606 }
607 tot_area_L1_nand2 *= num_L1_nand2;
608 leak_L1_nand2 *= num_L1_nand2;
609 gate_leak_L1_nand2 *= num_L1_nand2;
657
610
658 for (int i = 1; i < number_gates_L1_nand3_path; ++i)
659 {
660 tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
661 leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
662 gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
663 }
664 tot_area_L1_nand3 *= num_L1_nand3;
665 leak_L1_nand3 *= num_L1_nand3;
666 gate_leak_L1_nand3 *= num_L1_nand3;
611 for (int i = 1; i < number_gates_L1_nand3_path; ++i) {
612 tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
613 leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
614 gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
615 }
616 tot_area_L1_nand3 *= num_L1_nand3;
617 leak_L1_nand3 *= num_L1_nand3;
618 gate_leak_L1_nand3 *= num_L1_nand3;
667
619
668 double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
669 double cumulative_area_L2 = 0.0;
670 double leakage_L2 = 0.0;
671 double gate_leakage_L2 = 0.0;
620 double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
621 double cumulative_area_L2 = 0.0;
622 double leakage_L2 = 0.0;
623 double gate_leakage_L2 = 0.0;
672
624
673 if (flag_L2_gate == 2)
674 {
675 cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
676 leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
677 gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
678 }
679 else if (flag_L2_gate == 3)
680 {
681 cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
682 leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
683 gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
684 }
625 if (flag_L2_gate == 2) {
626 cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
627 leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
628 gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
629 } else if (flag_L2_gate == 3) {
630 cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
631 leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
632 gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
633 }
685
634
686 for (int i = 1; i < number_gates_L2; ++i)
687 {
688 cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
689 leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
690 gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
691 }
692 cumulative_area_L2 *= num_L2;
693 leakage_L2 *= num_L2;
694 gate_leakage_L2 *= num_L2;
635 for (int i = 1; i < number_gates_L2; ++i) {
636 cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
637 leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
638 gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
639 }
640 cumulative_area_L2 *= num_L2;
641 leakage_L2 *= num_L2;
642 gate_leakage_L2 *= num_L2;
695
643
696 power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
697 power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
698 power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
699 area.set_area(cumulative_area_L1 + cumulative_area_L2);
700 power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
701 power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
702 power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
703 }
644 power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
645 power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
646 power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
647 area.set_area(cumulative_area_L1 + cumulative_area_L2);
648 power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
649 power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
650 power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
651 }
704}
705
706
707
708pair<double, double> PredecBlk::compute_delays(
652}
653
654
655
656pair<double, double> PredecBlk::compute_delays(
709 pair<double, double> inrisetime) // <nand2, nand3>
710{
711 pair<double, double> ret_val;
712 ret_val.first = 0; // outrisetime_nand2_path
713 ret_val.second = 0; // outrisetime_nand3_path
657 pair<double, double> inrisetime) { // <nand2, nand3>
658 pair<double, double> ret_val;
659 ret_val.first = 0; // outrisetime_nand2_path
660 ret_val.second = 0; // outrisetime_nand3_path
714
661
715 double inrisetime_nand2_path = inrisetime.first;
716 double inrisetime_nand3_path = inrisetime.second;
717 int i;
718 double rd, c_load, c_intrinsic, tf, this_delay;
719 double Vdd = g_tp.peri_global.Vdd;
662 double inrisetime_nand2_path = inrisetime.first;
663 double inrisetime_nand3_path = inrisetime.second;
664 int i;
665 double rd, c_load, c_intrinsic, tf, this_delay;
666 double Vdd = g_tp.peri_global.Vdd;
720
667
721 // TODO: following delay calculation part can be greatly simplified.
722 // first check whether a predecoder block is required
723 if (exist)
724 {
725 //Find delay in first level of predecoder block
726 //First find delay in path
727 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2))
728 {
729 //First gate is a NAND2 gate
730 rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
731 c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
732 c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
733 drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
734 tf = rd * (c_intrinsic + c_load);
735 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
736 delay_nand2_path += this_delay;
737 inrisetime_nand2_path = this_delay / (1.0 - 0.5);
738 power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
668 // TODO: following delay calculation part can be greatly simplified.
669 // first check whether a predecoder block is required
670 if (exist) {
671 //Find delay in first level of predecoder block
672 //First find delay in path
673 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
674 //First gate is a NAND2 gate
675 rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
676 c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
677 c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
678 drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
679 tf = rd * (c_intrinsic + c_load);
680 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
681 delay_nand2_path += this_delay;
682 inrisetime_nand2_path = this_delay / (1.0 - 0.5);
683 power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
739
684
740 //Add delays of all but the last inverter in the chain
741 for (i = 1; i < number_gates_L1_nand2_path - 1; ++i)
742 {
743 rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
744 c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
745 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
746 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
747 tf = rd * (c_intrinsic + c_load);
748 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
749 delay_nand2_path += this_delay;
750 inrisetime_nand2_path = this_delay / (1.0 - 0.5);
751 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
752 }
685 //Add delays of all but the last inverter in the chain
686 for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) {
687 rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
688 c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
689 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
690 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
691 tf = rd * (c_intrinsic + c_load);
692 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
693 delay_nand2_path += this_delay;
694 inrisetime_nand2_path = this_delay / (1.0 - 0.5);
695 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
696 }
753
697
754 //Add delay of the last inverter
755 i = number_gates_L1_nand2_path - 1;
756 rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
757 if (flag_L2_gate)
758 {
759 c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
760 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
761 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
762 tf = rd * (c_intrinsic + c_load);
763 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
764 delay_nand2_path += this_delay;
765 inrisetime_nand2_path = this_delay / (1.0 - 0.5);
766 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
767 }
768 else
769 { //First level directly drives decoder output load
770 c_load = C_ld_predec_blk_out;
771 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
772 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
773 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
774 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
775 delay_nand2_path += this_delay;
776 ret_val.first = this_delay / (1.0 - 0.5);
777 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
778 }
779 }
698 //Add delay of the last inverter
699 i = number_gates_L1_nand2_path - 1;
700 rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
701 if (flag_L2_gate) {
702 c_load = branch_effort_nand2_gate_output *
703 (gate_C(w_L2_n[0], 0, is_dram_) +
704 gate_C(w_L2_p[0], 0, is_dram_));
705 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
706 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
707 tf = rd * (c_intrinsic + c_load);
708 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
709 delay_nand2_path += this_delay;
710 inrisetime_nand2_path = this_delay / (1.0 - 0.5);
711 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
712 } else { //First level directly drives decoder output load
713 c_load = C_ld_predec_blk_out;
714 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
715 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
716 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
717 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
718 delay_nand2_path += this_delay;
719 ret_val.first = this_delay / (1.0 - 0.5);
720 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
721 }
722 }
780
723
781 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3))
782 { //Check if the number of gates in the first level is more than 1.
783 //First gate is a NAND3 gate
784 rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
785 c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
786 c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
787 drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
788 tf = rd * (c_intrinsic + c_load);
789 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
790 delay_nand3_path += this_delay;
791 inrisetime_nand3_path = this_delay / (1.0 - 0.5);
792 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
724 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) {
725 //Check if the number of gates in the first level is more than 1.
726 //First gate is a NAND3 gate
727 rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
728 c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
729 c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
730 drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
731 tf = rd * (c_intrinsic + c_load);
732 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
733 delay_nand3_path += this_delay;
734 inrisetime_nand3_path = this_delay / (1.0 - 0.5);
735 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
793
736
794 //Add delays of all but the last inverter in the chain
795 for (i = 1; i < number_gates_L1_nand3_path - 1; ++i)
796 {
797 rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
798 c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
799 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
800 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
801 tf = rd * (c_intrinsic + c_load);
802 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
803 delay_nand3_path += this_delay;
804 inrisetime_nand3_path = this_delay / (1.0 - 0.5);
805 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
806 }
737 //Add delays of all but the last inverter in the chain
738 for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) {
739 rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
740 c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
741 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
742 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
743 tf = rd * (c_intrinsic + c_load);
744 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
745 delay_nand3_path += this_delay;
746 inrisetime_nand3_path = this_delay / (1.0 - 0.5);
747 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
748 }
807
749
808 //Add delay of the last inverter
809 i = number_gates_L1_nand3_path - 1;
810 rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
811 if (flag_L2_gate)
812 {
813 c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
814 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
815 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
816 tf = rd * (c_intrinsic + c_load);
817 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
818 delay_nand3_path += this_delay;
819 inrisetime_nand3_path = this_delay / (1.0 - 0.5);
820 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
821 }
822 else
823 { //First level directly drives decoder output load
824 c_load = C_ld_predec_blk_out;
825 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
826 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
827 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
828 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
829 delay_nand3_path += this_delay;
830 ret_val.second = this_delay / (1.0 - 0.5);
831 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
832 }
833 }
750 //Add delay of the last inverter
751 i = number_gates_L1_nand3_path - 1;
752 rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
753 if (flag_L2_gate) {
754 c_load = branch_effort_nand3_gate_output *
755 (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0,
756 is_dram_));
757 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
758 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
759 tf = rd * (c_intrinsic + c_load);
760 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
761 delay_nand3_path += this_delay;
762 inrisetime_nand3_path = this_delay / (1.0 - 0.5);
763 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
764 } else { //First level directly drives decoder output load
765 c_load = C_ld_predec_blk_out;
766 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
767 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
768 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
769 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
770 delay_nand3_path += this_delay;
771 ret_val.second = this_delay / (1.0 - 0.5);
772 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
773 }
774 }
834
775
835 // Find delay through second level
836 if (flag_L2_gate)
837 {
838 if (flag_L2_gate == 2)
839 {
840 rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
841 c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
842 c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
843 drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
844 tf = rd * (c_intrinsic + c_load);
845 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
846 delay_nand2_path += this_delay;
847 inrisetime_nand2_path = this_delay / (1.0 - 0.5);
848 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
849 }
850 else
851 { // flag_L2_gate = 3
852 rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
853 c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
854 c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
855 drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
856 tf = rd * (c_intrinsic + c_load);
857 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
858 delay_nand3_path += this_delay;
859 inrisetime_nand3_path = this_delay / (1.0 - 0.5);
860 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
861 }
776 // Find delay through second level
777 if (flag_L2_gate) {
778 if (flag_L2_gate == 2) {
779 rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
780 c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
781 c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
782 drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
783 tf = rd * (c_intrinsic + c_load);
784 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
785 delay_nand2_path += this_delay;
786 inrisetime_nand2_path = this_delay / (1.0 - 0.5);
787 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
788 } else { // flag_L2_gate = 3
789 rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
790 c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
791 c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
792 drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
793 tf = rd * (c_intrinsic + c_load);
794 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
795 delay_nand3_path += this_delay;
796 inrisetime_nand3_path = this_delay / (1.0 - 0.5);
797 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
798 }
862
799
863 for (i = 1; i < number_gates_L2 - 1; ++i)
864 {
865 rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
866 c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
867 c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
868 drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
869 tf = rd * (c_intrinsic + c_load);
870 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
871 delay_nand2_path += this_delay;
872 inrisetime_nand2_path = this_delay / (1.0 - 0.5);
873 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
874 delay_nand3_path += this_delay;
875 inrisetime_nand3_path = this_delay / (1.0 - 0.5);
876 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
877 }
800 for (i = 1; i < number_gates_L2 - 1; ++i) {
801 rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
802 c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
803 c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
804 drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
805 tf = rd * (c_intrinsic + c_load);
806 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
807 delay_nand2_path += this_delay;
808 inrisetime_nand2_path = this_delay / (1.0 - 0.5);
809 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
810 delay_nand3_path += this_delay;
811 inrisetime_nand3_path = this_delay / (1.0 - 0.5);
812 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
813 }
878
814
879 //Add delay of final inverter that drives the wordline decoders
880 i = number_gates_L2 - 1;
881 c_load = C_ld_predec_blk_out;
882 rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
883 c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
884 drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
885 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
886 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
887 delay_nand2_path += this_delay;
888 ret_val.first = this_delay / (1.0 - 0.5);
889 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
890 delay_nand3_path += this_delay;
891 ret_val.second = this_delay / (1.0 - 0.5);
892 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
815 //Add delay of final inverter that drives the wordline decoders
816 i = number_gates_L2 - 1;
817 c_load = C_ld_predec_blk_out;
818 rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
819 c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
820 drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
821 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
822 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
823 delay_nand2_path += this_delay;
824 ret_val.first = this_delay / (1.0 - 0.5);
825 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
826 delay_nand3_path += this_delay;
827 ret_val.second = this_delay / (1.0 - 0.5);
828 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
829 }
893 }
830 }
894 }
895
831
896 delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
897 return ret_val;
832 delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
833 return ret_val;
898}
899
900void PredecBlk::leakage_feedback(double temperature)
901{
902 if (exist)
903 { // First check whether a predecoder block is needed
904 int num_L1_nand2 = 0;
905 int num_L1_nand3 = 0;

--- 122 unchanged lines hidden (view full) ---

1028 power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
1029 }
1030}
1031
1032PredecBlkDrv::PredecBlkDrv(
1033 int way_select_,
1034 PredecBlk * blk_,
1035 bool is_dram)
834}
835
836void PredecBlk::leakage_feedback(double temperature)
837{
838 if (exist)
839 { // First check whether a predecoder block is needed
840 int num_L1_nand2 = 0;
841 int num_L1_nand3 = 0;

--- 122 unchanged lines hidden (view full) ---

964 power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
965 }
966}
967
968PredecBlkDrv::PredecBlkDrv(
969 int way_select_,
970 PredecBlk * blk_,
971 bool is_dram)
1036 :flag_driver_exists(0),
1037 number_gates_nand2_path(0),
1038 number_gates_nand3_path(0),
1039 min_number_gates(2),
1040 num_buffers_driving_1_nand2_load(0),
1041 num_buffers_driving_2_nand2_load(0),
1042 num_buffers_driving_4_nand2_load(0),
1043 num_buffers_driving_2_nand3_load(0),
1044 num_buffers_driving_8_nand3_load(0),
1045 num_buffers_nand3_path(0),
1046 c_load_nand2_path_out(0),
1047 c_load_nand3_path_out(0),
1048 r_load_nand2_path_out(0),
1049 r_load_nand3_path_out(0),
1050 delay_nand2_path(0),
1051 delay_nand3_path(0),
1052 power_nand2_path(),
1053 power_nand3_path(),
1054 blk(blk_), dec(blk->dec),
1055 is_dram_(is_dram),
1056 way_select(way_select_)
1057{
1058 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
1059 {
1060 width_nand2_path_n[i] = 0;
1061 width_nand2_path_p[i] = 0;
1062 width_nand3_path_n[i] = 0;
1063 width_nand3_path_p[i] = 0;
1064 }
972 : flag_driver_exists(0),
973 number_gates_nand2_path(0),
974 number_gates_nand3_path(0),
975 min_number_gates(2),
976 num_buffers_driving_1_nand2_load(0),
977 num_buffers_driving_2_nand2_load(0),
978 num_buffers_driving_4_nand2_load(0),
979 num_buffers_driving_2_nand3_load(0),
980 num_buffers_driving_8_nand3_load(0),
981 num_buffers_nand3_path(0),
982 c_load_nand2_path_out(0),
983 c_load_nand3_path_out(0),
984 r_load_nand2_path_out(0),
985 r_load_nand3_path_out(0),
986 delay_nand2_path(0),
987 delay_nand3_path(0),
988 power_nand2_path(),
989 power_nand3_path(),
990 blk(blk_), dec(blk->dec),
991 is_dram_(is_dram),
992 way_select(way_select_) {
993 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
994 width_nand2_path_n[i] = 0;
995 width_nand2_path_p[i] = 0;
996 width_nand3_path_n[i] = 0;
997 width_nand3_path_p[i] = 0;
998 }
1065
999
1066 number_input_addr_bits = blk->number_input_addr_bits;
1000 number_input_addr_bits = blk->number_input_addr_bits;
1067
1001
1068 if (way_select > 1)
1069 {
1070 flag_driver_exists = 1;
1071 number_input_addr_bits = way_select;
1072 if (dec->num_in_signals == 2)
1073 {
1074 c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
1075 num_buffers_driving_2_nand2_load = number_input_addr_bits;
1002 if (way_select > 1) {
1003 flag_driver_exists = 1;
1004 number_input_addr_bits = way_select;
1005 if (dec->num_in_signals == 2) {
1006 c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
1007 num_buffers_driving_2_nand2_load = number_input_addr_bits;
1008 } else if (dec->num_in_signals == 3) {
1009 c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
1010 num_buffers_driving_2_nand3_load = number_input_addr_bits;
1011 }
1012 } else if (way_select == 0) {
1013 if (blk->exist) {
1014 flag_driver_exists = 1;
1015 }
1076 }
1016 }
1077 else if (dec->num_in_signals == 3)
1078 {
1079 c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
1080 num_buffers_driving_2_nand3_load = number_input_addr_bits;
1081 }
1082 }
1083 else if (way_select == 0)
1084 {
1085 if (blk->exist)
1086 {
1087 flag_driver_exists = 1;
1088 }
1089 }
1090
1017
1091 compute_widths();
1092 compute_area();
1018 compute_widths();
1019 compute_area();
1093}
1094
1095
1096
1020}
1021
1022
1023
1097void PredecBlkDrv::compute_widths()
1098{
1099 // The predecode block driver accepts as input the address bits from the h-tree network. For
1100 // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
1101 // inversion to generate addrbar and simply treat addrbar as addr.
1024void PredecBlkDrv::compute_widths() {
1025 // The predecode block driver accepts as input the address bits from the h-tree network. For
1026 // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
1027 // inversion to generate addrbar and simply treat addrbar as addr.
1102
1028
1103 double F;
1104 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
1029 double F;
1030 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
1105
1031
1106 if (flag_driver_exists)
1107 {
1108 double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
1109 double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
1032 if (flag_driver_exists) {
1033 double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
1034 double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
1110
1035
1111 if (way_select == 0)
1112 {
1113 if (blk->number_input_addr_bits == 1)
1114 { //2 NAND2 gates
1115 num_buffers_driving_2_nand2_load = 1;
1116 c_load_nand2_path_out = 2 * C_nand2_gate_blk;
1117 }
1118 else if (blk->number_input_addr_bits == 2)
1119 { //4 NAND2 gates one 2-4 decoder
1120 num_buffers_driving_4_nand2_load = 2;
1121 c_load_nand2_path_out = 4 * C_nand2_gate_blk;
1122 }
1123 else if (blk->number_input_addr_bits == 3)
1124 { //8 NAND3 gates one 3-8 decoder
1125 num_buffers_driving_8_nand3_load = 3;
1126 c_load_nand3_path_out = 8 * C_nand3_gate_blk;
1127 }
1128 else if (blk->number_input_addr_bits == 4)
1129 { //4 + 4 NAND2 gates two 2-4 decoder
1130 num_buffers_driving_4_nand2_load = 4;
1131 c_load_nand2_path_out = 4 * C_nand2_gate_blk;
1132 }
1133 else if (blk->number_input_addr_bits == 5)
1134 { //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder
1135 num_buffers_driving_4_nand2_load = 2;
1136 num_buffers_driving_8_nand3_load = 3;
1137 c_load_nand2_path_out = 4 * C_nand2_gate_blk;
1138 c_load_nand3_path_out = 8 * C_nand3_gate_blk;
1139 }
1140 else if (blk->number_input_addr_bits == 6)
1141 { //8 + 8 NAND3 gates two 3-8 decoder
1142 num_buffers_driving_8_nand3_load = 6;
1143 c_load_nand3_path_out = 8 * C_nand3_gate_blk;
1144 }
1145 else if (blk->number_input_addr_bits == 7)
1146 { //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder
1147 num_buffers_driving_4_nand2_load = 4;
1148 num_buffers_driving_8_nand3_load = 3;
1149 c_load_nand2_path_out = 4 * C_nand2_gate_blk;
1150 c_load_nand3_path_out = 8 * C_nand3_gate_blk;
1151 }
1152 else if (blk->number_input_addr_bits == 8)
1153 { //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder
1154 num_buffers_driving_4_nand2_load = 2;
1155 num_buffers_driving_8_nand3_load = 6;
1156 c_load_nand2_path_out = 4 * C_nand2_gate_blk;
1157 c_load_nand3_path_out = 8 * C_nand3_gate_blk;
1158 }
1159 else if (blk->number_input_addr_bits == 9)
1160 { //8 + 8 + 8 NAND3 gates three 3-8 decoder
1161 num_buffers_driving_8_nand3_load = 9;
1162 c_load_nand3_path_out = 8 * C_nand3_gate_blk;
1163 }
1164 }
1036 if (way_select == 0) {
1037 if (blk->number_input_addr_bits == 1) {
1038 //2 NAND2 gates
1039 num_buffers_driving_2_nand2_load = 1;
1040 c_load_nand2_path_out = 2 * C_nand2_gate_blk;
1041 } else if (blk->number_input_addr_bits == 2) {
1042 //4 NAND2 gates one 2-4 decoder
1043 num_buffers_driving_4_nand2_load = 2;
1044 c_load_nand2_path_out = 4 * C_nand2_gate_blk;
1045 } else if (blk->number_input_addr_bits == 3) {
1046 //8 NAND3 gates one 3-8 decoder
1047 num_buffers_driving_8_nand3_load = 3;
1048 c_load_nand3_path_out = 8 * C_nand3_gate_blk;
1049 } else if (blk->number_input_addr_bits == 4) {
1050 //4 + 4 NAND2 gates two 2-4 decoder
1051 num_buffers_driving_4_nand2_load = 4;
1052 c_load_nand2_path_out = 4 * C_nand2_gate_blk;
1053 } else if (blk->number_input_addr_bits == 5) {
1054 //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8
1055 //decoder
1056 num_buffers_driving_4_nand2_load = 2;
1057 num_buffers_driving_8_nand3_load = 3;
1058 c_load_nand2_path_out = 4 * C_nand2_gate_blk;
1059 c_load_nand3_path_out = 8 * C_nand3_gate_blk;
1060 } else if (blk->number_input_addr_bits == 6) {
1061 //8 + 8 NAND3 gates two 3-8 decoder
1062 num_buffers_driving_8_nand3_load = 6;
1063 c_load_nand3_path_out = 8 * C_nand3_gate_blk;
1064 } else if (blk->number_input_addr_bits == 7) {
1065 //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8
1066 //decoder
1067 num_buffers_driving_4_nand2_load = 4;
1068 num_buffers_driving_8_nand3_load = 3;
1069 c_load_nand2_path_out = 4 * C_nand2_gate_blk;
1070 c_load_nand3_path_out = 8 * C_nand3_gate_blk;
1071 } else if (blk->number_input_addr_bits == 8) {
1072 //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8
1073 //decoder
1074 num_buffers_driving_4_nand2_load = 2;
1075 num_buffers_driving_8_nand3_load = 6;
1076 c_load_nand2_path_out = 4 * C_nand2_gate_blk;
1077 c_load_nand3_path_out = 8 * C_nand3_gate_blk;
1078 } else if (blk->number_input_addr_bits == 9) {
1079 //8 + 8 + 8 NAND3 gates three 3-8 decoder
1080 num_buffers_driving_8_nand3_load = 9;
1081 c_load_nand3_path_out = 8 * C_nand3_gate_blk;
1082 }
1083 }
1165
1084
1166 if ((blk->flag_two_unique_paths) ||
1167 (blk->number_inputs_L1_gate == 2) ||
1168 (number_input_addr_bits == 0) ||
1169 ((way_select)&&(dec->num_in_signals == 2)))
1170 { //this means that way_select is driving NAND2 in decoder.
1171 width_nand2_path_n[0] = g_tp.min_w_nmos_;
1172 width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
1173 F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
1174 number_gates_nand2_path = logical_effort(
1175 min_number_gates,
1176 1,
1177 F,
1178 width_nand2_path_n,
1179 width_nand2_path_p,
1180 c_load_nand2_path_out,
1181 p_to_n_sz_ratio,
1182 is_dram_, false, g_tp.max_w_nmos_);
1183 }
1085 if ((blk->flag_two_unique_paths) ||
1086 (blk->number_inputs_L1_gate == 2) ||
1087 (number_input_addr_bits == 0) ||
1088 ((way_select) && (dec->num_in_signals == 2))) {
1089 //this means that way_select is driving NAND2 in decoder.
1090 width_nand2_path_n[0] = g_tp.min_w_nmos_;
1091 width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
1092 F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
1093 number_gates_nand2_path = logical_effort(
1094 min_number_gates,
1095 1,
1096 F,
1097 width_nand2_path_n,
1098 width_nand2_path_p,
1099 c_load_nand2_path_out,
1100 p_to_n_sz_ratio,
1101 is_dram_, false, g_tp.max_w_nmos_);
1102 }
1184
1103
1185 if ((blk->flag_two_unique_paths) ||
1186 (blk->number_inputs_L1_gate == 3) ||
1187 ((way_select)&&(dec->num_in_signals == 3)))
1188 { //this means that way_select is driving NAND3 in decoder.
1189 width_nand3_path_n[0] = g_tp.min_w_nmos_;
1190 width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
1191 F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
1192 number_gates_nand3_path = logical_effort(
1193 min_number_gates,
1194 1,
1195 F,
1196 width_nand3_path_n,
1197 width_nand3_path_p,
1198 c_load_nand3_path_out,
1199 p_to_n_sz_ratio,
1200 is_dram_, false, g_tp.max_w_nmos_);
1104 if ((blk->flag_two_unique_paths) ||
1105 (blk->number_inputs_L1_gate == 3) ||
1106 ((way_select) && (dec->num_in_signals == 3))) {
1107 //this means that way_select is driving NAND3 in decoder.
1108 width_nand3_path_n[0] = g_tp.min_w_nmos_;
1109 width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
1110 F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
1111 number_gates_nand3_path = logical_effort(
1112 min_number_gates,
1113 1,
1114 F,
1115 width_nand3_path_n,
1116 width_nand3_path_p,
1117 c_load_nand3_path_out,
1118 p_to_n_sz_ratio,
1119 is_dram_, false, g_tp.max_w_nmos_);
1120 }
1201 }
1121 }
1202 }
1203}
1204
1205
1206
1122}
1123
1124
1125
1207void PredecBlkDrv::compute_area()
1208{
1209 double area_nand2_path = 0;
1210 double area_nand3_path = 0;
1211 double leak_nand2_path = 0;
1212 double leak_nand3_path = 0;
1213 double gate_leak_nand2_path = 0;
1214 double gate_leak_nand3_path = 0;
1126void PredecBlkDrv::compute_area() {
1127 double area_nand2_path = 0;
1128 double area_nand3_path = 0;
1129 double leak_nand2_path = 0;
1130 double leak_nand3_path = 0;
1131 double gate_leak_nand2_path = 0;
1132 double gate_leak_nand3_path = 0;
1215
1133
1216 if (flag_driver_exists)
1217 { // first check whether a predecoder block driver is needed
1218 for (int i = 0; i < number_gates_nand2_path; ++i)
1219 {
1220 area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def);
1221 leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
1222 gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
1223 }
1224 area_nand2_path *= (num_buffers_driving_1_nand2_load +
1225 num_buffers_driving_2_nand2_load +
1226 num_buffers_driving_4_nand2_load);
1227 leak_nand2_path *= (num_buffers_driving_1_nand2_load +
1228 num_buffers_driving_2_nand2_load +
1229 num_buffers_driving_4_nand2_load);
1230 gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
1134 if (flag_driver_exists) {
1135 // first check whether a predecoder block driver is needed
1136 for (int i = 0; i < number_gates_nand2_path; ++i) {
1137 area_nand2_path +=
1138 compute_gate_area(INV, 1, width_nand2_path_p[i],
1139 width_nand2_path_n[i], g_tp.cell_h_def);
1140 leak_nand2_path +=
1141 cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
1142 1, inv, is_dram_);
1143 gate_leak_nand2_path +=
1144 cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
1145 1, inv, is_dram_);
1146 }
1147 area_nand2_path *= (num_buffers_driving_1_nand2_load +
1231 num_buffers_driving_2_nand2_load +
1232 num_buffers_driving_4_nand2_load);
1148 num_buffers_driving_2_nand2_load +
1149 num_buffers_driving_4_nand2_load);
1150 leak_nand2_path *= (num_buffers_driving_1_nand2_load +
1151 num_buffers_driving_2_nand2_load +
1152 num_buffers_driving_4_nand2_load);
1153 gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
1154 num_buffers_driving_2_nand2_load +
1155 num_buffers_driving_4_nand2_load);
1233
1156
1234 for (int i = 0; i < number_gates_nand3_path; ++i)
1235 {
1236 area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def);
1237 leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
1238 gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
1239 }
1240 area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1241 leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1242 gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1157 for (int i = 0; i < number_gates_nand3_path; ++i) {
1158 area_nand3_path +=
1159 compute_gate_area(INV, 1, width_nand3_path_p[i],
1160 width_nand3_path_n[i], g_tp.cell_h_def);
1161 leak_nand3_path +=
1162 cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
1163 1, inv, is_dram_);
1164 gate_leak_nand3_path +=
1165 cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
1166 1, inv, is_dram_);
1167 }
1168 area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1169 leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1170 gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1243
1171
1244 power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
1245 power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
1246 power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
1247 power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
1248 area.set_area(area_nand2_path + area_nand3_path);
1249 }
1172 power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
1173 power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
1174 power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
1175 power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
1176 area.set_area(area_nand2_path + area_nand3_path);
1177 }
1250}
1251
1252
1253
1254pair<double, double> PredecBlkDrv::compute_delays(
1255 double inrisetime_nand2_path,
1178}
1179
1180
1181
1182pair<double, double> PredecBlkDrv::compute_delays(
1183 double inrisetime_nand2_path,
1256 double inrisetime_nand3_path)
1257{
1258 pair<double, double> ret_val;
1259 ret_val.first = 0; // outrisetime_nand2_path
1260 ret_val.second = 0; // outrisetime_nand3_path
1261 int i;
1262 double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
1263 double Vdd = g_tp.peri_global.Vdd;
1184 double inrisetime_nand3_path) {
1185 pair<double, double> ret_val;
1186 ret_val.first = 0; // outrisetime_nand2_path
1187 ret_val.second = 0; // outrisetime_nand3_path
1188 int i;
1189 double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
1190 double Vdd = g_tp.peri_global.Vdd;
1264
1191
1265 if (flag_driver_exists)
1266 {
1267 for (i = 0; i < number_gates_nand2_path - 1; ++i)
1268 {
1269 rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
1270 c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
1271 c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1272 drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1273 tf = rd * (c_intrinsic + c_gate_load);
1274 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
1275 delay_nand2_path += this_delay;
1276 inrisetime_nand2_path = this_delay / (1.0 - 0.5);
1277 power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
1278 }
1192 if (flag_driver_exists) {
1193 for (i = 0; i < number_gates_nand2_path - 1; ++i) {
1194 rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
1195 c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
1196 c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1197 drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1198 tf = rd * (c_intrinsic + c_gate_load);
1199 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
1200 delay_nand2_path += this_delay;
1201 inrisetime_nand2_path = this_delay / (1.0 - 0.5);
1202 power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
1203 }
1279
1204
1280 // Final inverter drives the predecoder block or the decoder output load
1281 if (number_gates_nand2_path != 0)
1282 {
1283 i = number_gates_nand2_path - 1;
1284 rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
1285 c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1286 drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1287 c_load = c_load_nand2_path_out;
1288 tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out*c_load/ 2;
1289 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
1290 delay_nand2_path += this_delay;
1291 ret_val.first = this_delay / (1.0 - 0.5);
1292 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
1205 // Final inverter drives the predecoder block or the decoder output load
1206 if (number_gates_nand2_path != 0) {
1207 i = number_gates_nand2_path - 1;
1208 rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
1209 c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1210 drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1211 c_load = c_load_nand2_path_out;
1212 tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out * c_load / 2;
1213 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
1214 delay_nand2_path += this_delay;
1215 ret_val.first = this_delay / (1.0 - 0.5);
1216 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
1293// cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <<endl;
1217// cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <<endl;
1294 }
1218 }
1295
1219
1296 for (i = 0; i < number_gates_nand3_path - 1; ++i)
1297 {
1298 rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
1299 c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
1300 c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1301 drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1302 tf = rd * (c_intrinsic + c_gate_load);
1303 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
1304 delay_nand3_path += this_delay;
1305 inrisetime_nand3_path = this_delay / (1.0 - 0.5);
1306 power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
1307 }
1220 for (i = 0; i < number_gates_nand3_path - 1; ++i) {
1221 rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
1222 c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
1223 c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1224 drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1225 tf = rd * (c_intrinsic + c_gate_load);
1226 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
1227 delay_nand3_path += this_delay;
1228 inrisetime_nand3_path = this_delay / (1.0 - 0.5);
1229 power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
1230 }
1308
1231
1309 // Final inverter drives the predecoder block or the decoder output load
1310 if (number_gates_nand3_path != 0)
1311 {
1312 i = number_gates_nand3_path - 1;
1313 rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
1314 c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1315 drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1316 c_load = c_load_nand3_path_out;
1317 tf = rd*(c_intrinsic + c_load) + r_load_nand3_path_out*c_load / 2;
1318 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
1319 delay_nand3_path += this_delay;
1320 ret_val.second = this_delay / (1.0 - 0.5);
1321 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
1232 // Final inverter drives the predecoder block or the decoder output load
1233 if (number_gates_nand3_path != 0) {
1234 i = number_gates_nand3_path - 1;
1235 rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
1236 c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1237 drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1238 c_load = c_load_nand3_path_out;
1239 tf = rd * (c_intrinsic + c_load) + r_load_nand3_path_out * c_load / 2;
1240 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
1241 delay_nand3_path += this_delay;
1242 ret_val.second = this_delay / (1.0 - 0.5);
1243 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
1244 }
1322 }
1245 }
1323 }
1324 return ret_val;
1246 return ret_val;
1325}
1326
1327
1247}
1248
1249
1328double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir)
1329{
1330 return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
1331 num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
1250double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir) {
1251 return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
1252 num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
1332}
1333
1334
1335
1336Predec::Predec(
1337 PredecBlkDrv * drv1_,
1338 PredecBlkDrv * drv2_)
1253}
1254
1255
1256
1257Predec::Predec(
1258 PredecBlkDrv * drv1_,
1259 PredecBlkDrv * drv2_)
1339:blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_)
1340{
1341 driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
1342 drv1->power_nand3_path.readOp.leakage +
1343 drv2->power_nand2_path.readOp.leakage +
1344 drv2->power_nand3_path.readOp.leakage;
1345 block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
1346 blk1->power_nand3_path.readOp.leakage +
1347 blk1->power_L2.readOp.leakage +
1348 blk2->power_nand2_path.readOp.leakage +
1349 blk2->power_nand3_path.readOp.leakage +
1350 blk2->power_L2.readOp.leakage;
1351 power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
1260 : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) {
1261 driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
1262 drv1->power_nand3_path.readOp.leakage +
1263 drv2->power_nand2_path.readOp.leakage +
1264 drv2->power_nand3_path.readOp.leakage;
1265 block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
1266 blk1->power_nand3_path.readOp.leakage +
1267 blk1->power_L2.readOp.leakage +
1268 blk2->power_nand2_path.readOp.leakage +
1269 blk2->power_nand3_path.readOp.leakage +
1270 blk2->power_L2.readOp.leakage;
1271 power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
1352
1272
1353 driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
1354 drv1->power_nand3_path.readOp.gate_leakage +
1355 drv2->power_nand2_path.readOp.gate_leakage +
1356 drv2->power_nand3_path.readOp.gate_leakage;
1357 block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
1358 blk1->power_nand3_path.readOp.gate_leakage +
1359 blk1->power_L2.readOp.gate_leakage +
1360 blk2->power_nand2_path.readOp.gate_leakage +
1361 blk2->power_nand3_path.readOp.gate_leakage +
1362 blk2->power_L2.readOp.gate_leakage;
1363 power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
1273 driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
1274 drv1->power_nand3_path.readOp.gate_leakage +
1275 drv2->power_nand2_path.readOp.gate_leakage +
1276 drv2->power_nand3_path.readOp.gate_leakage;
1277 block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
1278 blk1->power_nand3_path.readOp.gate_leakage +
1279 blk1->power_L2.readOp.gate_leakage +
1280 blk2->power_nand2_path.readOp.gate_leakage +
1281 blk2->power_nand3_path.readOp.gate_leakage +
1282 blk2->power_L2.readOp.gate_leakage;
1283 power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
1364}
1365
1366void PredecBlkDrv::leakage_feedback(double temperature)
1367{
1368 double leak_nand2_path = 0;
1369 double leak_nand3_path = 0;
1370 double gate_leak_nand2_path = 0;
1371 double gate_leak_nand3_path = 0;

--- 22 unchanged lines hidden (view full) ---

1394
1395 power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
1396 power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
1397 power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
1398 power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
1399 }
1400}
1401
1284}
1285
1286void PredecBlkDrv::leakage_feedback(double temperature)
1287{
1288 double leak_nand2_path = 0;
1289 double leak_nand3_path = 0;
1290 double gate_leak_nand2_path = 0;
1291 double gate_leak_nand3_path = 0;

--- 22 unchanged lines hidden (view full) ---

1314
1315 power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
1316 power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
1317 power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
1318 power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
1319 }
1320}
1321
1402double Predec::compute_delays(double inrisetime)
1403{
1404 // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
1405 pair<double, double> tmp_pair1, tmp_pair2;
1406 tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
1407 tmp_pair1 = blk1->compute_delays(tmp_pair1);
1408 tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
1409 tmp_pair2 = blk2->compute_delays(tmp_pair2);
1410 tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
1322double Predec::compute_delays(double inrisetime) {
1323 // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
1324 pair<double, double> tmp_pair1, tmp_pair2;
1325 tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
1326 tmp_pair1 = blk1->compute_delays(tmp_pair1);
1327 tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
1328 tmp_pair2 = blk2->compute_delays(tmp_pair2);
1329 tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
1411
1330
1412 driver_power.readOp.dynamic =
1413 drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
1414 drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
1415 drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
1416 drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
1331 driver_power.readOp.dynamic =
1332 drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
1333 drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
1334 drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
1335 drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
1417
1336
1418 block_power.readOp.dynamic =
1419 blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
1420 blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
1421 blk1->power_L2.readOp.dynamic +
1422 blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
1423 blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
1424 blk2->power_L2.readOp.dynamic;
1337 block_power.readOp.dynamic =
1338 blk1->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path +
1339 blk1->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path +
1340 blk1->power_L2.readOp.dynamic +
1341 blk2->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path +
1342 blk2->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path +
1343 blk2->power_L2.readOp.dynamic;
1425
1344
1426 power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
1345 power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
1427
1346
1428 delay = tmp_pair1.first;
1429 return tmp_pair1.second;
1347 delay = tmp_pair1.first;
1348 return tmp_pair1.second;
1430}
1431
1349}
1350
1432
1433void Predec::leakage_feedback(double temperature)
1434{
1435 drv1->leakage_feedback(temperature);
1436 drv2->leakage_feedback(temperature);
1437 blk1->leakage_feedback(temperature);
1438 blk2->leakage_feedback(temperature);
1439
1440 driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +

--- 19 unchanged lines hidden (view full) ---

1460 blk2->power_nand3_path.readOp.gate_leakage +
1461 blk2->power_L2.readOp.gate_leakage;
1462 power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
1463}
1464
1465// returns <delay, risetime>
1466pair<double, double> Predec::get_max_delay_before_decoder(
1467 pair<double, double> input_pair1,
1351void Predec::leakage_feedback(double temperature)
1352{
1353 drv1->leakage_feedback(temperature);
1354 drv2->leakage_feedback(temperature);
1355 blk1->leakage_feedback(temperature);
1356 blk2->leakage_feedback(temperature);
1357
1358 driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +

--- 19 unchanged lines hidden (view full) ---

1378 blk2->power_nand3_path.readOp.gate_leakage +
1379 blk2->power_L2.readOp.gate_leakage;
1380 power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
1381}
1382
1383// returns <delay, risetime>
1384pair<double, double> Predec::get_max_delay_before_decoder(
1385 pair<double, double> input_pair1,
1468 pair input_pair2)
1469{
1470 pair<double, double> ret_val;
1471 double delay;
1386 pair<double, double> input_pair2) {
1387 pair<double, double> ret_val;
1388 double delay;
1472
1389
1473 delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
1474 ret_val.first = delay;
1475 ret_val.second = input_pair1.first;
1476 delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
1477 if (ret_val.first < delay)
1478 {
1390 delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
1479 ret_val.first = delay;
1391 ret_val.first = delay;
1480 ret_val.second = input_pair1.second;
1481 }
1482 delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
1483 if (ret_val.first < delay)
1484 {
1485 ret_val.first = delay;
1486 ret_val.second = input_pair2.first;
1487 }
1488 delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
1489 if (ret_val.first < delay)
1490 {
1491 ret_val.first = delay;
1492 ret_val.second = input_pair2.second;
1493 }
1392 ret_val.second = input_pair1.first;
1393 delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
1394 if (ret_val.first < delay) {
1395 ret_val.first = delay;
1396 ret_val.second = input_pair1.second;
1397 }
1398 delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
1399 if (ret_val.first < delay) {
1400 ret_val.first = delay;
1401 ret_val.second = input_pair2.first;
1402 }
1403 delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
1404 if (ret_val.first < delay) {
1405 ret_val.first = delay;
1406 ret_val.second = input_pair2.second;
1407 }
1494
1408
1495 return ret_val;
1409 return ret_val;
1496}
1497
1498
1499
1410}
1411
1412
1413
1500Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram)
1501:number_gates(0),
1502 min_number_gates(2),
1503 c_gate_load(c_gate_load_),
1504 c_wire_load(c_wire_load_),
1505 r_wire_load(r_wire_load_),
1506 delay(0),
1507 power(),
1508 is_dram_(is_dram)
1509{
1510 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
1511 {
1512 width_n[i] = 0;
1513 width_p[i] = 0;
1514 }
1414Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_,
1415 bool is_dram)
1416 : number_gates(0),
1417 min_number_gates(2),
1418 c_gate_load(c_gate_load_),
1419 c_wire_load(c_wire_load_),
1420 r_wire_load(r_wire_load_),
1421 delay(0),
1422 power(),
1423 is_dram_(is_dram) {
1424 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
1425 width_n[i] = 0;
1426 width_p[i] = 0;
1427 }
1515
1428
1516 compute_widths();
1429 compute_widths();
1517}
1518
1519
1430}
1431
1432
1520void Driver::compute_widths()
1521{
1522 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
1523 double c_load = c_gate_load + c_wire_load;
1524 width_n[0] = g_tp.min_w_nmos_;
1525 width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
1433void Driver::compute_widths() {
1434 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
1435 double c_load = c_gate_load + c_wire_load;
1436 width_n[0] = g_tp.min_w_nmos_;
1437 width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
1526
1438
1527 double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
1528 number_gates = logical_effort(
1529 min_number_gates,
1530 1,
1531 F,
1532 width_n,
1533 width_p,
1534 c_load,
1535 p_to_n_sz_ratio,
1536 is_dram_, false,
1537 g_tp.max_w_nmos_);
1439 double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
1440 number_gates = logical_effort(
1441 min_number_gates,
1442 1,
1443 F,
1444 width_n,
1445 width_p,
1446 c_load,
1447 p_to_n_sz_ratio,
1448 is_dram_, false,
1449 g_tp.max_w_nmos_);
1538}
1539
1540
1541
1450}
1451
1452
1453
1542double Driver::compute_delay(double inrisetime)
1543{
1544 int i;
1545 double rd, c_load, c_intrinsic, tf;
1546 double this_delay = 0;
1454double Driver::compute_delay(double inrisetime) {
1455 int i;
1456 double rd, c_load, c_intrinsic, tf;
1457 double this_delay = 0;
1547
1458
1548 for (i = 0; i < number_gates - 1; ++i)
1549 {
1459 for (i = 0; i < number_gates - 1; ++i) {
1460 rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
1461 c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
1462 c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1463 drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1464 tf = rd * (c_intrinsic + c_load);
1465 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1466 delay += this_delay;
1467 inrisetime = this_delay / (1.0 - 0.5);
1468 power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
1469 g_tp.peri_global.Vdd;
1470 power.readOp.leakage +=
1471 cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
1472 g_tp.peri_global.Vdd;
1473 power.readOp.gate_leakage +=
1474 cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
1475 g_tp.peri_global.Vdd;
1476 }
1477
1478 i = number_gates - 1;
1479 c_load = c_gate_load + c_wire_load;
1550 rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
1480 rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
1551 c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
1552 c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1481 c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1553 drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1554 tf = rd * (c_intrinsic + c_load);
1482 drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1483 tf = rd * (c_intrinsic + c_load) + r_wire_load *
1484 (c_wire_load / 2 + c_gate_load);
1555 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1556 delay += this_delay;
1485 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1486 delay += this_delay;
1557 inrisetime = this_delay / (1.0 - 0.5);
1558 power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1559 power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd;
1560 power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
1561 }
1487 power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
1488 g_tp.peri_global.Vdd;
1489 power.readOp.leakage +=
1490 cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
1491 g_tp.peri_global.Vdd;
1492 power.readOp.gate_leakage +=
1493 cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
1494 g_tp.peri_global.Vdd;
1562
1495
1563 i = number_gates - 1;
1564 c_load = c_gate_load + c_wire_load;
1565 rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
1566 c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1567 drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1568 tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load);
1569 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1570 delay += this_delay;
1571 power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1572 power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd;
1573 power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
1574
1575 return this_delay / (1.0 - 0.5);
1496 return this_delay / (1.0 - 0.5);
1576}
1577
1497}
1498