decoder.cc revision 10234
1/*****************************************************************************
2 *                                McPAT/CACTI
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
6 *                          All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 ***************************************************************************/
32
33
34
35#include <cassert>
36#include <cmath>
37#include <iostream>
38
39#include "area.h"
40#include "decoder.h"
41#include "parameter.h"
42
43using namespace std;
44
45
46Decoder::Decoder(
47    int    _num_dec_signals,
48    bool   flag_way_select,
49    double _C_ld_dec_out,
50    double _R_wire_dec_out,
51    bool   fully_assoc_,
52    bool   is_dram_,
53    bool   is_wl_tr_,
54    const  Area & cell_)
55        : exist(false),
56        C_ld_dec_out(_C_ld_dec_out),
57        R_wire_dec_out(_R_wire_dec_out),
58        num_gates(0), num_gates_min(2),
59        delay(0),
60        //power(),
61        fully_assoc(fully_assoc_), is_dram(is_dram_),
62        is_wl_tr(is_wl_tr_), cell(cell_) {
63
64    for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
65        w_dec_n[i] = 0;
66        w_dec_p[i] = 0;
67    }
68
69    /*
70     * _num_dec_signals is the number of decoded signal as output
71     * num_addr_bits_dec is the number of signal to be decoded
72     * as the decoders input.
73     */
74    int num_addr_bits_dec = _log2(_num_dec_signals);
75
76    if (num_addr_bits_dec < 4) {
77        if (flag_way_select) {
78            exist = true;
79            num_in_signals = 2;
80        } else {
81            num_in_signals = 0;
82        }
83    } else {
84        exist = true;
85
86        if (flag_way_select) {
87            num_in_signals = 3;
88        } else {
89            num_in_signals = 2;
90        }
91    }
92
93    assert(cell.h > 0);
94    assert(cell.w > 0);
95    // the height of a row-decoder-driver cell is fixed to be 4 * cell.h;
96    //area.h = 4 * cell.h;
97    area.h = g_tp.h_dec * cell.h;
98
99    compute_widths();
100    compute_area();
101}
102
103
104
105void Decoder::compute_widths() {
106    double F;
107    double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
108    double gnand2     = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
109    double gnand3     = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
110
111    if (exist) {
112        if (num_in_signals == 2 || fully_assoc) {
113            w_dec_n[0] = 2 * g_tp.min_w_nmos_;
114            w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
115            F = gnand2;
116        } else {
117            w_dec_n[0] = 3 * g_tp.min_w_nmos_;
118            w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
119            F = gnand3;
120        }
121
122        F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
123                             gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));
124        num_gates = logical_effort(
125                        num_gates_min,
126                        num_in_signals == 2 ? gnand2 : gnand3,
127                        F,
128                        w_dec_n,
129                        w_dec_p,
130                        C_ld_dec_out,
131                        p_to_n_sz_ratio,
132                        is_dram,
133                        is_wl_tr,
134                        g_tp.max_w_nmos_dec);
135    }
136}
137
138
139
140void Decoder::compute_area() {
141    double cumulative_area = 0;
142    double cumulative_curr = 0;  // cumulative leakage current
143    double cumulative_curr_Ig = 0;  // cumulative leakage current
144
145    if (exist) { // First check if this decoder exists
146        if (num_in_signals == 2) {
147            cumulative_area =
148                compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
149            cumulative_curr =
150                cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
151            cumulative_curr_Ig =
152                cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
153        } else if (num_in_signals == 3) {
154            cumulative_area =
155                compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
156            cumulative_curr =
157                cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
158            cumulative_curr_Ig =
159                cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
160        }
161
162        for (int i = 1; i < num_gates; i++) {
163            cumulative_area +=
164                compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
165            cumulative_curr +=
166                cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
167            cumulative_curr_Ig =
168                cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
169        }
170        power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
171        power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
172
173        area.w = (cumulative_area / area.h);
174    }
175}
176
177
178
179double Decoder::compute_delays(double inrisetime) {
180    if (exist) {
181        double ret_val = 0;  // outrisetime
182        int    i;
183        double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
184        double Vdd = g_tp.peri_global.Vdd;
185
186        if ((is_wl_tr) && (is_dram)) {
187            Vpp = g_tp.vpp;
188        } else if (is_wl_tr) {
189            Vpp = g_tp.sram_cell.Vdd;
190        } else {
191            Vpp = g_tp.peri_global.Vdd;
192        }
193
194        // first check whether a decoder is required at all
195        rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
196        c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
197        c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
198                      drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
199        tf = rd * (c_intrinsic + c_load);
200        this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
201        delay += this_delay;
202        inrisetime = this_delay / (1.0 - 0.5);
203        power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
204
205        for (i = 1; i < num_gates - 1; ++i) {
206            rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
207            c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
208            c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
209                          drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
210            tf = rd * (c_intrinsic + c_load);
211            this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
212            delay += this_delay;
213            inrisetime = this_delay / (1.0 - 0.5);
214            power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
215        }
216
217        // add delay of final inverter that drives the wordline
218        i = num_gates - 1;
219        c_load = C_ld_dec_out;
220        rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
221        c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
222                      drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
223        tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2;
224        this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
225        delay  += this_delay;
226        ret_val = this_delay / (1.0 - 0.5);
227        power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
228
229        return ret_val;
230    } else {
231        return 0.0;
232    }
233}
234
235void Decoder::leakage_feedback(double temperature)
236{
237  double cumulative_curr = 0;  // cumulative leakage current
238  double cumulative_curr_Ig = 0;  // cumulative leakage current
239
240  if (exist)
241  { // First check if this decoder exists
242    if (num_in_signals == 2)
243    {
244      cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
245      cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
246    }
247    else if (num_in_signals == 3)
248    {
249      cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
250      cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
251    }
252
253    for (int i = 1; i < num_gates; i++)
254    {
255      cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
256      cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
257    }
258
259    power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
260    power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
261  }
262}
263
264PredecBlk::PredecBlk(
265    int    num_dec_signals,
266    Decoder * dec_,
267    double C_wire_predec_blk_out,
268    double R_wire_predec_blk_out_,
269    int    num_dec_per_predec,
270    bool   is_dram,
271    bool   is_blk1)
272    : dec(dec_),
273        exist(false),
274        number_input_addr_bits(0),
275        C_ld_predec_blk_out(0),
276        R_wire_predec_blk_out(0),
277        branch_effort_nand2_gate_output(1),
278        branch_effort_nand3_gate_output(1),
279        flag_two_unique_paths(false),
280        flag_L2_gate(0),
281        number_inputs_L1_gate(0),
282        number_gates_L1_nand2_path(0),
283        number_gates_L1_nand3_path(0),
284        number_gates_L2(0),
285        min_number_gates_L1(2),
286        min_number_gates_L2(2),
287        num_L1_active_nand2_path(0),
288        num_L1_active_nand3_path(0),
289        delay_nand2_path(0),
290        delay_nand3_path(0),
291        power_nand2_path(),
292        power_nand3_path(),
293        power_L2(),
294        is_dram_(is_dram) {
295    int    branch_effort_predec_out;
296    double C_ld_dec_gate;
297    int    num_addr_bits_dec = _log2(num_dec_signals);
298    int    blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
299    int    blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
300
301    w_L1_nand2_n[0] = 0;
302    w_L1_nand2_p[0] = 0;
303    w_L1_nand3_n[0] = 0;
304    w_L1_nand3_p[0] = 0;
305
306    if (is_blk1 == true) {
307        if (num_addr_bits_dec <= 0) {
308            return;
309        } else if (num_addr_bits_dec < 4) {
310            // Just one predecoder block is required with NAND2 gates. No decoder required.
311            // The first level of predecoding directly drives the decoder output load
312            exist = true;
313            number_input_addr_bits = num_addr_bits_dec;
314            R_wire_predec_blk_out = dec->R_wire_dec_out;
315            C_ld_predec_blk_out = dec->C_ld_dec_out;
316        } else {
317            exist = true;
318            number_input_addr_bits   = blk1_num_input_addr_bits;
319            branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
320            C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
321            R_wire_predec_blk_out = R_wire_predec_blk_out_;
322            C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
323        }
324    } else {
325        if (num_addr_bits_dec >= 4) {
326            exist = true;
327            number_input_addr_bits   = blk2_num_input_addr_bits;
328            branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
329            C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
330            R_wire_predec_blk_out = R_wire_predec_blk_out_;
331            C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
332        }
333    }
334
335    compute_widths();
336    compute_area();
337}
338
339
340
341void PredecBlk::compute_widths() {
342    double F, c_load_nand3_path, c_load_nand2_path;
343    double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
344    double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
345    double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
346
347    if (exist == false) return;
348
349
350    switch (number_input_addr_bits) {
351    case 1:
352        flag_two_unique_paths           = false;
353        number_inputs_L1_gate           = 2;
354        flag_L2_gate                    = 0;
355        break;
356    case 2:
357        flag_two_unique_paths           = false;
358        number_inputs_L1_gate           = 2;
359        flag_L2_gate                    = 0;
360        break;
361    case 3:
362        flag_two_unique_paths           = false;
363        number_inputs_L1_gate           = 3;
364        flag_L2_gate                    = 0;
365        break;
366    case 4:
367        flag_two_unique_paths           = false;
368        number_inputs_L1_gate           = 2;
369        flag_L2_gate                    = 2;
370        branch_effort_nand2_gate_output = 4;
371        break;
372    case 5:
373        flag_two_unique_paths           = true;
374        flag_L2_gate                    = 2;
375        branch_effort_nand2_gate_output = 8;
376        branch_effort_nand3_gate_output = 4;
377        break;
378    case 6:
379        flag_two_unique_paths           = false;
380        number_inputs_L1_gate           = 3;
381        flag_L2_gate                    = 2;
382        branch_effort_nand3_gate_output = 8;
383        break;
384    case 7:
385        flag_two_unique_paths           = true;
386        flag_L2_gate                    = 3;
387        branch_effort_nand2_gate_output = 32;
388        branch_effort_nand3_gate_output = 16;
389        break;
390    case 8:
391        flag_two_unique_paths           = true;
392        flag_L2_gate                    = 3;
393        branch_effort_nand2_gate_output = 64;
394        branch_effort_nand3_gate_output = 32;
395        break;
396    case 9:
397        flag_two_unique_paths           = false;
398        number_inputs_L1_gate           = 3;
399        flag_L2_gate                    = 3;
400        branch_effort_nand3_gate_output = 64;
401        break;
402    default:
403        assert(0);
404        break;
405    }
406
407    // find the number of gates and sizing in second level of predecoder (if there is a second level)
408    if (flag_L2_gate) {
409        if (flag_L2_gate == 2) { // 2nd level is a NAND2 gate
410            w_L2_n[0] = 2 * g_tp.min_w_nmos_;
411            F = gnand2;
412        } else { // 2nd level is a NAND3 gate
413            w_L2_n[0] = 3 * g_tp.min_w_nmos_;
414            F = gnand3;
415        }
416        w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
417        F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
418        number_gates_L2 = logical_effort(
419                              min_number_gates_L2,
420                              flag_L2_gate == 2 ? gnand2 : gnand3,
421                              F,
422                              w_L2_n,
423                              w_L2_p,
424                              C_ld_predec_blk_out,
425                              p_to_n_sz_ratio,
426                              is_dram_, false,
427                              g_tp.max_w_nmos_);
428
429        // Now find the number of gates and widths in first level of predecoder
430        if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
431            // Whenever flag_two_unique_paths is true, it means first level of
432            // decoder employs
433            // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2,
434            // it means
435            // a NAND2 gate is used in the first level of the predecoder
436            c_load_nand2_path = branch_effort_nand2_gate_output *
437                                (gate_C(w_L2_n[0], 0, is_dram_) +
438                                 gate_C(w_L2_p[0], 0, is_dram_));
439            w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
440            w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
441            F = gnand2 * c_load_nand2_path /
442                (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
443                 gate_C(w_L1_nand2_p[0], 0, is_dram_));
444            number_gates_L1_nand2_path = logical_effort(
445                                             min_number_gates_L1,
446                                             gnand2,
447                                             F,
448                                             w_L1_nand2_n,
449                                             w_L1_nand2_p,
450                                             c_load_nand2_path,
451                                             p_to_n_sz_ratio,
452                                             is_dram_, false,
453                                             g_tp.max_w_nmos_);
454        }
455
456        //Now find widths of gates along path in which first gate is a NAND3
457        if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
458            // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means
459            // a NAND3 gate is used in the first level of the predecoder
460            c_load_nand3_path = branch_effort_nand3_gate_output *
461                                (gate_C(w_L2_n[0], 0, is_dram_) +
462                                 gate_C(w_L2_p[0], 0, is_dram_));
463            w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
464            w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
465            F = gnand3 * c_load_nand3_path /
466                (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
467                 gate_C(w_L1_nand3_p[0], 0, is_dram_));
468            number_gates_L1_nand3_path = logical_effort(
469                                             min_number_gates_L1,
470                                             gnand3,
471                                             F,
472                                             w_L1_nand3_n,
473                                             w_L1_nand3_p,
474                                             c_load_nand3_path,
475                                             p_to_n_sz_ratio,
476                                             is_dram_, false,
477                                             g_tp.max_w_nmos_);
478        }
479    } else { // find number of gates and widths in first level of predecoder block when there is no second level
480        if (number_inputs_L1_gate == 2) {
481            w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
482            w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
483            F = gnand2 * C_ld_predec_blk_out /
484                (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
485                 gate_C(w_L1_nand2_p[0], 0, is_dram_));
486            number_gates_L1_nand2_path = logical_effort(
487                                             min_number_gates_L1,
488                                             gnand2,
489                                             F,
490                                             w_L1_nand2_n,
491                                             w_L1_nand2_p,
492                                             C_ld_predec_blk_out,
493                                             p_to_n_sz_ratio,
494                                             is_dram_, false,
495                                             g_tp.max_w_nmos_);
496        } else if (number_inputs_L1_gate == 3) {
497            w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
498            w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
499            F = gnand3 * C_ld_predec_blk_out /
500                (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
501                 gate_C(w_L1_nand3_p[0], 0, is_dram_));
502            number_gates_L1_nand3_path = logical_effort(
503                                             min_number_gates_L1,
504                                             gnand3,
505                                             F,
506                                             w_L1_nand3_n,
507                                             w_L1_nand3_p,
508                                             C_ld_predec_blk_out,
509                                             p_to_n_sz_ratio,
510                                             is_dram_, false,
511                                             g_tp.max_w_nmos_);
512        }
513    }
514}
515
516
517
518void PredecBlk::compute_area() {
519    if (exist) { // First check whether a predecoder block is needed
520        int num_L1_nand2 = 0;
521        int num_L1_nand3 = 0;
522        int num_L2 = 0;
523        double tot_area_L1_nand3  = 0;
524        double leak_L1_nand3      = 0;
525        double gate_leak_L1_nand3 = 0;
526
527        double tot_area_L1_nand2  = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
528        double leak_L1_nand2      = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
529        double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
530        if (number_inputs_L1_gate != 3) {
531            tot_area_L1_nand3 = 0;
532            leak_L1_nand3 = 0;
533            gate_leak_L1_nand3 = 0;
534        } else {
535            tot_area_L1_nand3  = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
536            leak_L1_nand3      = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
537            gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
538        }
539
540        switch (number_input_addr_bits) {
541        case 1: //2 NAND2 gates
542            num_L1_nand2 = 2;
543            num_L2       = 0;
544            num_L1_active_nand2_path = 1;
545            num_L1_active_nand3_path = 0;
546            break;
547        case 2: //4 NAND2 gates
548            num_L1_nand2 = 4;
549            num_L2       = 0;
550            num_L1_active_nand2_path = 1;
551            num_L1_active_nand3_path = 0;
552            break;
553        case 3: //8 NAND3 gates
554            num_L1_nand3 = 8;
555            num_L2       = 0;
556            num_L1_active_nand2_path = 0;
557            num_L1_active_nand3_path = 1;
558            break;
559        case 4: //4 + 4 NAND2 gates
560            num_L1_nand2 = 8;
561            num_L2       = 16;
562            num_L1_active_nand2_path = 2;
563            num_L1_active_nand3_path = 0;
564            break;
565        case 5: //4 NAND2 gates, 8 NAND3 gates
566            num_L1_nand2 = 4;
567            num_L1_nand3 = 8;
568            num_L2       = 32;
569            num_L1_active_nand2_path = 1;
570            num_L1_active_nand3_path = 1;
571            break;
572        case 6: //8 + 8 NAND3 gates
573            num_L1_nand3 = 16;
574            num_L2       = 64;
575            num_L1_active_nand2_path = 0;
576            num_L1_active_nand3_path = 2;
577            break;
578        case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
579            num_L1_nand2 = 8;
580            num_L1_nand3 = 8;
581            num_L2       = 128;
582            num_L1_active_nand2_path = 2;
583            num_L1_active_nand3_path = 1;
584            break;
585        case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
586            num_L1_nand2 = 4;
587            num_L1_nand3 = 16;
588            num_L2       = 256;
589            num_L1_active_nand2_path = 2;
590            num_L1_active_nand3_path = 2;
591            break;
592        case 9: //8 + 8 + 8 NAND3 gates
593            num_L1_nand3 = 24;
594            num_L2       = 512;
595            num_L1_active_nand2_path = 0;
596            num_L1_active_nand3_path = 3;
597            break;
598        default:
599            break;
600        }
601
602        for (int i = 1; i < number_gates_L1_nand2_path; ++i) {
603            tot_area_L1_nand2  += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
604            leak_L1_nand2      += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
605            gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
606        }
607        tot_area_L1_nand2  *= num_L1_nand2;
608        leak_L1_nand2      *= num_L1_nand2;
609        gate_leak_L1_nand2 *= num_L1_nand2;
610
611        for (int i = 1; i < number_gates_L1_nand3_path; ++i) {
612            tot_area_L1_nand3  += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
613            leak_L1_nand3      += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
614            gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
615        }
616        tot_area_L1_nand3  *= num_L1_nand3;
617        leak_L1_nand3      *= num_L1_nand3;
618        gate_leak_L1_nand3 *= num_L1_nand3;
619
620        double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
621        double cumulative_area_L2 = 0.0;
622        double leakage_L2         = 0.0;
623        double gate_leakage_L2    = 0.0;
624
625        if (flag_L2_gate == 2) {
626            cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
627            leakage_L2         = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
628            gate_leakage_L2    = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
629        } else if (flag_L2_gate == 3) {
630            cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
631            leakage_L2         = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
632            gate_leakage_L2    = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
633        }
634
635        for (int i = 1; i < number_gates_L2; ++i) {
636            cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
637            leakage_L2         += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
638            gate_leakage_L2    += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
639        }
640        cumulative_area_L2 *= num_L2;
641        leakage_L2         *= num_L2;
642        gate_leakage_L2    *= num_L2;
643
644        power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
645        power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
646        power_L2.readOp.leakage         = leakage_L2    * g_tp.peri_global.Vdd;
647        area.set_area(cumulative_area_L1 + cumulative_area_L2);
648        power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
649        power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
650        power_L2.readOp.gate_leakage         = gate_leakage_L2    * g_tp.peri_global.Vdd;
651    }
652}
653
654
655
656pair<double, double> PredecBlk::compute_delays(
657    pair<double, double> inrisetime) { // <nand2, nand3>
658    pair<double, double> ret_val;
659    ret_val.first  = 0;  // outrisetime_nand2_path
660    ret_val.second = 0;  // outrisetime_nand3_path
661
662    double inrisetime_nand2_path = inrisetime.first;
663    double inrisetime_nand3_path = inrisetime.second;
664    int    i;
665    double rd, c_load, c_intrinsic, tf, this_delay;
666    double Vdd = g_tp.peri_global.Vdd;
667
668    // TODO: following delay calculation part can be greatly simplified.
669    // first check whether a predecoder block is required
670    if (exist) {
671        //Find delay in first level of predecoder block
672        //First find delay in path
673        if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
674            //First gate is a NAND2 gate
675            rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
676            c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
677            c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
678                          drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
679            tf = rd * (c_intrinsic + c_load);
680            this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
681            delay_nand2_path += this_delay;
682            inrisetime_nand2_path = this_delay / (1.0 - 0.5);
683            power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
684
685            //Add delays of all but the last inverter in the chain
686            for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) {
687                rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
688                c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
689                c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
690                              drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
691                tf = rd * (c_intrinsic + c_load);
692                this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
693                delay_nand2_path += this_delay;
694                inrisetime_nand2_path = this_delay / (1.0 - 0.5);
695                power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
696            }
697
698            //Add delay of the last inverter
699            i = number_gates_L1_nand2_path - 1;
700            rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
701            if (flag_L2_gate) {
702                c_load = branch_effort_nand2_gate_output *
703                    (gate_C(w_L2_n[0], 0, is_dram_) +
704                     gate_C(w_L2_p[0], 0, is_dram_));
705                c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
706                              drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
707                tf = rd * (c_intrinsic + c_load);
708                this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
709                delay_nand2_path += this_delay;
710                inrisetime_nand2_path = this_delay / (1.0 - 0.5);
711                power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
712            } else { //First level directly drives decoder output load
713                c_load = C_ld_predec_blk_out;
714                c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
715                              drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
716                tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
717                this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
718                delay_nand2_path += this_delay;
719                ret_val.first = this_delay / (1.0 - 0.5);
720                power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
721            }
722        }
723
724        if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) {
725            //Check if the number of gates in the first level is more than 1.
726            //First gate is a NAND3 gate
727            rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
728            c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
729            c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
730                          drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
731            tf = rd * (c_intrinsic + c_load);
732            this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
733            delay_nand3_path += this_delay;
734            inrisetime_nand3_path = this_delay / (1.0 - 0.5);
735            power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
736
737            //Add delays of all but the last inverter in the chain
738            for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) {
739                rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
740                c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
741                c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
742                              drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
743                tf = rd * (c_intrinsic + c_load);
744                this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
745                delay_nand3_path += this_delay;
746                inrisetime_nand3_path = this_delay / (1.0 - 0.5);
747                power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
748            }
749
750            //Add delay of the last inverter
751            i = number_gates_L1_nand3_path - 1;
752            rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
753            if (flag_L2_gate) {
754                c_load = branch_effort_nand3_gate_output *
755                    (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0,
756                                                             is_dram_));
757                c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
758                              drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
759                tf = rd * (c_intrinsic + c_load);
760                this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
761                delay_nand3_path += this_delay;
762                inrisetime_nand3_path = this_delay / (1.0 - 0.5);
763                power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
764            } else { //First level directly drives decoder output load
765                c_load = C_ld_predec_blk_out;
766                c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
767                              drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
768                tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
769                this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
770                delay_nand3_path += this_delay;
771                ret_val.second = this_delay / (1.0 - 0.5);
772                power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
773            }
774        }
775
776        // Find delay through second level
777        if (flag_L2_gate) {
778            if (flag_L2_gate == 2) {
779                rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
780                c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
781                c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
782                              drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
783                tf = rd * (c_intrinsic + c_load);
784                this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
785                delay_nand2_path += this_delay;
786                inrisetime_nand2_path = this_delay / (1.0 - 0.5);
787                power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
788            } else { // flag_L2_gate = 3
789                rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
790                c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
791                c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
792                              drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
793                tf = rd * (c_intrinsic + c_load);
794                this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
795                delay_nand3_path += this_delay;
796                inrisetime_nand3_path = this_delay / (1.0 - 0.5);
797                power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
798            }
799
800            for (i = 1; i < number_gates_L2 - 1; ++i) {
801                rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
802                c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
803                c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
804                              drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
805                tf = rd * (c_intrinsic + c_load);
806                this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
807                delay_nand2_path += this_delay;
808                inrisetime_nand2_path = this_delay / (1.0 - 0.5);
809                this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
810                delay_nand3_path += this_delay;
811                inrisetime_nand3_path = this_delay / (1.0 - 0.5);
812                power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
813            }
814
815            //Add delay of final inverter that drives the wordline decoders
816            i = number_gates_L2 - 1;
817            c_load = C_ld_predec_blk_out;
818            rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
819            c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
820                          drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
821            tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
822            this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
823            delay_nand2_path += this_delay;
824            ret_val.first = this_delay / (1.0 - 0.5);
825            this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
826            delay_nand3_path += this_delay;
827            ret_val.second = this_delay / (1.0 - 0.5);
828            power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
829        }
830    }
831
832    delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
833    return ret_val;
834}
835
836void PredecBlk::leakage_feedback(double temperature)
837{
838  if (exist)
839  { // First check whether a predecoder block is needed
840    int num_L1_nand2 = 0;
841    int num_L1_nand3 = 0;
842    int num_L2 = 0;
843    double leak_L1_nand3      =0;
844    double gate_leak_L1_nand3 =0;
845
846    double leak_L1_nand2      = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
847    double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
848    if (number_inputs_L1_gate != 3) {
849      leak_L1_nand3 = 0;
850      gate_leak_L1_nand3 =0;
851    }
852    else {
853      leak_L1_nand3      = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
854      gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
855    }
856
857    switch (number_input_addr_bits)
858    {
859      case 1: //2 NAND2 gates
860        num_L1_nand2 = 2;
861        num_L2       = 0;
862        num_L1_active_nand2_path =1;
863        num_L1_active_nand3_path =0;
864        break;
865      case 2: //4 NAND2 gates
866        num_L1_nand2 = 4;
867        num_L2       = 0;
868        num_L1_active_nand2_path =1;
869        num_L1_active_nand3_path =0;
870        break;
871      case 3: //8 NAND3 gates
872        num_L1_nand3 = 8;
873        num_L2       = 0;
874        num_L1_active_nand2_path =0;
875        num_L1_active_nand3_path =1;
876        break;
877      case 4: //4 + 4 NAND2 gates
878        num_L1_nand2 = 8;
879        num_L2       = 16;
880        num_L1_active_nand2_path =2;
881        num_L1_active_nand3_path =0;
882        break;
883      case 5: //4 NAND2 gates, 8 NAND3 gates
884        num_L1_nand2 = 4;
885        num_L1_nand3 = 8;
886        num_L2       = 32;
887        num_L1_active_nand2_path =1;
888        num_L1_active_nand3_path =1;
889        break;
890      case 6: //8 + 8 NAND3 gates
891        num_L1_nand3 = 16;
892        num_L2       = 64;
893        num_L1_active_nand2_path =0;
894        num_L1_active_nand3_path =2;
895        break;
896      case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
897        num_L1_nand2 = 8;
898        num_L1_nand3 = 8;
899        num_L2       = 128;
900        num_L1_active_nand2_path =2;
901        num_L1_active_nand3_path =1;
902        break;
903      case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
904        num_L1_nand2 = 4;
905        num_L1_nand3 = 16;
906        num_L2       = 256;
907        num_L1_active_nand2_path =2;
908        num_L1_active_nand3_path =2;
909        break;
910      case 9: //8 + 8 + 8 NAND3 gates
911        num_L1_nand3 = 24;
912        num_L2       = 512;
913        num_L1_active_nand2_path =0;
914        num_L1_active_nand3_path =3;
915        break;
916      default:
917        break;
918    }
919
920    for (int i = 1; i < number_gates_L1_nand2_path; ++i)
921    {
922      leak_L1_nand2      += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
923      gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
924    }
925    leak_L1_nand2      *= num_L1_nand2;
926    gate_leak_L1_nand2 *= num_L1_nand2;
927
928    for (int i = 1; i < number_gates_L1_nand3_path; ++i)
929    {
930      leak_L1_nand3      += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
931      gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
932    }
933    leak_L1_nand3      *= num_L1_nand3;
934    gate_leak_L1_nand3 *= num_L1_nand3;
935
936    double leakage_L2         = 0.0;
937    double gate_leakage_L2    = 0.0;
938
939    if (flag_L2_gate == 2)
940    {
941      leakage_L2         = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
942      gate_leakage_L2    = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
943    }
944    else if (flag_L2_gate == 3)
945    {
946      leakage_L2         = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
947      gate_leakage_L2    = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
948    }
949
950    for (int i = 1; i < number_gates_L2; ++i)
951    {
952      leakage_L2         += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
953      gate_leakage_L2    += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
954    }
955    leakage_L2         *= num_L2;
956    gate_leakage_L2    *= num_L2;
957
958    power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
959    power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
960    power_L2.readOp.leakage         = leakage_L2    * g_tp.peri_global.Vdd;
961
962    power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
963    power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
964    power_L2.readOp.gate_leakage         = gate_leakage_L2    * g_tp.peri_global.Vdd;
965  }
966}
967
968PredecBlkDrv::PredecBlkDrv(
969    int    way_select_,
970    PredecBlk * blk_,
971    bool   is_dram)
972        : flag_driver_exists(0),
973        number_gates_nand2_path(0),
974        number_gates_nand3_path(0),
975        min_number_gates(2),
976        num_buffers_driving_1_nand2_load(0),
977        num_buffers_driving_2_nand2_load(0),
978        num_buffers_driving_4_nand2_load(0),
979        num_buffers_driving_2_nand3_load(0),
980        num_buffers_driving_8_nand3_load(0),
981        num_buffers_nand3_path(0),
982        c_load_nand2_path_out(0),
983        c_load_nand3_path_out(0),
984        r_load_nand2_path_out(0),
985        r_load_nand3_path_out(0),
986        delay_nand2_path(0),
987        delay_nand3_path(0),
988        power_nand2_path(),
989        power_nand3_path(),
990        blk(blk_), dec(blk->dec),
991        is_dram_(is_dram),
992        way_select(way_select_) {
993    for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
994        width_nand2_path_n[i] = 0;
995        width_nand2_path_p[i] = 0;
996        width_nand3_path_n[i] = 0;
997        width_nand3_path_p[i] = 0;
998    }
999
1000    number_input_addr_bits = blk->number_input_addr_bits;
1001
1002    if (way_select > 1) {
1003        flag_driver_exists     = 1;
1004        number_input_addr_bits = way_select;
1005        if (dec->num_in_signals == 2) {
1006            c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
1007            num_buffers_driving_2_nand2_load = number_input_addr_bits;
1008        } else if (dec->num_in_signals == 3) {
1009            c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
1010            num_buffers_driving_2_nand3_load = number_input_addr_bits;
1011        }
1012    } else if (way_select == 0) {
1013        if (blk->exist) {
1014            flag_driver_exists = 1;
1015        }
1016    }
1017
1018    compute_widths();
1019    compute_area();
1020}
1021
1022
1023
1024void PredecBlkDrv::compute_widths() {
1025    // The predecode block driver accepts as input the address bits from the h-tree network. For
1026    // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
1027    // inversion to generate addrbar and simply treat addrbar as addr.
1028
1029    double F;
1030    double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
1031
1032    if (flag_driver_exists) {
1033        double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
1034        double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
1035
1036        if (way_select == 0) {
1037            if (blk->number_input_addr_bits == 1) {
1038                //2 NAND2 gates
1039                num_buffers_driving_2_nand2_load = 1;
1040                c_load_nand2_path_out            = 2 * C_nand2_gate_blk;
1041            } else if (blk->number_input_addr_bits == 2) {
1042                //4 NAND2 gates  one 2-4 decoder
1043                num_buffers_driving_4_nand2_load = 2;
1044                c_load_nand2_path_out            = 4 * C_nand2_gate_blk;
1045            } else if (blk->number_input_addr_bits == 3) {
1046                //8 NAND3 gates  one 3-8 decoder
1047                num_buffers_driving_8_nand3_load = 3;
1048                c_load_nand3_path_out            = 8 * C_nand3_gate_blk;
1049            } else if (blk->number_input_addr_bits == 4) {
1050                //4 + 4 NAND2 gates two 2-4 decoder
1051                num_buffers_driving_4_nand2_load = 4;
1052                c_load_nand2_path_out            = 4 * C_nand2_gate_blk;
1053            } else if (blk->number_input_addr_bits == 5) {
1054                //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8
1055                //decoder
1056                num_buffers_driving_4_nand2_load = 2;
1057                num_buffers_driving_8_nand3_load = 3;
1058                c_load_nand2_path_out            = 4 * C_nand2_gate_blk;
1059                c_load_nand3_path_out            = 8 * C_nand3_gate_blk;
1060            } else if (blk->number_input_addr_bits == 6) {
1061                //8 + 8 NAND3 gates two 3-8 decoder
1062                num_buffers_driving_8_nand3_load = 6;
1063                c_load_nand3_path_out            = 8 * C_nand3_gate_blk;
1064            } else if (blk->number_input_addr_bits == 7) {
1065                //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8
1066                //decoder
1067                num_buffers_driving_4_nand2_load = 4;
1068                num_buffers_driving_8_nand3_load = 3;
1069                c_load_nand2_path_out            = 4 * C_nand2_gate_blk;
1070                c_load_nand3_path_out            = 8 * C_nand3_gate_blk;
1071            } else if (blk->number_input_addr_bits == 8) {
1072                //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8
1073                //decoder
1074                num_buffers_driving_4_nand2_load = 2;
1075                num_buffers_driving_8_nand3_load = 6;
1076                c_load_nand2_path_out            = 4 * C_nand2_gate_blk;
1077                c_load_nand3_path_out            = 8 * C_nand3_gate_blk;
1078            } else if (blk->number_input_addr_bits == 9) {
1079                //8 + 8 + 8 NAND3 gates three 3-8 decoder
1080                num_buffers_driving_8_nand3_load = 9;
1081                c_load_nand3_path_out            = 8 * C_nand3_gate_blk;
1082            }
1083        }
1084
1085        if ((blk->flag_two_unique_paths) ||
1086                (blk->number_inputs_L1_gate == 2) ||
1087                (number_input_addr_bits == 0) ||
1088                ((way_select) && (dec->num_in_signals == 2))) {
1089            //this means that way_select is driving NAND2 in decoder.
1090            width_nand2_path_n[0] = g_tp.min_w_nmos_;
1091            width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
1092            F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
1093            number_gates_nand2_path = logical_effort(
1094                                          min_number_gates,
1095                                          1,
1096                                          F,
1097                                          width_nand2_path_n,
1098                                          width_nand2_path_p,
1099                                          c_load_nand2_path_out,
1100                                          p_to_n_sz_ratio,
1101                                          is_dram_, false, g_tp.max_w_nmos_);
1102        }
1103
1104        if ((blk->flag_two_unique_paths) ||
1105                (blk->number_inputs_L1_gate == 3) ||
1106                ((way_select) && (dec->num_in_signals == 3))) {
1107            //this means that way_select is driving NAND3 in decoder.
1108            width_nand3_path_n[0] = g_tp.min_w_nmos_;
1109            width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
1110            F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
1111            number_gates_nand3_path = logical_effort(
1112                                          min_number_gates,
1113                                          1,
1114                                          F,
1115                                          width_nand3_path_n,
1116                                          width_nand3_path_p,
1117                                          c_load_nand3_path_out,
1118                                          p_to_n_sz_ratio,
1119                                          is_dram_, false, g_tp.max_w_nmos_);
1120        }
1121    }
1122}
1123
1124
1125
1126void PredecBlkDrv::compute_area() {
1127    double area_nand2_path = 0;
1128    double area_nand3_path = 0;
1129    double leak_nand2_path = 0;
1130    double leak_nand3_path = 0;
1131    double gate_leak_nand2_path = 0;
1132    double gate_leak_nand3_path = 0;
1133
1134    if (flag_driver_exists) {
1135        // first check whether a predecoder block driver is needed
1136        for (int i = 0; i < number_gates_nand2_path; ++i) {
1137            area_nand2_path +=
1138                compute_gate_area(INV, 1, width_nand2_path_p[i],
1139                                  width_nand2_path_n[i], g_tp.cell_h_def);
1140            leak_nand2_path +=
1141                cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
1142                                  1, inv, is_dram_);
1143            gate_leak_nand2_path +=
1144                cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
1145                                1, inv, is_dram_);
1146        }
1147        area_nand2_path *= (num_buffers_driving_1_nand2_load +
1148                            num_buffers_driving_2_nand2_load +
1149                            num_buffers_driving_4_nand2_load);
1150        leak_nand2_path *= (num_buffers_driving_1_nand2_load +
1151                            num_buffers_driving_2_nand2_load +
1152                            num_buffers_driving_4_nand2_load);
1153        gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
1154                                 num_buffers_driving_2_nand2_load +
1155                                 num_buffers_driving_4_nand2_load);
1156
1157        for (int i = 0; i < number_gates_nand3_path; ++i) {
1158            area_nand3_path +=
1159                compute_gate_area(INV, 1, width_nand3_path_p[i],
1160                                  width_nand3_path_n[i], g_tp.cell_h_def);
1161            leak_nand3_path +=
1162                cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
1163                                  1, inv, is_dram_);
1164            gate_leak_nand3_path +=
1165                cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
1166                                1, inv, is_dram_);
1167        }
1168        area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1169        leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1170        gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1171
1172        power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
1173        power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
1174        power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
1175        power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
1176        area.set_area(area_nand2_path + area_nand3_path);
1177    }
1178}
1179
1180
1181
1182pair<double, double> PredecBlkDrv::compute_delays(
1183    double inrisetime_nand2_path,
1184    double inrisetime_nand3_path) {
1185    pair<double, double> ret_val;
1186    ret_val.first  = 0;  // outrisetime_nand2_path
1187    ret_val.second = 0;  // outrisetime_nand3_path
1188    int i;
1189    double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
1190    double Vdd = g_tp.peri_global.Vdd;
1191
1192    if (flag_driver_exists) {
1193        for (i = 0; i < number_gates_nand2_path - 1; ++i) {
1194            rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
1195            c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
1196            c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1197                          drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1198            tf = rd * (c_intrinsic + c_gate_load);
1199            this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
1200            delay_nand2_path += this_delay;
1201            inrisetime_nand2_path = this_delay / (1.0 - 0.5);
1202            power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
1203        }
1204
1205        // Final inverter drives the predecoder block or the decoder output load
1206        if (number_gates_nand2_path != 0) {
1207            i = number_gates_nand2_path - 1;
1208            rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
1209            c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1210                          drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1211            c_load = c_load_nand2_path_out;
1212            tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out * c_load / 2;
1213            this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
1214            delay_nand2_path += this_delay;
1215            ret_val.first = this_delay / (1.0 - 0.5);
1216            power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
1217//      cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <<endl;
1218        }
1219
1220        for (i = 0; i < number_gates_nand3_path - 1; ++i) {
1221            rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
1222            c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
1223            c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1224                          drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1225            tf = rd * (c_intrinsic + c_gate_load);
1226            this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
1227            delay_nand3_path += this_delay;
1228            inrisetime_nand3_path = this_delay / (1.0 - 0.5);
1229            power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
1230        }
1231
1232        // Final inverter drives the predecoder block or the decoder output load
1233        if (number_gates_nand3_path != 0) {
1234            i = number_gates_nand3_path - 1;
1235            rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
1236            c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1237                          drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1238            c_load = c_load_nand3_path_out;
1239            tf = rd * (c_intrinsic + c_load) + r_load_nand3_path_out * c_load / 2;
1240            this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
1241            delay_nand3_path += this_delay;
1242            ret_val.second = this_delay / (1.0 - 0.5);
1243            power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
1244        }
1245    }
1246    return ret_val;
1247}
1248
1249
1250double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir) {
1251    return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
1252            num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
1253}
1254
1255
1256
1257Predec::Predec(
1258    PredecBlkDrv * drv1_,
1259    PredecBlkDrv * drv2_)
1260        : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) {
1261    driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
1262                                  drv1->power_nand3_path.readOp.leakage +
1263                                  drv2->power_nand2_path.readOp.leakage +
1264                                  drv2->power_nand3_path.readOp.leakage;
1265    block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
1266                                 blk1->power_nand3_path.readOp.leakage +
1267                                 blk1->power_L2.readOp.leakage +
1268                                 blk2->power_nand2_path.readOp.leakage +
1269                                 blk2->power_nand3_path.readOp.leakage +
1270                                 blk2->power_L2.readOp.leakage;
1271    power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
1272
1273    driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
1274                                       drv1->power_nand3_path.readOp.gate_leakage +
1275                                       drv2->power_nand2_path.readOp.gate_leakage +
1276                                       drv2->power_nand3_path.readOp.gate_leakage;
1277    block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
1278                                      blk1->power_nand3_path.readOp.gate_leakage +
1279                                      blk1->power_L2.readOp.gate_leakage +
1280                                      blk2->power_nand2_path.readOp.gate_leakage +
1281                                      blk2->power_nand3_path.readOp.gate_leakage +
1282                                      blk2->power_L2.readOp.gate_leakage;
1283    power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
1284}
1285
1286void PredecBlkDrv::leakage_feedback(double temperature)
1287{
1288  double leak_nand2_path = 0;
1289  double leak_nand3_path = 0;
1290  double gate_leak_nand2_path = 0;
1291  double gate_leak_nand3_path = 0;
1292
1293  if (flag_driver_exists)
1294  { // first check whether a predecoder block driver is needed
1295    for (int i = 0; i < number_gates_nand2_path; ++i)
1296    {
1297      leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
1298      gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
1299    }
1300    leak_nand2_path *= (num_buffers_driving_1_nand2_load +
1301                        num_buffers_driving_2_nand2_load +
1302                        num_buffers_driving_4_nand2_load);
1303    gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
1304                            num_buffers_driving_2_nand2_load +
1305                            num_buffers_driving_4_nand2_load);
1306
1307    for (int i = 0; i < number_gates_nand3_path; ++i)
1308    {
1309      leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
1310      gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
1311    }
1312    leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1313    gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1314
1315    power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
1316    power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
1317    power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
1318    power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
1319  }
1320}
1321
1322double Predec::compute_delays(double inrisetime) {
1323    // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
1324    pair<double, double> tmp_pair1, tmp_pair2;
1325    tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
1326    tmp_pair1 = blk1->compute_delays(tmp_pair1);
1327    tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
1328    tmp_pair2 = blk2->compute_delays(tmp_pair2);
1329    tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
1330
1331    driver_power.readOp.dynamic =
1332        drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
1333        drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
1334        drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
1335        drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
1336
1337    block_power.readOp.dynamic =
1338        blk1->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path +
1339        blk1->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path +
1340        blk1->power_L2.readOp.dynamic +
1341        blk2->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path  +
1342        blk2->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path +
1343        blk2->power_L2.readOp.dynamic;
1344
1345    power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
1346
1347    delay = tmp_pair1.first;
1348    return  tmp_pair1.second;
1349}
1350
1351void Predec::leakage_feedback(double temperature)
1352{
1353  drv1->leakage_feedback(temperature);
1354  drv2->leakage_feedback(temperature);
1355  blk1->leakage_feedback(temperature);
1356  blk2->leakage_feedback(temperature);
1357
1358  driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
1359                                drv1->power_nand3_path.readOp.leakage +
1360                                drv2->power_nand2_path.readOp.leakage +
1361                                drv2->power_nand3_path.readOp.leakage;
1362  block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
1363                               blk1->power_nand3_path.readOp.leakage +
1364                               blk1->power_L2.readOp.leakage +
1365                               blk2->power_nand2_path.readOp.leakage +
1366                               blk2->power_nand3_path.readOp.leakage +
1367                               blk2->power_L2.readOp.leakage;
1368  power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
1369
1370  driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
1371                                  drv1->power_nand3_path.readOp.gate_leakage +
1372                                  drv2->power_nand2_path.readOp.gate_leakage +
1373                                  drv2->power_nand3_path.readOp.gate_leakage;
1374  block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
1375                                 blk1->power_nand3_path.readOp.gate_leakage +
1376                                 blk1->power_L2.readOp.gate_leakage +
1377                                 blk2->power_nand2_path.readOp.gate_leakage +
1378                                 blk2->power_nand3_path.readOp.gate_leakage +
1379                                 blk2->power_L2.readOp.gate_leakage;
1380  power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
1381}
1382
1383// returns <delay, risetime>
1384pair<double, double> Predec::get_max_delay_before_decoder(
1385    pair<double, double> input_pair1,
1386    pair<double, double> input_pair2) {
1387    pair<double, double> ret_val;
1388    double delay;
1389
1390    delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
1391    ret_val.first  = delay;
1392    ret_val.second = input_pair1.first;
1393    delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
1394    if (ret_val.first < delay) {
1395        ret_val.first  = delay;
1396        ret_val.second = input_pair1.second;
1397    }
1398    delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
1399    if (ret_val.first < delay) {
1400        ret_val.first  = delay;
1401        ret_val.second = input_pair2.first;
1402    }
1403    delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
1404    if (ret_val.first < delay) {
1405        ret_val.first  = delay;
1406        ret_val.second = input_pair2.second;
1407    }
1408
1409    return ret_val;
1410}
1411
1412
1413
1414Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_,
1415               bool is_dram)
1416    : number_gates(0),
1417      min_number_gates(2),
1418      c_gate_load(c_gate_load_),
1419      c_wire_load(c_wire_load_),
1420      r_wire_load(r_wire_load_),
1421      delay(0),
1422      power(),
1423      is_dram_(is_dram) {
1424    for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
1425        width_n[i] = 0;
1426        width_p[i] = 0;
1427    }
1428
1429    compute_widths();
1430}
1431
1432
1433void Driver::compute_widths() {
1434    double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
1435    double c_load = c_gate_load + c_wire_load;
1436    width_n[0] = g_tp.min_w_nmos_;
1437    width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
1438
1439    double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
1440    number_gates = logical_effort(
1441                       min_number_gates,
1442                       1,
1443                       F,
1444                       width_n,
1445                       width_p,
1446                       c_load,
1447                       p_to_n_sz_ratio,
1448                       is_dram_, false,
1449                       g_tp.max_w_nmos_);
1450}
1451
1452
1453
1454double Driver::compute_delay(double inrisetime) {
1455    int    i;
1456    double rd, c_load, c_intrinsic, tf;
1457    double this_delay = 0;
1458
1459    for (i = 0; i < number_gates - 1; ++i) {
1460        rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
1461        c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
1462        c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1463                      drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1464        tf = rd * (c_intrinsic + c_load);
1465        this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1466        delay += this_delay;
1467        inrisetime = this_delay / (1.0 - 0.5);
1468        power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
1469            g_tp.peri_global.Vdd;
1470        power.readOp.leakage +=
1471            cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
1472            g_tp.peri_global.Vdd;
1473        power.readOp.gate_leakage +=
1474            cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
1475            g_tp.peri_global.Vdd;
1476    }
1477
1478    i = number_gates - 1;
1479    c_load = c_gate_load + c_wire_load;
1480    rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
1481    c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1482        drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1483    tf = rd * (c_intrinsic + c_load) + r_wire_load *
1484        (c_wire_load / 2 + c_gate_load);
1485    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1486    delay += this_delay;
1487    power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
1488        g_tp.peri_global.Vdd;
1489    power.readOp.leakage +=
1490        cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
1491        g_tp.peri_global.Vdd;
1492    power.readOp.gate_leakage +=
1493        cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
1494        g_tp.peri_global.Vdd;
1495
1496    return this_delay / (1.0 - 0.5);
1497}
1498
1499