decoder.cc revision 10152:52c552138ba1
1/*****************************************************************************
2 *                                McPAT/CACTI
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *                          All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution;
14 * neither the name of the copyright holders nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 *
30 ***************************************************************************/
31
32
33
34#include <cassert>
35#include <cmath>
36#include <iostream>
37
38#include "area.h"
39#include "decoder.h"
40#include "parameter.h"
41
42using namespace std;
43
44
45Decoder::Decoder(
46    int    _num_dec_signals,
47    bool   flag_way_select,
48    double _C_ld_dec_out,
49    double _R_wire_dec_out,
50    bool   fully_assoc_,
51    bool   is_dram_,
52    bool   is_wl_tr_,
53    const  Area & cell_)
54:exist(false),
55  C_ld_dec_out(_C_ld_dec_out),
56  R_wire_dec_out(_R_wire_dec_out),
57  num_gates(0), num_gates_min(2),
58  delay(0),
59  //power(),
60  fully_assoc(fully_assoc_), is_dram(is_dram_),
61  is_wl_tr(is_wl_tr_), cell(cell_)
62{
63
64  for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
65  {
66    w_dec_n[i] = 0;
67    w_dec_p[i] = 0;
68  }
69
70  /*
71   * _num_dec_signals is the number of decoded signal as output
72   * num_addr_bits_dec is the number of signal to be decoded
73   * as the decoders input.
74   */
75  int num_addr_bits_dec = _log2(_num_dec_signals);
76
77  if (num_addr_bits_dec < 4)
78  {
79    if (flag_way_select)
80    {
81      exist = true;
82      num_in_signals = 2;
83    }
84    else
85    {
86      num_in_signals = 0;
87    }
88  }
89  else
90  {
91    exist = true;
92
93    if (flag_way_select)
94    {
95      num_in_signals = 3;
96    }
97    else
98    {
99      num_in_signals = 2;
100    }
101  }
102
103  assert(cell.h>0);
104  assert(cell.w>0);
105  // the height of a row-decoder-driver cell is fixed to be 4 * cell.h;
106  //area.h = 4 * cell.h;
107  area.h = g_tp.h_dec * cell.h;
108
109  compute_widths();
110  compute_area();
111}
112
113
114
115void Decoder::compute_widths()
116{
117  double F;
118  double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
119  double gnand2     = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
120  double gnand3     = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
121
122  if (exist)
123  {
124    if (num_in_signals == 2 || fully_assoc)
125    {
126      w_dec_n[0] = 2 * g_tp.min_w_nmos_;
127      w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
128      F = gnand2;
129    }
130    else
131    {
132      w_dec_n[0] = 3 * g_tp.min_w_nmos_;
133      w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
134      F = gnand3;
135    }
136
137    F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
138                         gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));
139    num_gates = logical_effort(
140        num_gates_min,
141        num_in_signals == 2 ? gnand2 : gnand3,
142        F,
143        w_dec_n,
144        w_dec_p,
145        C_ld_dec_out,
146        p_to_n_sz_ratio,
147        is_dram,
148        is_wl_tr,
149        g_tp.max_w_nmos_dec);
150  }
151}
152
153
154
155void Decoder::compute_area()
156{
157  double cumulative_area = 0;
158  double cumulative_curr = 0;  // cumulative leakage current
159  double cumulative_curr_Ig = 0;  // cumulative leakage current
160
161  if (exist)
162  { // First check if this decoder exists
163    if (num_in_signals == 2)
164    {
165      cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
166      cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
167      cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
168    }
169    else if (num_in_signals == 3)
170    {
171      cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
172      cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
173      cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
174    }
175
176    for (int i = 1; i < num_gates; i++)
177    {
178      cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
179      cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
180      cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
181    }
182    power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
183    power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
184
185    area.w = (cumulative_area / area.h);
186  }
187}
188
189
190
191double Decoder::compute_delays(double inrisetime)
192{
193  if (exist)
194  {
195    double ret_val = 0;  // outrisetime
196    int    i;
197    double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
198    double Vdd = g_tp.peri_global.Vdd;
199
200    if ((is_wl_tr) && (is_dram))
201    {
202      Vpp = g_tp.vpp;
203    }
204    else if (is_wl_tr)
205    {
206      Vpp = g_tp.sram_cell.Vdd;
207    }
208    else
209    {
210      Vpp = g_tp.peri_global.Vdd;
211    }
212
213    // first check whether a decoder is required at all
214    rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
215    c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
216    c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
217                  drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
218    tf = rd * (c_intrinsic + c_load);
219    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
220    delay += this_delay;
221    inrisetime = this_delay / (1.0 - 0.5);
222    power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
223
224    for (i = 1; i < num_gates - 1; ++i)
225    {
226      rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
227      c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
228      c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
229                    drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
230      tf = rd * (c_intrinsic + c_load);
231      this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
232      delay += this_delay;
233      inrisetime = this_delay / (1.0 - 0.5);
234      power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
235    }
236
237    // add delay of final inverter that drives the wordline
238    i = num_gates - 1;
239    c_load = C_ld_dec_out;
240    rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
241    c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
242                  drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
243    tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2;
244    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
245    delay  += this_delay;
246    ret_val = this_delay / (1.0 - 0.5);
247    power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
248
249    return ret_val;
250  }
251  else
252  {
253    return 0.0;
254  }
255}
256
257void Decoder::leakage_feedback(double temperature)
258{
259  double cumulative_curr = 0;  // cumulative leakage current
260  double cumulative_curr_Ig = 0;  // cumulative leakage current
261
262  if (exist)
263  { // First check if this decoder exists
264    if (num_in_signals == 2)
265    {
266      cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
267      cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
268    }
269    else if (num_in_signals == 3)
270    {
271      cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
272      cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
273    }
274
275    for (int i = 1; i < num_gates; i++)
276    {
277      cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
278      cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
279    }
280
281    power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
282    power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
283  }
284}
285
286PredecBlk::PredecBlk(
287    int    num_dec_signals,
288    Decoder * dec_,
289    double C_wire_predec_blk_out,
290    double R_wire_predec_blk_out_,
291    int    num_dec_per_predec,
292    bool   is_dram,
293    bool   is_blk1)
294 :dec(dec_),
295  exist(false),
296  number_input_addr_bits(0),
297  C_ld_predec_blk_out(0),
298  R_wire_predec_blk_out(0),
299  branch_effort_nand2_gate_output(1),
300  branch_effort_nand3_gate_output(1),
301  flag_two_unique_paths(false),
302  flag_L2_gate(0),
303  number_inputs_L1_gate(0),
304  number_gates_L1_nand2_path(0),
305  number_gates_L1_nand3_path(0),
306  number_gates_L2(0),
307  min_number_gates_L1(2),
308  min_number_gates_L2(2),
309  num_L1_active_nand2_path(0),
310  num_L1_active_nand3_path(0),
311  delay_nand2_path(0),
312  delay_nand3_path(0),
313  power_nand2_path(),
314  power_nand3_path(),
315  power_L2(),
316  is_dram_(is_dram)
317{
318  int    branch_effort_predec_out;
319  double C_ld_dec_gate;
320  int    num_addr_bits_dec = _log2(num_dec_signals);
321  int    blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
322  int    blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
323
324  w_L1_nand2_n[0] = 0;
325  w_L1_nand2_p[0] = 0;
326  w_L1_nand3_n[0] = 0;
327  w_L1_nand3_p[0] = 0;
328
329  if (is_blk1 == true)
330  {
331    if (num_addr_bits_dec <= 0)
332    {
333      return;
334    }
335    else if (num_addr_bits_dec < 4)
336    {
337      // Just one predecoder block is required with NAND2 gates. No decoder required.
338      // The first level of predecoding directly drives the decoder output load
339      exist = true;
340      number_input_addr_bits = num_addr_bits_dec;
341      R_wire_predec_blk_out = dec->R_wire_dec_out;
342      C_ld_predec_blk_out = dec->C_ld_dec_out;
343    }
344    else
345    {
346      exist = true;
347      number_input_addr_bits   = blk1_num_input_addr_bits;
348      branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
349      C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
350      R_wire_predec_blk_out = R_wire_predec_blk_out_;
351      C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
352    }
353  }
354  else
355  {
356    if (num_addr_bits_dec >= 4)
357    {
358      exist = true;
359      number_input_addr_bits   = blk2_num_input_addr_bits;
360      branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
361      C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
362      R_wire_predec_blk_out = R_wire_predec_blk_out_;
363      C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
364    }
365  }
366
367  compute_widths();
368  compute_area();
369}
370
371
372
373void PredecBlk::compute_widths()
374{
375  double F, c_load_nand3_path, c_load_nand2_path;
376  double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
377  double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
378  double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
379
380  if (exist == false) return;
381
382
383  switch (number_input_addr_bits)
384  {
385    case 1:
386      flag_two_unique_paths           = false;
387      number_inputs_L1_gate           = 2;
388      flag_L2_gate                    = 0;
389      break;
390    case 2:
391      flag_two_unique_paths           = false;
392      number_inputs_L1_gate           = 2;
393      flag_L2_gate                    = 0;
394      break;
395    case 3:
396      flag_two_unique_paths           = false;
397      number_inputs_L1_gate           = 3;
398      flag_L2_gate                    = 0;
399      break;
400    case 4:
401      flag_two_unique_paths           = false;
402      number_inputs_L1_gate           = 2;
403      flag_L2_gate                    = 2;
404      branch_effort_nand2_gate_output = 4;
405      break;
406    case 5:
407      flag_two_unique_paths           = true;
408      flag_L2_gate                    = 2;
409      branch_effort_nand2_gate_output = 8;
410      branch_effort_nand3_gate_output = 4;
411      break;
412    case 6:
413      flag_two_unique_paths           = false;
414      number_inputs_L1_gate           = 3;
415      flag_L2_gate                    = 2;
416      branch_effort_nand3_gate_output = 8;
417      break;
418    case 7:
419      flag_two_unique_paths           = true;
420      flag_L2_gate                    = 3;
421      branch_effort_nand2_gate_output = 32;
422      branch_effort_nand3_gate_output = 16;
423      break;
424    case 8:
425      flag_two_unique_paths           = true;
426      flag_L2_gate                    = 3;
427      branch_effort_nand2_gate_output = 64;
428      branch_effort_nand3_gate_output = 32;
429      break;
430    case 9:
431      flag_two_unique_paths           = false;
432      number_inputs_L1_gate           = 3;
433      flag_L2_gate                    = 3;
434      branch_effort_nand3_gate_output = 64;
435      break;
436    default:
437      assert(0);
438      break;
439  }
440
441  // find the number of gates and sizing in second level of predecoder (if there is a second level)
442  if (flag_L2_gate)
443  {
444    if (flag_L2_gate == 2)
445    { // 2nd level is a NAND2 gate
446      w_L2_n[0] = 2 * g_tp.min_w_nmos_;
447      F = gnand2;
448    }
449    else
450    { // 2nd level is a NAND3 gate
451      w_L2_n[0] = 3 * g_tp.min_w_nmos_;
452      F = gnand3;
453    }
454    w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
455    F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
456    number_gates_L2 = logical_effort(
457        min_number_gates_L2,
458        flag_L2_gate == 2 ? gnand2 : gnand3,
459        F,
460        w_L2_n,
461        w_L2_p,
462        C_ld_predec_blk_out,
463        p_to_n_sz_ratio,
464        is_dram_, false,
465        g_tp.max_w_nmos_);
466
467    // Now find the number of gates and widths in first level of predecoder
468    if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2))
469    { // Whenever flag_two_unique_paths is true, it means first level of decoder employs
470      // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, it means
471      // a NAND2 gate is used in the first level of the predecoder
472      c_load_nand2_path = branch_effort_nand2_gate_output *
473        (gate_C(w_L2_n[0], 0, is_dram_) +
474         gate_C(w_L2_p[0], 0, is_dram_));
475      w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
476      w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
477      F = gnand2 * c_load_nand2_path /
478        (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
479         gate_C(w_L1_nand2_p[0], 0, is_dram_));
480      number_gates_L1_nand2_path = logical_effort(
481          min_number_gates_L1,
482          gnand2,
483          F,
484          w_L1_nand2_n,
485          w_L1_nand2_p,
486          c_load_nand2_path,
487          p_to_n_sz_ratio,
488          is_dram_, false,
489          g_tp.max_w_nmos_);
490    }
491
492    //Now find widths of gates along path in which first gate is a NAND3
493    if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3))
494    { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
495      // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means
496      // a NAND3 gate is used in the first level of the predecoder
497      c_load_nand3_path = branch_effort_nand3_gate_output *
498        (gate_C(w_L2_n[0], 0, is_dram_) +
499         gate_C(w_L2_p[0], 0, is_dram_));
500      w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
501      w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
502      F = gnand3 * c_load_nand3_path /
503        (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
504         gate_C(w_L1_nand3_p[0], 0, is_dram_));
505      number_gates_L1_nand3_path = logical_effort(
506          min_number_gates_L1,
507          gnand3,
508          F,
509          w_L1_nand3_n,
510          w_L1_nand3_p,
511          c_load_nand3_path,
512          p_to_n_sz_ratio,
513          is_dram_, false,
514          g_tp.max_w_nmos_);
515    }
516  }
517  else
518  { // find number of gates and widths in first level of predecoder block when there is no second level
519    if (number_inputs_L1_gate == 2)
520    {
521      w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
522      w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
523      F = gnand2*C_ld_predec_blk_out /
524        (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
525         gate_C(w_L1_nand2_p[0], 0, is_dram_));
526      number_gates_L1_nand2_path = logical_effort(
527          min_number_gates_L1,
528          gnand2,
529          F,
530          w_L1_nand2_n,
531          w_L1_nand2_p,
532          C_ld_predec_blk_out,
533          p_to_n_sz_ratio,
534          is_dram_, false,
535          g_tp.max_w_nmos_);
536    }
537    else if (number_inputs_L1_gate == 3)
538    {
539      w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
540      w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
541      F = gnand3*C_ld_predec_blk_out /
542        (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
543         gate_C(w_L1_nand3_p[0], 0, is_dram_));
544      number_gates_L1_nand3_path = logical_effort(
545          min_number_gates_L1,
546          gnand3,
547          F,
548          w_L1_nand3_n,
549          w_L1_nand3_p,
550          C_ld_predec_blk_out,
551          p_to_n_sz_ratio,
552          is_dram_, false,
553          g_tp.max_w_nmos_);
554    }
555  }
556}
557
558
559
560void PredecBlk::compute_area()
561{
562  if (exist)
563  { // First check whether a predecoder block is needed
564    int num_L1_nand2 = 0;
565    int num_L1_nand3 = 0;
566    int num_L2 = 0;
567    double tot_area_L1_nand3  =0;
568    double leak_L1_nand3      =0;
569    double gate_leak_L1_nand3 =0;
570
571    double tot_area_L1_nand2  = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
572    double leak_L1_nand2      = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
573    double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
574    if (number_inputs_L1_gate != 3) {
575      tot_area_L1_nand3 = 0;
576      leak_L1_nand3 = 0;
577      gate_leak_L1_nand3 =0;
578    }
579    else {
580      tot_area_L1_nand3  = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
581      leak_L1_nand3      = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
582      gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
583    }
584
585    switch (number_input_addr_bits)
586    {
587      case 1: //2 NAND2 gates
588        num_L1_nand2 = 2;
589        num_L2       = 0;
590        num_L1_active_nand2_path =1;
591        num_L1_active_nand3_path =0;
592        break;
593      case 2: //4 NAND2 gates
594        num_L1_nand2 = 4;
595        num_L2       = 0;
596        num_L1_active_nand2_path =1;
597        num_L1_active_nand3_path =0;
598        break;
599      case 3: //8 NAND3 gates
600        num_L1_nand3 = 8;
601        num_L2       = 0;
602        num_L1_active_nand2_path =0;
603        num_L1_active_nand3_path =1;
604        break;
605      case 4: //4 + 4 NAND2 gates
606        num_L1_nand2 = 8;
607        num_L2       = 16;
608        num_L1_active_nand2_path =2;
609        num_L1_active_nand3_path =0;
610        break;
611      case 5: //4 NAND2 gates, 8 NAND3 gates
612        num_L1_nand2 = 4;
613        num_L1_nand3 = 8;
614        num_L2       = 32;
615        num_L1_active_nand2_path =1;
616        num_L1_active_nand3_path =1;
617        break;
618      case 6: //8 + 8 NAND3 gates
619        num_L1_nand3 = 16;
620        num_L2       = 64;
621        num_L1_active_nand2_path =0;
622        num_L1_active_nand3_path =2;
623        break;
624      case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
625        num_L1_nand2 = 8;
626        num_L1_nand3 = 8;
627        num_L2       = 128;
628        num_L1_active_nand2_path =2;
629        num_L1_active_nand3_path =1;
630        break;
631      case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
632        num_L1_nand2 = 4;
633        num_L1_nand3 = 16;
634        num_L2       = 256;
635        num_L1_active_nand2_path =2;
636        num_L1_active_nand3_path =2;
637        break;
638      case 9: //8 + 8 + 8 NAND3 gates
639        num_L1_nand3 = 24;
640        num_L2       = 512;
641        num_L1_active_nand2_path =0;
642        num_L1_active_nand3_path =3;
643        break;
644      default:
645        break;
646    }
647
648    for (int i = 1; i < number_gates_L1_nand2_path; ++i)
649    {
650      tot_area_L1_nand2  += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
651      leak_L1_nand2      += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
652      gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
653    }
654    tot_area_L1_nand2  *= num_L1_nand2;
655    leak_L1_nand2      *= num_L1_nand2;
656    gate_leak_L1_nand2 *= num_L1_nand2;
657
658    for (int i = 1; i < number_gates_L1_nand3_path; ++i)
659    {
660      tot_area_L1_nand3  += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
661      leak_L1_nand3      += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
662      gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
663    }
664    tot_area_L1_nand3  *= num_L1_nand3;
665    leak_L1_nand3      *= num_L1_nand3;
666    gate_leak_L1_nand3 *= num_L1_nand3;
667
668    double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
669    double cumulative_area_L2 = 0.0;
670    double leakage_L2         = 0.0;
671    double gate_leakage_L2    = 0.0;
672
673    if (flag_L2_gate == 2)
674    {
675      cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
676      leakage_L2         = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
677      gate_leakage_L2    = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
678    }
679    else if (flag_L2_gate == 3)
680    {
681      cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
682      leakage_L2         = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
683      gate_leakage_L2    = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
684    }
685
686    for (int i = 1; i < number_gates_L2; ++i)
687    {
688      cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
689      leakage_L2         += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
690      gate_leakage_L2    += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
691    }
692    cumulative_area_L2 *= num_L2;
693    leakage_L2         *= num_L2;
694    gate_leakage_L2    *= num_L2;
695
696    power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
697    power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
698    power_L2.readOp.leakage         = leakage_L2    * g_tp.peri_global.Vdd;
699    area.set_area(cumulative_area_L1 + cumulative_area_L2);
700    power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
701    power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
702    power_L2.readOp.gate_leakage         = gate_leakage_L2    * g_tp.peri_global.Vdd;
703  }
704}
705
706
707
708pair<double, double> PredecBlk::compute_delays(
709    pair<double, double> inrisetime)  // <nand2, nand3>
710{
711  pair<double, double> ret_val;
712  ret_val.first  = 0;  // outrisetime_nand2_path
713  ret_val.second = 0;  // outrisetime_nand3_path
714
715  double inrisetime_nand2_path = inrisetime.first;
716  double inrisetime_nand3_path = inrisetime.second;
717  int    i;
718  double rd, c_load, c_intrinsic, tf, this_delay;
719  double Vdd = g_tp.peri_global.Vdd;
720
721  // TODO: following delay calculation part can be greatly simplified.
722  // first check whether a predecoder block is required
723  if (exist)
724  {
725    //Find delay in first level of predecoder block
726    //First find delay in path
727    if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2))
728    {
729      //First gate is a NAND2 gate
730      rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
731      c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
732      c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
733                        drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
734      tf = rd * (c_intrinsic + c_load);
735      this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
736      delay_nand2_path += this_delay;
737      inrisetime_nand2_path = this_delay / (1.0 - 0.5);
738      power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
739
740      //Add delays of all but the last inverter in the chain
741      for (i = 1; i < number_gates_L1_nand2_path - 1; ++i)
742      {
743        rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
744        c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
745        c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
746                      drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
747        tf = rd * (c_intrinsic + c_load);
748        this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
749        delay_nand2_path += this_delay;
750        inrisetime_nand2_path = this_delay / (1.0 - 0.5);
751        power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
752      }
753
754      //Add delay of the last inverter
755      i = number_gates_L1_nand2_path - 1;
756      rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
757      if (flag_L2_gate)
758      {
759        c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
760        c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
761                      drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
762        tf = rd * (c_intrinsic + c_load);
763        this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
764        delay_nand2_path += this_delay;
765        inrisetime_nand2_path = this_delay / (1.0 - 0.5);
766        power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
767      }
768      else
769      { //First level directly drives decoder output load
770        c_load = C_ld_predec_blk_out;
771        c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
772                      drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
773        tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
774        this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
775        delay_nand2_path += this_delay;
776        ret_val.first = this_delay / (1.0 - 0.5);
777        power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
778      }
779    }
780
781    if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3))
782    { //Check if the number of gates in the first level is more than 1.
783      //First gate is a NAND3 gate
784      rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
785      c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
786      c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
787                        drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
788      tf = rd * (c_intrinsic + c_load);
789      this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
790      delay_nand3_path += this_delay;
791      inrisetime_nand3_path = this_delay / (1.0 - 0.5);
792      power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
793
794      //Add delays of all but the last inverter in the chain
795      for (i = 1; i < number_gates_L1_nand3_path - 1; ++i)
796      {
797        rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
798        c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
799        c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
800                      drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
801        tf = rd * (c_intrinsic + c_load);
802        this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
803        delay_nand3_path += this_delay;
804        inrisetime_nand3_path = this_delay / (1.0 - 0.5);
805        power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
806      }
807
808      //Add delay of the last inverter
809      i = number_gates_L1_nand3_path - 1;
810      rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
811      if (flag_L2_gate)
812      {
813        c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
814        c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
815                      drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
816        tf = rd * (c_intrinsic + c_load);
817        this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
818        delay_nand3_path += this_delay;
819        inrisetime_nand3_path = this_delay / (1.0 - 0.5);
820        power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
821      }
822      else
823      { //First level directly drives decoder output load
824        c_load = C_ld_predec_blk_out;
825        c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
826                      drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
827        tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
828        this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
829        delay_nand3_path += this_delay;
830        ret_val.second = this_delay / (1.0 - 0.5);
831        power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
832      }
833    }
834
835    // Find delay through second level
836    if (flag_L2_gate)
837    {
838      if (flag_L2_gate == 2)
839      {
840        rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
841        c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
842        c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
843                          drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
844        tf = rd * (c_intrinsic + c_load);
845        this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
846        delay_nand2_path += this_delay;
847        inrisetime_nand2_path = this_delay / (1.0 - 0.5);
848        power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
849      }
850      else
851      { // flag_L2_gate = 3
852        rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
853        c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
854        c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
855                          drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
856        tf = rd * (c_intrinsic + c_load);
857        this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
858        delay_nand3_path += this_delay;
859        inrisetime_nand3_path = this_delay / (1.0 - 0.5);
860        power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
861      }
862
863      for (i = 1; i < number_gates_L2 - 1; ++i)
864      {
865        rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
866        c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
867        c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
868                      drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
869        tf = rd * (c_intrinsic + c_load);
870        this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
871        delay_nand2_path += this_delay;
872        inrisetime_nand2_path = this_delay / (1.0 - 0.5);
873        this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
874        delay_nand3_path += this_delay;
875        inrisetime_nand3_path = this_delay / (1.0 - 0.5);
876        power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
877      }
878
879      //Add delay of final inverter that drives the wordline decoders
880      i = number_gates_L2 - 1;
881      c_load = C_ld_predec_blk_out;
882      rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
883      c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
884                    drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
885      tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
886      this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
887      delay_nand2_path += this_delay;
888      ret_val.first = this_delay / (1.0 - 0.5);
889      this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
890      delay_nand3_path += this_delay;
891      ret_val.second = this_delay / (1.0 - 0.5);
892      power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
893    }
894  }
895
896  delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
897  return ret_val;
898}
899
900void PredecBlk::leakage_feedback(double temperature)
901{
902  if (exist)
903  { // First check whether a predecoder block is needed
904    int num_L1_nand2 = 0;
905    int num_L1_nand3 = 0;
906    int num_L2 = 0;
907    double leak_L1_nand3      =0;
908    double gate_leak_L1_nand3 =0;
909
910    double leak_L1_nand2      = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
911    double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
912    if (number_inputs_L1_gate != 3) {
913      leak_L1_nand3 = 0;
914      gate_leak_L1_nand3 =0;
915    }
916    else {
917      leak_L1_nand3      = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
918      gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
919    }
920
921    switch (number_input_addr_bits)
922    {
923      case 1: //2 NAND2 gates
924        num_L1_nand2 = 2;
925        num_L2       = 0;
926        num_L1_active_nand2_path =1;
927        num_L1_active_nand3_path =0;
928        break;
929      case 2: //4 NAND2 gates
930        num_L1_nand2 = 4;
931        num_L2       = 0;
932        num_L1_active_nand2_path =1;
933        num_L1_active_nand3_path =0;
934        break;
935      case 3: //8 NAND3 gates
936        num_L1_nand3 = 8;
937        num_L2       = 0;
938        num_L1_active_nand2_path =0;
939        num_L1_active_nand3_path =1;
940        break;
941      case 4: //4 + 4 NAND2 gates
942        num_L1_nand2 = 8;
943        num_L2       = 16;
944        num_L1_active_nand2_path =2;
945        num_L1_active_nand3_path =0;
946        break;
947      case 5: //4 NAND2 gates, 8 NAND3 gates
948        num_L1_nand2 = 4;
949        num_L1_nand3 = 8;
950        num_L2       = 32;
951        num_L1_active_nand2_path =1;
952        num_L1_active_nand3_path =1;
953        break;
954      case 6: //8 + 8 NAND3 gates
955        num_L1_nand3 = 16;
956        num_L2       = 64;
957        num_L1_active_nand2_path =0;
958        num_L1_active_nand3_path =2;
959        break;
960      case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
961        num_L1_nand2 = 8;
962        num_L1_nand3 = 8;
963        num_L2       = 128;
964        num_L1_active_nand2_path =2;
965        num_L1_active_nand3_path =1;
966        break;
967      case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
968        num_L1_nand2 = 4;
969        num_L1_nand3 = 16;
970        num_L2       = 256;
971        num_L1_active_nand2_path =2;
972        num_L1_active_nand3_path =2;
973        break;
974      case 9: //8 + 8 + 8 NAND3 gates
975        num_L1_nand3 = 24;
976        num_L2       = 512;
977        num_L1_active_nand2_path =0;
978        num_L1_active_nand3_path =3;
979        break;
980      default:
981        break;
982    }
983
984    for (int i = 1; i < number_gates_L1_nand2_path; ++i)
985    {
986      leak_L1_nand2      += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
987      gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
988    }
989    leak_L1_nand2      *= num_L1_nand2;
990    gate_leak_L1_nand2 *= num_L1_nand2;
991
992    for (int i = 1; i < number_gates_L1_nand3_path; ++i)
993    {
994      leak_L1_nand3      += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
995      gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
996    }
997    leak_L1_nand3      *= num_L1_nand3;
998    gate_leak_L1_nand3 *= num_L1_nand3;
999
1000    double leakage_L2         = 0.0;
1001    double gate_leakage_L2    = 0.0;
1002
1003    if (flag_L2_gate == 2)
1004    {
1005      leakage_L2         = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
1006      gate_leakage_L2    = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
1007    }
1008    else if (flag_L2_gate == 3)
1009    {
1010      leakage_L2         = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
1011      gate_leakage_L2    = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
1012    }
1013
1014    for (int i = 1; i < number_gates_L2; ++i)
1015    {
1016      leakage_L2         += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
1017      gate_leakage_L2    += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
1018    }
1019    leakage_L2         *= num_L2;
1020    gate_leakage_L2    *= num_L2;
1021
1022    power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
1023    power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
1024    power_L2.readOp.leakage         = leakage_L2    * g_tp.peri_global.Vdd;
1025
1026    power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
1027    power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
1028    power_L2.readOp.gate_leakage         = gate_leakage_L2    * g_tp.peri_global.Vdd;
1029  }
1030}
1031
1032PredecBlkDrv::PredecBlkDrv(
1033    int    way_select_,
1034    PredecBlk * blk_,
1035    bool   is_dram)
1036 :flag_driver_exists(0),
1037  number_gates_nand2_path(0),
1038  number_gates_nand3_path(0),
1039  min_number_gates(2),
1040  num_buffers_driving_1_nand2_load(0),
1041  num_buffers_driving_2_nand2_load(0),
1042  num_buffers_driving_4_nand2_load(0),
1043  num_buffers_driving_2_nand3_load(0),
1044  num_buffers_driving_8_nand3_load(0),
1045  num_buffers_nand3_path(0),
1046  c_load_nand2_path_out(0),
1047  c_load_nand3_path_out(0),
1048  r_load_nand2_path_out(0),
1049  r_load_nand3_path_out(0),
1050  delay_nand2_path(0),
1051  delay_nand3_path(0),
1052  power_nand2_path(),
1053  power_nand3_path(),
1054  blk(blk_), dec(blk->dec),
1055  is_dram_(is_dram),
1056  way_select(way_select_)
1057{
1058  for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
1059  {
1060    width_nand2_path_n[i] = 0;
1061    width_nand2_path_p[i] = 0;
1062    width_nand3_path_n[i] = 0;
1063    width_nand3_path_p[i] = 0;
1064  }
1065
1066  number_input_addr_bits = blk->number_input_addr_bits;
1067
1068  if (way_select > 1)
1069  {
1070    flag_driver_exists     = 1;
1071    number_input_addr_bits = way_select;
1072    if (dec->num_in_signals == 2)
1073    {
1074      c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
1075      num_buffers_driving_2_nand2_load = number_input_addr_bits;
1076    }
1077    else if (dec->num_in_signals == 3)
1078    {
1079      c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
1080      num_buffers_driving_2_nand3_load = number_input_addr_bits;
1081    }
1082  }
1083  else if (way_select == 0)
1084  {
1085    if (blk->exist)
1086    {
1087      flag_driver_exists = 1;
1088    }
1089  }
1090
1091  compute_widths();
1092  compute_area();
1093}
1094
1095
1096
1097void PredecBlkDrv::compute_widths()
1098{
1099  // The predecode block driver accepts as input the address bits from the h-tree network. For
1100  // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
1101  // inversion to generate addrbar and simply treat addrbar as addr.
1102
1103  double F;
1104  double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
1105
1106  if (flag_driver_exists)
1107  {
1108    double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
1109    double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
1110
1111    if (way_select == 0)
1112    {
1113      if (blk->number_input_addr_bits == 1)
1114      { //2 NAND2 gates
1115        num_buffers_driving_2_nand2_load = 1;
1116        c_load_nand2_path_out            = 2 * C_nand2_gate_blk;
1117      }
1118      else if (blk->number_input_addr_bits == 2)
1119      { //4 NAND2 gates  one 2-4 decoder
1120        num_buffers_driving_4_nand2_load = 2;
1121        c_load_nand2_path_out            = 4 * C_nand2_gate_blk;
1122      }
1123      else if (blk->number_input_addr_bits == 3)
1124      { //8 NAND3 gates  one 3-8 decoder
1125        num_buffers_driving_8_nand3_load = 3;
1126        c_load_nand3_path_out            = 8 * C_nand3_gate_blk;
1127      }
1128      else if (blk->number_input_addr_bits == 4)
1129      { //4 + 4 NAND2 gates two 2-4 decoder
1130        num_buffers_driving_4_nand2_load = 4;
1131        c_load_nand2_path_out            = 4 * C_nand2_gate_blk;
1132      }
1133      else if (blk->number_input_addr_bits == 5)
1134      { //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder
1135        num_buffers_driving_4_nand2_load = 2;
1136        num_buffers_driving_8_nand3_load = 3;
1137        c_load_nand2_path_out            = 4 * C_nand2_gate_blk;
1138        c_load_nand3_path_out            = 8 * C_nand3_gate_blk;
1139      }
1140      else if (blk->number_input_addr_bits == 6)
1141      { //8 + 8 NAND3 gates two 3-8 decoder
1142        num_buffers_driving_8_nand3_load = 6;
1143        c_load_nand3_path_out            = 8 * C_nand3_gate_blk;
1144      }
1145      else if (blk->number_input_addr_bits == 7)
1146      { //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder
1147        num_buffers_driving_4_nand2_load = 4;
1148        num_buffers_driving_8_nand3_load = 3;
1149        c_load_nand2_path_out            = 4 * C_nand2_gate_blk;
1150        c_load_nand3_path_out            = 8 * C_nand3_gate_blk;
1151      }
1152      else if (blk->number_input_addr_bits == 8)
1153      { //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder
1154        num_buffers_driving_4_nand2_load = 2;
1155        num_buffers_driving_8_nand3_load = 6;
1156        c_load_nand2_path_out            = 4 * C_nand2_gate_blk;
1157        c_load_nand3_path_out            = 8 * C_nand3_gate_blk;
1158      }
1159      else if (blk->number_input_addr_bits == 9)
1160      { //8 + 8 + 8 NAND3 gates three 3-8 decoder
1161        num_buffers_driving_8_nand3_load = 9;
1162        c_load_nand3_path_out            = 8 * C_nand3_gate_blk;
1163      }
1164    }
1165
1166    if ((blk->flag_two_unique_paths) ||
1167        (blk->number_inputs_L1_gate == 2) ||
1168        (number_input_addr_bits == 0) ||
1169        ((way_select)&&(dec->num_in_signals == 2)))
1170    { //this means that way_select is driving NAND2 in decoder.
1171      width_nand2_path_n[0] = g_tp.min_w_nmos_;
1172      width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
1173      F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
1174      number_gates_nand2_path = logical_effort(
1175          min_number_gates,
1176          1,
1177          F,
1178          width_nand2_path_n,
1179          width_nand2_path_p,
1180          c_load_nand2_path_out,
1181          p_to_n_sz_ratio,
1182          is_dram_, false, g_tp.max_w_nmos_);
1183    }
1184
1185    if ((blk->flag_two_unique_paths) ||
1186        (blk->number_inputs_L1_gate == 3) ||
1187        ((way_select)&&(dec->num_in_signals == 3)))
1188    { //this means that way_select is driving NAND3 in decoder.
1189      width_nand3_path_n[0] = g_tp.min_w_nmos_;
1190      width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
1191      F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
1192      number_gates_nand3_path = logical_effort(
1193          min_number_gates,
1194          1,
1195          F,
1196          width_nand3_path_n,
1197          width_nand3_path_p,
1198          c_load_nand3_path_out,
1199          p_to_n_sz_ratio,
1200          is_dram_, false, g_tp.max_w_nmos_);
1201    }
1202  }
1203}
1204
1205
1206
1207void PredecBlkDrv::compute_area()
1208{
1209  double area_nand2_path = 0;
1210  double area_nand3_path = 0;
1211  double leak_nand2_path = 0;
1212  double leak_nand3_path = 0;
1213  double gate_leak_nand2_path = 0;
1214  double gate_leak_nand3_path = 0;
1215
1216  if (flag_driver_exists)
1217  { // first check whether a predecoder block driver is needed
1218    for (int i = 0; i < number_gates_nand2_path; ++i)
1219    {
1220      area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def);
1221      leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
1222      gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
1223    }
1224    area_nand2_path *= (num_buffers_driving_1_nand2_load +
1225                        num_buffers_driving_2_nand2_load +
1226                        num_buffers_driving_4_nand2_load);
1227    leak_nand2_path *= (num_buffers_driving_1_nand2_load +
1228                        num_buffers_driving_2_nand2_load +
1229                        num_buffers_driving_4_nand2_load);
1230    gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
1231                            num_buffers_driving_2_nand2_load +
1232                            num_buffers_driving_4_nand2_load);
1233
1234    for (int i = 0; i < number_gates_nand3_path; ++i)
1235    {
1236      area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def);
1237      leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
1238      gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
1239    }
1240    area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1241    leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1242    gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1243
1244    power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
1245    power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
1246    power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
1247    power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
1248    area.set_area(area_nand2_path + area_nand3_path);
1249  }
1250}
1251
1252
1253
1254pair<double, double> PredecBlkDrv::compute_delays(
1255    double inrisetime_nand2_path,
1256    double inrisetime_nand3_path)
1257{
1258  pair<double, double> ret_val;
1259  ret_val.first  = 0;  // outrisetime_nand2_path
1260  ret_val.second = 0;  // outrisetime_nand3_path
1261  int i;
1262  double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
1263  double Vdd = g_tp.peri_global.Vdd;
1264
1265  if (flag_driver_exists)
1266  {
1267    for (i = 0; i < number_gates_nand2_path - 1; ++i)
1268    {
1269      rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
1270      c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
1271      c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1272                    drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1273      tf = rd * (c_intrinsic + c_gate_load);
1274      this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
1275      delay_nand2_path += this_delay;
1276      inrisetime_nand2_path = this_delay / (1.0 - 0.5);
1277      power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
1278    }
1279
1280    // Final inverter drives the predecoder block or the decoder output load
1281    if (number_gates_nand2_path != 0)
1282    {
1283      i = number_gates_nand2_path - 1;
1284      rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
1285      c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1286                    drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1287      c_load = c_load_nand2_path_out;
1288      tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out*c_load/ 2;
1289      this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
1290      delay_nand2_path += this_delay;
1291      ret_val.first = this_delay / (1.0 - 0.5);
1292      power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
1293//      cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <<endl;
1294    }
1295
1296    for (i = 0; i < number_gates_nand3_path - 1; ++i)
1297    {
1298      rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
1299      c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
1300      c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1301                    drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1302      tf = rd * (c_intrinsic + c_gate_load);
1303      this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
1304      delay_nand3_path += this_delay;
1305      inrisetime_nand3_path = this_delay / (1.0 - 0.5);
1306      power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
1307    }
1308
1309    // Final inverter drives the predecoder block or the decoder output load
1310    if (number_gates_nand3_path != 0)
1311    {
1312      i = number_gates_nand3_path - 1;
1313      rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
1314      c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1315                    drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1316      c_load = c_load_nand3_path_out;
1317      tf = rd*(c_intrinsic + c_load) + r_load_nand3_path_out*c_load / 2;
1318      this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
1319      delay_nand3_path += this_delay;
1320      ret_val.second = this_delay / (1.0 - 0.5);
1321      power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
1322    }
1323  }
1324  return ret_val;
1325}
1326
1327
1328double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir)
1329{
1330  return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
1331          num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
1332}
1333
1334
1335
1336Predec::Predec(
1337    PredecBlkDrv * drv1_,
1338    PredecBlkDrv * drv2_)
1339:blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_)
1340{
1341  driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
1342                                drv1->power_nand3_path.readOp.leakage +
1343                                drv2->power_nand2_path.readOp.leakage +
1344                                drv2->power_nand3_path.readOp.leakage;
1345  block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
1346                               blk1->power_nand3_path.readOp.leakage +
1347                               blk1->power_L2.readOp.leakage +
1348                               blk2->power_nand2_path.readOp.leakage +
1349                               blk2->power_nand3_path.readOp.leakage +
1350                               blk2->power_L2.readOp.leakage;
1351  power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
1352
1353  driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
1354                                  drv1->power_nand3_path.readOp.gate_leakage +
1355                                  drv2->power_nand2_path.readOp.gate_leakage +
1356                                  drv2->power_nand3_path.readOp.gate_leakage;
1357  block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
1358                                 blk1->power_nand3_path.readOp.gate_leakage +
1359                                 blk1->power_L2.readOp.gate_leakage +
1360                                 blk2->power_nand2_path.readOp.gate_leakage +
1361                                 blk2->power_nand3_path.readOp.gate_leakage +
1362                                 blk2->power_L2.readOp.gate_leakage;
1363  power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
1364}
1365
1366void PredecBlkDrv::leakage_feedback(double temperature)
1367{
1368  double leak_nand2_path = 0;
1369  double leak_nand3_path = 0;
1370  double gate_leak_nand2_path = 0;
1371  double gate_leak_nand3_path = 0;
1372
1373  if (flag_driver_exists)
1374  { // first check whether a predecoder block driver is needed
1375    for (int i = 0; i < number_gates_nand2_path; ++i)
1376    {
1377      leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
1378      gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
1379    }
1380    leak_nand2_path *= (num_buffers_driving_1_nand2_load +
1381                        num_buffers_driving_2_nand2_load +
1382                        num_buffers_driving_4_nand2_load);
1383    gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
1384                            num_buffers_driving_2_nand2_load +
1385                            num_buffers_driving_4_nand2_load);
1386
1387    for (int i = 0; i < number_gates_nand3_path; ++i)
1388    {
1389      leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
1390      gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
1391    }
1392    leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1393    gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1394
1395    power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
1396    power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
1397    power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
1398    power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
1399  }
1400}
1401
1402double Predec::compute_delays(double inrisetime)
1403{
1404  // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
1405  pair<double, double> tmp_pair1, tmp_pair2;
1406  tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
1407  tmp_pair1 = blk1->compute_delays(tmp_pair1);
1408  tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
1409  tmp_pair2 = blk2->compute_delays(tmp_pair2);
1410  tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
1411
1412  driver_power.readOp.dynamic =
1413    drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
1414    drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
1415    drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
1416    drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
1417
1418  block_power.readOp.dynamic =
1419    blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
1420    blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
1421    blk1->power_L2.readOp.dynamic +
1422    blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path  +
1423    blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
1424    blk2->power_L2.readOp.dynamic;
1425
1426  power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
1427
1428  delay = tmp_pair1.first;
1429  return  tmp_pair1.second;
1430}
1431
1432
1433void Predec::leakage_feedback(double temperature)
1434{
1435  drv1->leakage_feedback(temperature);
1436  drv2->leakage_feedback(temperature);
1437  blk1->leakage_feedback(temperature);
1438  blk2->leakage_feedback(temperature);
1439
1440  driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
1441                                drv1->power_nand3_path.readOp.leakage +
1442                                drv2->power_nand2_path.readOp.leakage +
1443                                drv2->power_nand3_path.readOp.leakage;
1444  block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
1445                               blk1->power_nand3_path.readOp.leakage +
1446                               blk1->power_L2.readOp.leakage +
1447                               blk2->power_nand2_path.readOp.leakage +
1448                               blk2->power_nand3_path.readOp.leakage +
1449                               blk2->power_L2.readOp.leakage;
1450  power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
1451
1452  driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
1453                                  drv1->power_nand3_path.readOp.gate_leakage +
1454                                  drv2->power_nand2_path.readOp.gate_leakage +
1455                                  drv2->power_nand3_path.readOp.gate_leakage;
1456  block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
1457                                 blk1->power_nand3_path.readOp.gate_leakage +
1458                                 blk1->power_L2.readOp.gate_leakage +
1459                                 blk2->power_nand2_path.readOp.gate_leakage +
1460                                 blk2->power_nand3_path.readOp.gate_leakage +
1461                                 blk2->power_L2.readOp.gate_leakage;
1462  power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
1463}
1464
1465// returns <delay, risetime>
1466pair<double, double> Predec::get_max_delay_before_decoder(
1467    pair<double, double> input_pair1,
1468    pair<double, double> input_pair2)
1469{
1470  pair<double, double> ret_val;
1471  double delay;
1472
1473  delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
1474  ret_val.first  = delay;
1475  ret_val.second = input_pair1.first;
1476  delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
1477  if (ret_val.first < delay)
1478  {
1479    ret_val.first  = delay;
1480    ret_val.second = input_pair1.second;
1481  }
1482  delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
1483  if (ret_val.first < delay)
1484  {
1485    ret_val.first  = delay;
1486    ret_val.second = input_pair2.first;
1487  }
1488  delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
1489  if (ret_val.first < delay)
1490  {
1491    ret_val.first  = delay;
1492    ret_val.second = input_pair2.second;
1493  }
1494
1495  return ret_val;
1496}
1497
1498
1499
1500Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram)
1501:number_gates(0),
1502  min_number_gates(2),
1503  c_gate_load(c_gate_load_),
1504  c_wire_load(c_wire_load_),
1505  r_wire_load(r_wire_load_),
1506  delay(0),
1507  power(),
1508  is_dram_(is_dram)
1509{
1510  for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
1511  {
1512    width_n[i] = 0;
1513    width_p[i] = 0;
1514  }
1515
1516  compute_widths();
1517}
1518
1519
1520void Driver::compute_widths()
1521{
1522  double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
1523  double c_load = c_gate_load + c_wire_load;
1524  width_n[0] = g_tp.min_w_nmos_;
1525  width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
1526
1527  double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
1528  number_gates = logical_effort(
1529      min_number_gates,
1530      1,
1531      F,
1532      width_n,
1533      width_p,
1534      c_load,
1535      p_to_n_sz_ratio,
1536      is_dram_, false,
1537      g_tp.max_w_nmos_);
1538}
1539
1540
1541
1542double Driver::compute_delay(double inrisetime)
1543{
1544  int    i;
1545  double rd, c_load, c_intrinsic, tf;
1546  double this_delay = 0;
1547
1548  for (i = 0; i < number_gates - 1; ++i)
1549  {
1550    rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
1551    c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
1552    c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1553                  drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1554    tf = rd * (c_intrinsic + c_load);
1555    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1556    delay += this_delay;
1557    inrisetime = this_delay / (1.0 - 0.5);
1558    power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1559    power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd;
1560    power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
1561  }
1562
1563  i = number_gates - 1;
1564  c_load = c_gate_load + c_wire_load;
1565  rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
1566  c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
1567                drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
1568  tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load);
1569  this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1570  delay += this_delay;
1571  power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1572  power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd;
1573  power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
1574
1575  return this_delay / (1.0 - 0.5);
1576}
1577
1578