4a5
> * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
28c29
< * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
---
> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54,62c55,62
< :exist(false),
< C_ld_dec_out(_C_ld_dec_out),
< R_wire_dec_out(_R_wire_dec_out),
< num_gates(0), num_gates_min(2),
< delay(0),
< //power(),
< fully_assoc(fully_assoc_), is_dram(is_dram_),
< is_wl_tr(is_wl_tr_), cell(cell_)
< {
---
> : exist(false),
> C_ld_dec_out(_C_ld_dec_out),
> R_wire_dec_out(_R_wire_dec_out),
> num_gates(0), num_gates_min(2),
> delay(0),
> //power(),
> fully_assoc(fully_assoc_), is_dram(is_dram_),
> is_wl_tr(is_wl_tr_), cell(cell_) {
64,68c64,67
< for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
< {
< w_dec_n[i] = 0;
< w_dec_p[i] = 0;
< }
---
> for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
> w_dec_n[i] = 0;
> w_dec_p[i] = 0;
> }
70,75c69,74
< /*
< * _num_dec_signals is the number of decoded signal as output
< * num_addr_bits_dec is the number of signal to be decoded
< * as the decoders input.
< */
< int num_addr_bits_dec = _log2(_num_dec_signals);
---
> /*
> * _num_dec_signals is the number of decoded signal as output
> * num_addr_bits_dec is the number of signal to be decoded
> * as the decoders input.
> */
> int num_addr_bits_dec = _log2(_num_dec_signals);
77,91c76,84
< if (num_addr_bits_dec < 4)
< {
< if (flag_way_select)
< {
< exist = true;
< num_in_signals = 2;
< }
< else
< {
< num_in_signals = 0;
< }
< }
< else
< {
< exist = true;
---
> if (num_addr_bits_dec < 4) {
> if (flag_way_select) {
> exist = true;
> num_in_signals = 2;
> } else {
> num_in_signals = 0;
> }
> } else {
> exist = true;
93,95c86,90
< if (flag_way_select)
< {
< num_in_signals = 3;
---
> if (flag_way_select) {
> num_in_signals = 3;
> } else {
> num_in_signals = 2;
> }
97,101d91
< else
< {
< num_in_signals = 2;
< }
< }
103,107c93,97
< assert(cell.h>0);
< assert(cell.w>0);
< // the height of a row-decoder-driver cell is fixed to be 4 * cell.h;
< //area.h = 4 * cell.h;
< area.h = g_tp.h_dec * cell.h;
---
> assert(cell.h > 0);
> assert(cell.w > 0);
> // the height of a row-decoder-driver cell is fixed to be 4 * cell.h;
> //area.h = 4 * cell.h;
> area.h = g_tp.h_dec * cell.h;
109,110c99,100
< compute_widths();
< compute_area();
---
> compute_widths();
> compute_area();
115,120c105,109
< void Decoder::compute_widths()
< {
< double F;
< double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
< double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
< double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
---
> void Decoder::compute_widths() {
> double F;
> double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
> double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
> double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
122,135c111,120
< if (exist)
< {
< if (num_in_signals == 2 || fully_assoc)
< {
< w_dec_n[0] = 2 * g_tp.min_w_nmos_;
< w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
< F = gnand2;
< }
< else
< {
< w_dec_n[0] = 3 * g_tp.min_w_nmos_;
< w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
< F = gnand3;
< }
---
> if (exist) {
> if (num_in_signals == 2 || fully_assoc) {
> w_dec_n[0] = 2 * g_tp.min_w_nmos_;
> w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
> F = gnand2;
> } else {
> w_dec_n[0] = 3 * g_tp.min_w_nmos_;
> w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
> F = gnand3;
> }
137,150c122,135
< F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
< gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));
< num_gates = logical_effort(
< num_gates_min,
< num_in_signals == 2 ? gnand2 : gnand3,
< F,
< w_dec_n,
< w_dec_p,
< C_ld_dec_out,
< p_to_n_sz_ratio,
< is_dram,
< is_wl_tr,
< g_tp.max_w_nmos_dec);
< }
---
> F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
> gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));
> num_gates = logical_effort(
> num_gates_min,
> num_in_signals == 2 ? gnand2 : gnand3,
> F,
> w_dec_n,
> w_dec_p,
> C_ld_dec_out,
> p_to_n_sz_ratio,
> is_dram,
> is_wl_tr,
> g_tp.max_w_nmos_dec);
> }
155,159c140,143
< void Decoder::compute_area()
< {
< double cumulative_area = 0;
< double cumulative_curr = 0; // cumulative leakage current
< double cumulative_curr_Ig = 0; // cumulative leakage current
---
> void Decoder::compute_area() {
> double cumulative_area = 0;
> double cumulative_curr = 0; // cumulative leakage current
> double cumulative_curr_Ig = 0; // cumulative leakage current
161,174c145,160
< if (exist)
< { // First check if this decoder exists
< if (num_in_signals == 2)
< {
< cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
< cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
< cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
< }
< else if (num_in_signals == 3)
< {
< cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
< cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
< cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
< }
---
> if (exist) { // First check if this decoder exists
> if (num_in_signals == 2) {
> cumulative_area =
> compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
> cumulative_curr =
> cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
> cumulative_curr_Ig =
> cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
> } else if (num_in_signals == 3) {
> cumulative_area =
> compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
> cumulative_curr =
> cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
> cumulative_curr_Ig =
> cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
> }
176,183c162,171
< for (int i = 1; i < num_gates; i++)
< {
< cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
< cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
< cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
< }
< power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
< power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
---
> for (int i = 1; i < num_gates; i++) {
> cumulative_area +=
> compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
> cumulative_curr +=
> cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
> cumulative_curr_Ig =
> cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
> }
> power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
> power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
185,186c173,174
< area.w = (cumulative_area / area.h);
< }
---
> area.w = (cumulative_area / area.h);
> }
191,198c179,184
< double Decoder::compute_delays(double inrisetime)
< {
< if (exist)
< {
< double ret_val = 0; // outrisetime
< int i;
< double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
< double Vdd = g_tp.peri_global.Vdd;
---
> double Decoder::compute_delays(double inrisetime) {
> if (exist) {
> double ret_val = 0; // outrisetime
> int i;
> double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
> double Vdd = g_tp.peri_global.Vdd;
200,211c186,192
< if ((is_wl_tr) && (is_dram))
< {
< Vpp = g_tp.vpp;
< }
< else if (is_wl_tr)
< {
< Vpp = g_tp.sram_cell.Vdd;
< }
< else
< {
< Vpp = g_tp.peri_global.Vdd;
< }
---
> if ((is_wl_tr) && (is_dram)) {
> Vpp = g_tp.vpp;
> } else if (is_wl_tr) {
> Vpp = g_tp.sram_cell.Vdd;
> } else {
> Vpp = g_tp.peri_global.Vdd;
> }
213,222c194,203
< // first check whether a decoder is required at all
< rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
< c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
< c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
< drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
< tf = rd * (c_intrinsic + c_load);
< this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
< delay += this_delay;
< inrisetime = this_delay / (1.0 - 0.5);
< power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
---
> // first check whether a decoder is required at all
> rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
> c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
> c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
> drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
> tf = rd * (c_intrinsic + c_load);
> this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
> delay += this_delay;
> inrisetime = this_delay / (1.0 - 0.5);
> power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
224,235c205,215
< for (i = 1; i < num_gates - 1; ++i)
< {
< rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
< c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
< c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
< drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
< tf = rd * (c_intrinsic + c_load);
< this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
< delay += this_delay;
< inrisetime = this_delay / (1.0 - 0.5);
< power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
< }
---
> for (i = 1; i < num_gates - 1; ++i) {
> rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
> c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
> c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
> drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
> tf = rd * (c_intrinsic + c_load);
> this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
> delay += this_delay;
> inrisetime = this_delay / (1.0 - 0.5);
> power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
> }
237,247c217,227
< // add delay of final inverter that drives the wordline
< i = num_gates - 1;
< c_load = C_ld_dec_out;
< rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
< c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
< drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
< tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2;
< this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
< delay += this_delay;
< ret_val = this_delay / (1.0 - 0.5);
< power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
---
> // add delay of final inverter that drives the wordline
> i = num_gates - 1;
> c_load = C_ld_dec_out;
> rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
> c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
> drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
> tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2;
> this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
> delay += this_delay;
> ret_val = this_delay / (1.0 - 0.5);
> power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
249,254c229,232
< return ret_val;
< }
< else
< {
< return 0.0;
< }
---
> return ret_val;
> } else {
> return 0.0;
> }
294,322c272,299
< :dec(dec_),
< exist(false),
< number_input_addr_bits(0),
< C_ld_predec_blk_out(0),
< R_wire_predec_blk_out(0),
< branch_effort_nand2_gate_output(1),
< branch_effort_nand3_gate_output(1),
< flag_two_unique_paths(false),
< flag_L2_gate(0),
< number_inputs_L1_gate(0),
< number_gates_L1_nand2_path(0),
< number_gates_L1_nand3_path(0),
< number_gates_L2(0),
< min_number_gates_L1(2),
< min_number_gates_L2(2),
< num_L1_active_nand2_path(0),
< num_L1_active_nand3_path(0),
< delay_nand2_path(0),
< delay_nand3_path(0),
< power_nand2_path(),
< power_nand3_path(),
< power_L2(),
< is_dram_(is_dram)
< {
< int branch_effort_predec_out;
< double C_ld_dec_gate;
< int num_addr_bits_dec = _log2(num_dec_signals);
< int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
< int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
---
> : dec(dec_),
> exist(false),
> number_input_addr_bits(0),
> C_ld_predec_blk_out(0),
> R_wire_predec_blk_out(0),
> branch_effort_nand2_gate_output(1),
> branch_effort_nand3_gate_output(1),
> flag_two_unique_paths(false),
> flag_L2_gate(0),
> number_inputs_L1_gate(0),
> number_gates_L1_nand2_path(0),
> number_gates_L1_nand3_path(0),
> number_gates_L2(0),
> min_number_gates_L1(2),
> min_number_gates_L2(2),
> num_L1_active_nand2_path(0),
> num_L1_active_nand3_path(0),
> delay_nand2_path(0),
> delay_nand3_path(0),
> power_nand2_path(),
> power_nand3_path(),
> power_L2(),
> is_dram_(is_dram) {
> int branch_effort_predec_out;
> double C_ld_dec_gate;
> int num_addr_bits_dec = _log2(num_dec_signals);
> int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
> int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
324,327c301,304
< w_L1_nand2_n[0] = 0;
< w_L1_nand2_p[0] = 0;
< w_L1_nand3_n[0] = 0;
< w_L1_nand3_p[0] = 0;
---
> w_L1_nand2_n[0] = 0;
> w_L1_nand2_p[0] = 0;
> w_L1_nand3_n[0] = 0;
> w_L1_nand3_p[0] = 0;
329,333c306,332
< if (is_blk1 == true)
< {
< if (num_addr_bits_dec <= 0)
< {
< return;
---
> if (is_blk1 == true) {
> if (num_addr_bits_dec <= 0) {
> return;
> } else if (num_addr_bits_dec < 4) {
> // Just one predecoder block is required with NAND2 gates. No decoder required.
> // The first level of predecoding directly drives the decoder output load
> exist = true;
> number_input_addr_bits = num_addr_bits_dec;
> R_wire_predec_blk_out = dec->R_wire_dec_out;
> C_ld_predec_blk_out = dec->C_ld_dec_out;
> } else {
> exist = true;
> number_input_addr_bits = blk1_num_input_addr_bits;
> branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
> C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
> R_wire_predec_blk_out = R_wire_predec_blk_out_;
> C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
> }
> } else {
> if (num_addr_bits_dec >= 4) {
> exist = true;
> number_input_addr_bits = blk2_num_input_addr_bits;
> branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
> C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
> R_wire_predec_blk_out = R_wire_predec_blk_out_;
> C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
> }
335,365d333
< else if (num_addr_bits_dec < 4)
< {
< // Just one predecoder block is required with NAND2 gates. No decoder required.
< // The first level of predecoding directly drives the decoder output load
< exist = true;
< number_input_addr_bits = num_addr_bits_dec;
< R_wire_predec_blk_out = dec->R_wire_dec_out;
< C_ld_predec_blk_out = dec->C_ld_dec_out;
< }
< else
< {
< exist = true;
< number_input_addr_bits = blk1_num_input_addr_bits;
< branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
< C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
< R_wire_predec_blk_out = R_wire_predec_blk_out_;
< C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
< }
< }
< else
< {
< if (num_addr_bits_dec >= 4)
< {
< exist = true;
< number_input_addr_bits = blk2_num_input_addr_bits;
< branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
< C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
< R_wire_predec_blk_out = R_wire_predec_blk_out_;
< C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
< }
< }
367,368c335,336
< compute_widths();
< compute_area();
---
> compute_widths();
> compute_area();
373,378c341,345
< void PredecBlk::compute_widths()
< {
< double F, c_load_nand3_path, c_load_nand2_path;
< double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
< double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
< double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
---
> void PredecBlk::compute_widths() {
> double F, c_load_nand3_path, c_load_nand2_path;
> double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
> double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
> double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
380c347
< if (exist == false) return;
---
> if (exist == false) return;
383,384c350
< switch (number_input_addr_bits)
< {
---
> switch (number_input_addr_bits) {
386,389c352,355
< flag_two_unique_paths = false;
< number_inputs_L1_gate = 2;
< flag_L2_gate = 0;
< break;
---
> flag_two_unique_paths = false;
> number_inputs_L1_gate = 2;
> flag_L2_gate = 0;
> break;
391,394c357,360
< flag_two_unique_paths = false;
< number_inputs_L1_gate = 2;
< flag_L2_gate = 0;
< break;
---
> flag_two_unique_paths = false;
> number_inputs_L1_gate = 2;
> flag_L2_gate = 0;
> break;
396,399c362,365
< flag_two_unique_paths = false;
< number_inputs_L1_gate = 3;
< flag_L2_gate = 0;
< break;
---
> flag_two_unique_paths = false;
> number_inputs_L1_gate = 3;
> flag_L2_gate = 0;
> break;
401,405c367,371
< flag_two_unique_paths = false;
< number_inputs_L1_gate = 2;
< flag_L2_gate = 2;
< branch_effort_nand2_gate_output = 4;
< break;
---
> flag_two_unique_paths = false;
> number_inputs_L1_gate = 2;
> flag_L2_gate = 2;
> branch_effort_nand2_gate_output = 4;
> break;
407,411c373,377
< flag_two_unique_paths = true;
< flag_L2_gate = 2;
< branch_effort_nand2_gate_output = 8;
< branch_effort_nand3_gate_output = 4;
< break;
---
> flag_two_unique_paths = true;
> flag_L2_gate = 2;
> branch_effort_nand2_gate_output = 8;
> branch_effort_nand3_gate_output = 4;
> break;
413,417c379,383
< flag_two_unique_paths = false;
< number_inputs_L1_gate = 3;
< flag_L2_gate = 2;
< branch_effort_nand3_gate_output = 8;
< break;
---
> flag_two_unique_paths = false;
> number_inputs_L1_gate = 3;
> flag_L2_gate = 2;
> branch_effort_nand3_gate_output = 8;
> break;
419,423c385,389
< flag_two_unique_paths = true;
< flag_L2_gate = 3;
< branch_effort_nand2_gate_output = 32;
< branch_effort_nand3_gate_output = 16;
< break;
---
> flag_two_unique_paths = true;
> flag_L2_gate = 3;
> branch_effort_nand2_gate_output = 32;
> branch_effort_nand3_gate_output = 16;
> break;
425,429c391,395
< flag_two_unique_paths = true;
< flag_L2_gate = 3;
< branch_effort_nand2_gate_output = 64;
< branch_effort_nand3_gate_output = 32;
< break;
---
> flag_two_unique_paths = true;
> flag_L2_gate = 3;
> branch_effort_nand2_gate_output = 64;
> branch_effort_nand3_gate_output = 32;
> break;
431,435c397,401
< flag_two_unique_paths = false;
< number_inputs_L1_gate = 3;
< flag_L2_gate = 3;
< branch_effort_nand3_gate_output = 64;
< break;
---
> flag_two_unique_paths = false;
> number_inputs_L1_gate = 3;
> flag_L2_gate = 3;
> branch_effort_nand3_gate_output = 64;
> break;
437,447c403,404
< assert(0);
< break;
< }
<
< // find the number of gates and sizing in second level of predecoder (if there is a second level)
< if (flag_L2_gate)
< {
< if (flag_L2_gate == 2)
< { // 2nd level is a NAND2 gate
< w_L2_n[0] = 2 * g_tp.min_w_nmos_;
< F = gnand2;
---
> assert(0);
> break;
449,465d405
< else
< { // 2nd level is a NAND3 gate
< w_L2_n[0] = 3 * g_tp.min_w_nmos_;
< F = gnand3;
< }
< w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
< F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
< number_gates_L2 = logical_effort(
< min_number_gates_L2,
< flag_L2_gate == 2 ? gnand2 : gnand3,
< F,
< w_L2_n,
< w_L2_p,
< C_ld_predec_blk_out,
< p_to_n_sz_ratio,
< is_dram_, false,
< g_tp.max_w_nmos_);
467,490c407,427
< // Now find the number of gates and widths in first level of predecoder
< if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2))
< { // Whenever flag_two_unique_paths is true, it means first level of decoder employs
< // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, it means
< // a NAND2 gate is used in the first level of the predecoder
< c_load_nand2_path = branch_effort_nand2_gate_output *
< (gate_C(w_L2_n[0], 0, is_dram_) +
< gate_C(w_L2_p[0], 0, is_dram_));
< w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
< w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
< F = gnand2 * c_load_nand2_path /
< (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
< gate_C(w_L1_nand2_p[0], 0, is_dram_));
< number_gates_L1_nand2_path = logical_effort(
< min_number_gates_L1,
< gnand2,
< F,
< w_L1_nand2_n,
< w_L1_nand2_p,
< c_load_nand2_path,
< p_to_n_sz_ratio,
< is_dram_, false,
< g_tp.max_w_nmos_);
< }
---
> // find the number of gates and sizing in second level of predecoder (if there is a second level)
> if (flag_L2_gate) {
> if (flag_L2_gate == 2) { // 2nd level is a NAND2 gate
> w_L2_n[0] = 2 * g_tp.min_w_nmos_;
> F = gnand2;
> } else { // 2nd level is a NAND3 gate
> w_L2_n[0] = 3 * g_tp.min_w_nmos_;
> F = gnand3;
> }
> w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
> F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
> number_gates_L2 = logical_effort(
> min_number_gates_L2,
> flag_L2_gate == 2 ? gnand2 : gnand3,
> F,
> w_L2_n,
> w_L2_p,
> C_ld_predec_blk_out,
> p_to_n_sz_ratio,
> is_dram_, false,
> g_tp.max_w_nmos_);
492,514c429,512
< //Now find widths of gates along path in which first gate is a NAND3
< if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3))
< { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
< // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means
< // a NAND3 gate is used in the first level of the predecoder
< c_load_nand3_path = branch_effort_nand3_gate_output *
< (gate_C(w_L2_n[0], 0, is_dram_) +
< gate_C(w_L2_p[0], 0, is_dram_));
< w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
< w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
< F = gnand3 * c_load_nand3_path /
< (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
< gate_C(w_L1_nand3_p[0], 0, is_dram_));
< number_gates_L1_nand3_path = logical_effort(
< min_number_gates_L1,
< gnand3,
< F,
< w_L1_nand3_n,
< w_L1_nand3_p,
< c_load_nand3_path,
< p_to_n_sz_ratio,
< is_dram_, false,
< g_tp.max_w_nmos_);
---
> // Now find the number of gates and widths in first level of predecoder
> if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
> // Whenever flag_two_unique_paths is true, it means first level of
> // decoder employs
> // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2,
> // it means
> // a NAND2 gate is used in the first level of the predecoder
> c_load_nand2_path = branch_effort_nand2_gate_output *
> (gate_C(w_L2_n[0], 0, is_dram_) +
> gate_C(w_L2_p[0], 0, is_dram_));
> w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
> w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
> F = gnand2 * c_load_nand2_path /
> (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
> gate_C(w_L1_nand2_p[0], 0, is_dram_));
> number_gates_L1_nand2_path = logical_effort(
> min_number_gates_L1,
> gnand2,
> F,
> w_L1_nand2_n,
> w_L1_nand2_p,
> c_load_nand2_path,
> p_to_n_sz_ratio,
> is_dram_, false,
> g_tp.max_w_nmos_);
> }
>
> //Now find widths of gates along path in which first gate is a NAND3
> if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
> // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means
> // a NAND3 gate is used in the first level of the predecoder
> c_load_nand3_path = branch_effort_nand3_gate_output *
> (gate_C(w_L2_n[0], 0, is_dram_) +
> gate_C(w_L2_p[0], 0, is_dram_));
> w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
> w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
> F = gnand3 * c_load_nand3_path /
> (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
> gate_C(w_L1_nand3_p[0], 0, is_dram_));
> number_gates_L1_nand3_path = logical_effort(
> min_number_gates_L1,
> gnand3,
> F,
> w_L1_nand3_n,
> w_L1_nand3_p,
> c_load_nand3_path,
> p_to_n_sz_ratio,
> is_dram_, false,
> g_tp.max_w_nmos_);
> }
> } else { // find number of gates and widths in first level of predecoder block when there is no second level
> if (number_inputs_L1_gate == 2) {
> w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
> w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
> F = gnand2 * C_ld_predec_blk_out /
> (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
> gate_C(w_L1_nand2_p[0], 0, is_dram_));
> number_gates_L1_nand2_path = logical_effort(
> min_number_gates_L1,
> gnand2,
> F,
> w_L1_nand2_n,
> w_L1_nand2_p,
> C_ld_predec_blk_out,
> p_to_n_sz_ratio,
> is_dram_, false,
> g_tp.max_w_nmos_);
> } else if (number_inputs_L1_gate == 3) {
> w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
> w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
> F = gnand3 * C_ld_predec_blk_out /
> (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
> gate_C(w_L1_nand3_p[0], 0, is_dram_));
> number_gates_L1_nand3_path = logical_effort(
> min_number_gates_L1,
> gnand3,
> F,
> w_L1_nand3_n,
> w_L1_nand3_p,
> C_ld_predec_blk_out,
> p_to_n_sz_ratio,
> is_dram_, false,
> g_tp.max_w_nmos_);
> }
516,555d513
< }
< else
< { // find number of gates and widths in first level of predecoder block when there is no second level
< if (number_inputs_L1_gate == 2)
< {
< w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
< w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
< F = gnand2*C_ld_predec_blk_out /
< (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
< gate_C(w_L1_nand2_p[0], 0, is_dram_));
< number_gates_L1_nand2_path = logical_effort(
< min_number_gates_L1,
< gnand2,
< F,
< w_L1_nand2_n,
< w_L1_nand2_p,
< C_ld_predec_blk_out,
< p_to_n_sz_ratio,
< is_dram_, false,
< g_tp.max_w_nmos_);
< }
< else if (number_inputs_L1_gate == 3)
< {
< w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
< w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
< F = gnand3*C_ld_predec_blk_out /
< (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
< gate_C(w_L1_nand3_p[0], 0, is_dram_));
< number_gates_L1_nand3_path = logical_effort(
< min_number_gates_L1,
< gnand3,
< F,
< w_L1_nand3_n,
< w_L1_nand3_p,
< C_ld_predec_blk_out,
< p_to_n_sz_ratio,
< is_dram_, false,
< g_tp.max_w_nmos_);
< }
< }
560,569c518,525
< void PredecBlk::compute_area()
< {
< if (exist)
< { // First check whether a predecoder block is needed
< int num_L1_nand2 = 0;
< int num_L1_nand3 = 0;
< int num_L2 = 0;
< double tot_area_L1_nand3 =0;
< double leak_L1_nand3 =0;
< double gate_leak_L1_nand3 =0;
---
> void PredecBlk::compute_area() {
> if (exist) { // First check whether a predecoder block is needed
> int num_L1_nand2 = 0;
> int num_L1_nand3 = 0;
> int num_L2 = 0;
> double tot_area_L1_nand3 = 0;
> double leak_L1_nand3 = 0;
> double gate_leak_L1_nand3 = 0;
571,583c527,538
< double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
< double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
< double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
< if (number_inputs_L1_gate != 3) {
< tot_area_L1_nand3 = 0;
< leak_L1_nand3 = 0;
< gate_leak_L1_nand3 =0;
< }
< else {
< tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
< leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
< gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
< }
---
> double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
> double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
> double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
> if (number_inputs_L1_gate != 3) {
> tot_area_L1_nand3 = 0;
> leak_L1_nand3 = 0;
> gate_leak_L1_nand3 = 0;
> } else {
> tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
> leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
> gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
> }
585,646c540,600
< switch (number_input_addr_bits)
< {
< case 1: //2 NAND2 gates
< num_L1_nand2 = 2;
< num_L2 = 0;
< num_L1_active_nand2_path =1;
< num_L1_active_nand3_path =0;
< break;
< case 2: //4 NAND2 gates
< num_L1_nand2 = 4;
< num_L2 = 0;
< num_L1_active_nand2_path =1;
< num_L1_active_nand3_path =0;
< break;
< case 3: //8 NAND3 gates
< num_L1_nand3 = 8;
< num_L2 = 0;
< num_L1_active_nand2_path =0;
< num_L1_active_nand3_path =1;
< break;
< case 4: //4 + 4 NAND2 gates
< num_L1_nand2 = 8;
< num_L2 = 16;
< num_L1_active_nand2_path =2;
< num_L1_active_nand3_path =0;
< break;
< case 5: //4 NAND2 gates, 8 NAND3 gates
< num_L1_nand2 = 4;
< num_L1_nand3 = 8;
< num_L2 = 32;
< num_L1_active_nand2_path =1;
< num_L1_active_nand3_path =1;
< break;
< case 6: //8 + 8 NAND3 gates
< num_L1_nand3 = 16;
< num_L2 = 64;
< num_L1_active_nand2_path =0;
< num_L1_active_nand3_path =2;
< break;
< case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
< num_L1_nand2 = 8;
< num_L1_nand3 = 8;
< num_L2 = 128;
< num_L1_active_nand2_path =2;
< num_L1_active_nand3_path =1;
< break;
< case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
< num_L1_nand2 = 4;
< num_L1_nand3 = 16;
< num_L2 = 256;
< num_L1_active_nand2_path =2;
< num_L1_active_nand3_path =2;
< break;
< case 9: //8 + 8 + 8 NAND3 gates
< num_L1_nand3 = 24;
< num_L2 = 512;
< num_L1_active_nand2_path =0;
< num_L1_active_nand3_path =3;
< break;
< default:
< break;
< }
---
> switch (number_input_addr_bits) {
> case 1: //2 NAND2 gates
> num_L1_nand2 = 2;
> num_L2 = 0;
> num_L1_active_nand2_path = 1;
> num_L1_active_nand3_path = 0;
> break;
> case 2: //4 NAND2 gates
> num_L1_nand2 = 4;
> num_L2 = 0;
> num_L1_active_nand2_path = 1;
> num_L1_active_nand3_path = 0;
> break;
> case 3: //8 NAND3 gates
> num_L1_nand3 = 8;
> num_L2 = 0;
> num_L1_active_nand2_path = 0;
> num_L1_active_nand3_path = 1;
> break;
> case 4: //4 + 4 NAND2 gates
> num_L1_nand2 = 8;
> num_L2 = 16;
> num_L1_active_nand2_path = 2;
> num_L1_active_nand3_path = 0;
> break;
> case 5: //4 NAND2 gates, 8 NAND3 gates
> num_L1_nand2 = 4;
> num_L1_nand3 = 8;
> num_L2 = 32;
> num_L1_active_nand2_path = 1;
> num_L1_active_nand3_path = 1;
> break;
> case 6: //8 + 8 NAND3 gates
> num_L1_nand3 = 16;
> num_L2 = 64;
> num_L1_active_nand2_path = 0;
> num_L1_active_nand3_path = 2;
> break;
> case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
> num_L1_nand2 = 8;
> num_L1_nand3 = 8;
> num_L2 = 128;
> num_L1_active_nand2_path = 2;
> num_L1_active_nand3_path = 1;
> break;
> case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
> num_L1_nand2 = 4;
> num_L1_nand3 = 16;
> num_L2 = 256;
> num_L1_active_nand2_path = 2;
> num_L1_active_nand3_path = 2;
> break;
> case 9: //8 + 8 + 8 NAND3 gates
> num_L1_nand3 = 24;
> num_L2 = 512;
> num_L1_active_nand2_path = 0;
> num_L1_active_nand3_path = 3;
> break;
> default:
> break;
> }
648,656c602,609
< for (int i = 1; i < number_gates_L1_nand2_path; ++i)
< {
< tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
< leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
< gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
< }
< tot_area_L1_nand2 *= num_L1_nand2;
< leak_L1_nand2 *= num_L1_nand2;
< gate_leak_L1_nand2 *= num_L1_nand2;
---
> for (int i = 1; i < number_gates_L1_nand2_path; ++i) {
> tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
> leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
> gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
> }
> tot_area_L1_nand2 *= num_L1_nand2;
> leak_L1_nand2 *= num_L1_nand2;
> gate_leak_L1_nand2 *= num_L1_nand2;
658,666c611,618
< for (int i = 1; i < number_gates_L1_nand3_path; ++i)
< {
< tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
< leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
< gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
< }
< tot_area_L1_nand3 *= num_L1_nand3;
< leak_L1_nand3 *= num_L1_nand3;
< gate_leak_L1_nand3 *= num_L1_nand3;
---
> for (int i = 1; i < number_gates_L1_nand3_path; ++i) {
> tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
> leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
> gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
> }
> tot_area_L1_nand3 *= num_L1_nand3;
> leak_L1_nand3 *= num_L1_nand3;
> gate_leak_L1_nand3 *= num_L1_nand3;
668,671c620,623
< double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
< double cumulative_area_L2 = 0.0;
< double leakage_L2 = 0.0;
< double gate_leakage_L2 = 0.0;
---
> double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
> double cumulative_area_L2 = 0.0;
> double leakage_L2 = 0.0;
> double gate_leakage_L2 = 0.0;
673,684c625,633
< if (flag_L2_gate == 2)
< {
< cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
< leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
< gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
< }
< else if (flag_L2_gate == 3)
< {
< cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
< leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
< gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
< }
---
> if (flag_L2_gate == 2) {
> cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
> leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
> gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
> } else if (flag_L2_gate == 3) {
> cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
> leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
> gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
> }
686,694c635,642
< for (int i = 1; i < number_gates_L2; ++i)
< {
< cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
< leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
< gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
< }
< cumulative_area_L2 *= num_L2;
< leakage_L2 *= num_L2;
< gate_leakage_L2 *= num_L2;
---
> for (int i = 1; i < number_gates_L2; ++i) {
> cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
> leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
> gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
> }
> cumulative_area_L2 *= num_L2;
> leakage_L2 *= num_L2;
> gate_leakage_L2 *= num_L2;
696,703c644,651
< power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
< power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
< power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
< area.set_area(cumulative_area_L1 + cumulative_area_L2);
< power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
< power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
< power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
< }
---
> power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
> power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
> power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
> area.set_area(cumulative_area_L1 + cumulative_area_L2);
> power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
> power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
> power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
> }
709,713c657,660
< pair<double, double> inrisetime) // <nand2, nand3>
< {
< pair<double, double> ret_val;
< ret_val.first = 0; // outrisetime_nand2_path
< ret_val.second = 0; // outrisetime_nand3_path
---
> pair<double, double> inrisetime) { // <nand2, nand3>
> pair<double, double> ret_val;
> ret_val.first = 0; // outrisetime_nand2_path
> ret_val.second = 0; // outrisetime_nand3_path
715,719c662,666
< double inrisetime_nand2_path = inrisetime.first;
< double inrisetime_nand3_path = inrisetime.second;
< int i;
< double rd, c_load, c_intrinsic, tf, this_delay;
< double Vdd = g_tp.peri_global.Vdd;
---
> double inrisetime_nand2_path = inrisetime.first;
> double inrisetime_nand3_path = inrisetime.second;
> int i;
> double rd, c_load, c_intrinsic, tf, this_delay;
> double Vdd = g_tp.peri_global.Vdd;
721,738c668,683
< // TODO: following delay calculation part can be greatly simplified.
< // first check whether a predecoder block is required
< if (exist)
< {
< //Find delay in first level of predecoder block
< //First find delay in path
< if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2))
< {
< //First gate is a NAND2 gate
< rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
< c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
< c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load);
< this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
< delay_nand2_path += this_delay;
< inrisetime_nand2_path = this_delay / (1.0 - 0.5);
< power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
---
> // TODO: following delay calculation part can be greatly simplified.
> // first check whether a predecoder block is required
> if (exist) {
> //Find delay in first level of predecoder block
> //First find delay in path
> if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
> //First gate is a NAND2 gate
> rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
> c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
> c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load);
> this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
> delay_nand2_path += this_delay;
> inrisetime_nand2_path = this_delay / (1.0 - 0.5);
> power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
740,752c685,696
< //Add delays of all but the last inverter in the chain
< for (i = 1; i < number_gates_L1_nand2_path - 1; ++i)
< {
< rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
< c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
< c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load);
< this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
< delay_nand2_path += this_delay;
< inrisetime_nand2_path = this_delay / (1.0 - 0.5);
< power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
< }
---
> //Add delays of all but the last inverter in the chain
> for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) {
> rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
> c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
> c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load);
> this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
> delay_nand2_path += this_delay;
> inrisetime_nand2_path = this_delay / (1.0 - 0.5);
> power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
> }
754,779c698,722
< //Add delay of the last inverter
< i = number_gates_L1_nand2_path - 1;
< rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
< if (flag_L2_gate)
< {
< c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
< c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load);
< this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
< delay_nand2_path += this_delay;
< inrisetime_nand2_path = this_delay / (1.0 - 0.5);
< power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
< }
< else
< { //First level directly drives decoder output load
< c_load = C_ld_predec_blk_out;
< c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
< this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
< delay_nand2_path += this_delay;
< ret_val.first = this_delay / (1.0 - 0.5);
< power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
< }
< }
---
> //Add delay of the last inverter
> i = number_gates_L1_nand2_path - 1;
> rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
> if (flag_L2_gate) {
> c_load = branch_effort_nand2_gate_output *
> (gate_C(w_L2_n[0], 0, is_dram_) +
> gate_C(w_L2_p[0], 0, is_dram_));
> c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load);
> this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
> delay_nand2_path += this_delay;
> inrisetime_nand2_path = this_delay / (1.0 - 0.5);
> power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
> } else { //First level directly drives decoder output load
> c_load = C_ld_predec_blk_out;
> c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
> this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
> delay_nand2_path += this_delay;
> ret_val.first = this_delay / (1.0 - 0.5);
> power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
> }
> }
781,792c724,735
< if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3))
< { //Check if the number of gates in the first level is more than 1.
< //First gate is a NAND3 gate
< rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
< c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
< c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load);
< this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
< delay_nand3_path += this_delay;
< inrisetime_nand3_path = this_delay / (1.0 - 0.5);
< power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
---
> if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) {
> //Check if the number of gates in the first level is more than 1.
> //First gate is a NAND3 gate
> rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
> c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
> c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load);
> this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
> delay_nand3_path += this_delay;
> inrisetime_nand3_path = this_delay / (1.0 - 0.5);
> power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
794,806c737,748
< //Add delays of all but the last inverter in the chain
< for (i = 1; i < number_gates_L1_nand3_path - 1; ++i)
< {
< rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
< c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
< c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load);
< this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
< delay_nand3_path += this_delay;
< inrisetime_nand3_path = this_delay / (1.0 - 0.5);
< power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
< }
---
> //Add delays of all but the last inverter in the chain
> for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) {
> rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
> c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
> c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load);
> this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
> delay_nand3_path += this_delay;
> inrisetime_nand3_path = this_delay / (1.0 - 0.5);
> power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
> }
808,833c750,774
< //Add delay of the last inverter
< i = number_gates_L1_nand3_path - 1;
< rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
< if (flag_L2_gate)
< {
< c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
< c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load);
< this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
< delay_nand3_path += this_delay;
< inrisetime_nand3_path = this_delay / (1.0 - 0.5);
< power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
< }
< else
< { //First level directly drives decoder output load
< c_load = C_ld_predec_blk_out;
< c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
< this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
< delay_nand3_path += this_delay;
< ret_val.second = this_delay / (1.0 - 0.5);
< power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
< }
< }
---
> //Add delay of the last inverter
> i = number_gates_L1_nand3_path - 1;
> rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
> if (flag_L2_gate) {
> c_load = branch_effort_nand3_gate_output *
> (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0,
> is_dram_));
> c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load);
> this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
> delay_nand3_path += this_delay;
> inrisetime_nand3_path = this_delay / (1.0 - 0.5);
> power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
> } else { //First level directly drives decoder output load
> c_load = C_ld_predec_blk_out;
> c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
> this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
> delay_nand3_path += this_delay;
> ret_val.second = this_delay / (1.0 - 0.5);
> power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
> }
> }
835,861c776,798
< // Find delay through second level
< if (flag_L2_gate)
< {
< if (flag_L2_gate == 2)
< {
< rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
< c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
< c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load);
< this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
< delay_nand2_path += this_delay;
< inrisetime_nand2_path = this_delay / (1.0 - 0.5);
< power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
< }
< else
< { // flag_L2_gate = 3
< rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
< c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
< c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load);
< this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
< delay_nand3_path += this_delay;
< inrisetime_nand3_path = this_delay / (1.0 - 0.5);
< power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
< }
---
> // Find delay through second level
> if (flag_L2_gate) {
> if (flag_L2_gate == 2) {
> rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
> c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
> c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load);
> this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
> delay_nand2_path += this_delay;
> inrisetime_nand2_path = this_delay / (1.0 - 0.5);
> power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
> } else { // flag_L2_gate = 3
> rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
> c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
> c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load);
> this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
> delay_nand3_path += this_delay;
> inrisetime_nand3_path = this_delay / (1.0 - 0.5);
> power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
> }
863,877c800,813
< for (i = 1; i < number_gates_L2 - 1; ++i)
< {
< rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
< c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
< c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load);
< this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
< delay_nand2_path += this_delay;
< inrisetime_nand2_path = this_delay / (1.0 - 0.5);
< this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
< delay_nand3_path += this_delay;
< inrisetime_nand3_path = this_delay / (1.0 - 0.5);
< power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
< }
---
> for (i = 1; i < number_gates_L2 - 1; ++i) {
> rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
> c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
> c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load);
> this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
> delay_nand2_path += this_delay;
> inrisetime_nand2_path = this_delay / (1.0 - 0.5);
> this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
> delay_nand3_path += this_delay;
> inrisetime_nand3_path = this_delay / (1.0 - 0.5);
> power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
> }
879,892c815,829
< //Add delay of final inverter that drives the wordline decoders
< i = number_gates_L2 - 1;
< c_load = C_ld_predec_blk_out;
< rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
< c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
< this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
< delay_nand2_path += this_delay;
< ret_val.first = this_delay / (1.0 - 0.5);
< this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
< delay_nand3_path += this_delay;
< ret_val.second = this_delay / (1.0 - 0.5);
< power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
---
> //Add delay of final inverter that drives the wordline decoders
> i = number_gates_L2 - 1;
> c_load = C_ld_predec_blk_out;
> rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
> c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
> this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
> delay_nand2_path += this_delay;
> ret_val.first = this_delay / (1.0 - 0.5);
> this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
> delay_nand3_path += this_delay;
> ret_val.second = this_delay / (1.0 - 0.5);
> power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
> }
894d830
< }
896,897c832,833
< delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
< return ret_val;
---
> delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
> return ret_val;
1036,1064c972,998
< :flag_driver_exists(0),
< number_gates_nand2_path(0),
< number_gates_nand3_path(0),
< min_number_gates(2),
< num_buffers_driving_1_nand2_load(0),
< num_buffers_driving_2_nand2_load(0),
< num_buffers_driving_4_nand2_load(0),
< num_buffers_driving_2_nand3_load(0),
< num_buffers_driving_8_nand3_load(0),
< num_buffers_nand3_path(0),
< c_load_nand2_path_out(0),
< c_load_nand3_path_out(0),
< r_load_nand2_path_out(0),
< r_load_nand3_path_out(0),
< delay_nand2_path(0),
< delay_nand3_path(0),
< power_nand2_path(),
< power_nand3_path(),
< blk(blk_), dec(blk->dec),
< is_dram_(is_dram),
< way_select(way_select_)
< {
< for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
< {
< width_nand2_path_n[i] = 0;
< width_nand2_path_p[i] = 0;
< width_nand3_path_n[i] = 0;
< width_nand3_path_p[i] = 0;
< }
---
> : flag_driver_exists(0),
> number_gates_nand2_path(0),
> number_gates_nand3_path(0),
> min_number_gates(2),
> num_buffers_driving_1_nand2_load(0),
> num_buffers_driving_2_nand2_load(0),
> num_buffers_driving_4_nand2_load(0),
> num_buffers_driving_2_nand3_load(0),
> num_buffers_driving_8_nand3_load(0),
> num_buffers_nand3_path(0),
> c_load_nand2_path_out(0),
> c_load_nand3_path_out(0),
> r_load_nand2_path_out(0),
> r_load_nand3_path_out(0),
> delay_nand2_path(0),
> delay_nand3_path(0),
> power_nand2_path(),
> power_nand3_path(),
> blk(blk_), dec(blk->dec),
> is_dram_(is_dram),
> way_select(way_select_) {
> for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
> width_nand2_path_n[i] = 0;
> width_nand2_path_p[i] = 0;
> width_nand3_path_n[i] = 0;
> width_nand3_path_p[i] = 0;
> }
1066c1000
< number_input_addr_bits = blk->number_input_addr_bits;
---
> number_input_addr_bits = blk->number_input_addr_bits;
1068,1075c1002,1015
< if (way_select > 1)
< {
< flag_driver_exists = 1;
< number_input_addr_bits = way_select;
< if (dec->num_in_signals == 2)
< {
< c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
< num_buffers_driving_2_nand2_load = number_input_addr_bits;
---
> if (way_select > 1) {
> flag_driver_exists = 1;
> number_input_addr_bits = way_select;
> if (dec->num_in_signals == 2) {
> c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
> num_buffers_driving_2_nand2_load = number_input_addr_bits;
> } else if (dec->num_in_signals == 3) {
> c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
> num_buffers_driving_2_nand3_load = number_input_addr_bits;
> }
> } else if (way_select == 0) {
> if (blk->exist) {
> flag_driver_exists = 1;
> }
1077,1089d1016
< else if (dec->num_in_signals == 3)
< {
< c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
< num_buffers_driving_2_nand3_load = number_input_addr_bits;
< }
< }
< else if (way_select == 0)
< {
< if (blk->exist)
< {
< flag_driver_exists = 1;
< }
< }
1091,1092c1018,1019
< compute_widths();
< compute_area();
---
> compute_widths();
> compute_area();
1097,1101c1024,1027
< void PredecBlkDrv::compute_widths()
< {
< // The predecode block driver accepts as input the address bits from the h-tree network. For
< // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
< // inversion to generate addrbar and simply treat addrbar as addr.
---
> void PredecBlkDrv::compute_widths() {
> // The predecode block driver accepts as input the address bits from the h-tree network. For
> // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
> // inversion to generate addrbar and simply treat addrbar as addr.
1103,1104c1029,1030
< double F;
< double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
---
> double F;
> double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
1106,1109c1032,1034
< if (flag_driver_exists)
< {
< double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
< double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
---
> if (flag_driver_exists) {
> double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
> double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
1111,1164c1036,1083
< if (way_select == 0)
< {
< if (blk->number_input_addr_bits == 1)
< { //2 NAND2 gates
< num_buffers_driving_2_nand2_load = 1;
< c_load_nand2_path_out = 2 * C_nand2_gate_blk;
< }
< else if (blk->number_input_addr_bits == 2)
< { //4 NAND2 gates one 2-4 decoder
< num_buffers_driving_4_nand2_load = 2;
< c_load_nand2_path_out = 4 * C_nand2_gate_blk;
< }
< else if (blk->number_input_addr_bits == 3)
< { //8 NAND3 gates one 3-8 decoder
< num_buffers_driving_8_nand3_load = 3;
< c_load_nand3_path_out = 8 * C_nand3_gate_blk;
< }
< else if (blk->number_input_addr_bits == 4)
< { //4 + 4 NAND2 gates two 2-4 decoder
< num_buffers_driving_4_nand2_load = 4;
< c_load_nand2_path_out = 4 * C_nand2_gate_blk;
< }
< else if (blk->number_input_addr_bits == 5)
< { //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder
< num_buffers_driving_4_nand2_load = 2;
< num_buffers_driving_8_nand3_load = 3;
< c_load_nand2_path_out = 4 * C_nand2_gate_blk;
< c_load_nand3_path_out = 8 * C_nand3_gate_blk;
< }
< else if (blk->number_input_addr_bits == 6)
< { //8 + 8 NAND3 gates two 3-8 decoder
< num_buffers_driving_8_nand3_load = 6;
< c_load_nand3_path_out = 8 * C_nand3_gate_blk;
< }
< else if (blk->number_input_addr_bits == 7)
< { //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder
< num_buffers_driving_4_nand2_load = 4;
< num_buffers_driving_8_nand3_load = 3;
< c_load_nand2_path_out = 4 * C_nand2_gate_blk;
< c_load_nand3_path_out = 8 * C_nand3_gate_blk;
< }
< else if (blk->number_input_addr_bits == 8)
< { //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder
< num_buffers_driving_4_nand2_load = 2;
< num_buffers_driving_8_nand3_load = 6;
< c_load_nand2_path_out = 4 * C_nand2_gate_blk;
< c_load_nand3_path_out = 8 * C_nand3_gate_blk;
< }
< else if (blk->number_input_addr_bits == 9)
< { //8 + 8 + 8 NAND3 gates three 3-8 decoder
< num_buffers_driving_8_nand3_load = 9;
< c_load_nand3_path_out = 8 * C_nand3_gate_blk;
< }
< }
---
> if (way_select == 0) {
> if (blk->number_input_addr_bits == 1) {
> //2 NAND2 gates
> num_buffers_driving_2_nand2_load = 1;
> c_load_nand2_path_out = 2 * C_nand2_gate_blk;
> } else if (blk->number_input_addr_bits == 2) {
> //4 NAND2 gates one 2-4 decoder
> num_buffers_driving_4_nand2_load = 2;
> c_load_nand2_path_out = 4 * C_nand2_gate_blk;
> } else if (blk->number_input_addr_bits == 3) {
> //8 NAND3 gates one 3-8 decoder
> num_buffers_driving_8_nand3_load = 3;
> c_load_nand3_path_out = 8 * C_nand3_gate_blk;
> } else if (blk->number_input_addr_bits == 4) {
> //4 + 4 NAND2 gates two 2-4 decoder
> num_buffers_driving_4_nand2_load = 4;
> c_load_nand2_path_out = 4 * C_nand2_gate_blk;
> } else if (blk->number_input_addr_bits == 5) {
> //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8
> //decoder
> num_buffers_driving_4_nand2_load = 2;
> num_buffers_driving_8_nand3_load = 3;
> c_load_nand2_path_out = 4 * C_nand2_gate_blk;
> c_load_nand3_path_out = 8 * C_nand3_gate_blk;
> } else if (blk->number_input_addr_bits == 6) {
> //8 + 8 NAND3 gates two 3-8 decoder
> num_buffers_driving_8_nand3_load = 6;
> c_load_nand3_path_out = 8 * C_nand3_gate_blk;
> } else if (blk->number_input_addr_bits == 7) {
> //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8
> //decoder
> num_buffers_driving_4_nand2_load = 4;
> num_buffers_driving_8_nand3_load = 3;
> c_load_nand2_path_out = 4 * C_nand2_gate_blk;
> c_load_nand3_path_out = 8 * C_nand3_gate_blk;
> } else if (blk->number_input_addr_bits == 8) {
> //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8
> //decoder
> num_buffers_driving_4_nand2_load = 2;
> num_buffers_driving_8_nand3_load = 6;
> c_load_nand2_path_out = 4 * C_nand2_gate_blk;
> c_load_nand3_path_out = 8 * C_nand3_gate_blk;
> } else if (blk->number_input_addr_bits == 9) {
> //8 + 8 + 8 NAND3 gates three 3-8 decoder
> num_buffers_driving_8_nand3_load = 9;
> c_load_nand3_path_out = 8 * C_nand3_gate_blk;
> }
> }
1166,1183c1085,1102
< if ((blk->flag_two_unique_paths) ||
< (blk->number_inputs_L1_gate == 2) ||
< (number_input_addr_bits == 0) ||
< ((way_select)&&(dec->num_in_signals == 2)))
< { //this means that way_select is driving NAND2 in decoder.
< width_nand2_path_n[0] = g_tp.min_w_nmos_;
< width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
< F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
< number_gates_nand2_path = logical_effort(
< min_number_gates,
< 1,
< F,
< width_nand2_path_n,
< width_nand2_path_p,
< c_load_nand2_path_out,
< p_to_n_sz_ratio,
< is_dram_, false, g_tp.max_w_nmos_);
< }
---
> if ((blk->flag_two_unique_paths) ||
> (blk->number_inputs_L1_gate == 2) ||
> (number_input_addr_bits == 0) ||
> ((way_select) && (dec->num_in_signals == 2))) {
> //this means that way_select is driving NAND2 in decoder.
> width_nand2_path_n[0] = g_tp.min_w_nmos_;
> width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
> F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
> number_gates_nand2_path = logical_effort(
> min_number_gates,
> 1,
> F,
> width_nand2_path_n,
> width_nand2_path_p,
> c_load_nand2_path_out,
> p_to_n_sz_ratio,
> is_dram_, false, g_tp.max_w_nmos_);
> }
1185,1200c1104,1120
< if ((blk->flag_two_unique_paths) ||
< (blk->number_inputs_L1_gate == 3) ||
< ((way_select)&&(dec->num_in_signals == 3)))
< { //this means that way_select is driving NAND3 in decoder.
< width_nand3_path_n[0] = g_tp.min_w_nmos_;
< width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
< F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
< number_gates_nand3_path = logical_effort(
< min_number_gates,
< 1,
< F,
< width_nand3_path_n,
< width_nand3_path_p,
< c_load_nand3_path_out,
< p_to_n_sz_ratio,
< is_dram_, false, g_tp.max_w_nmos_);
---
> if ((blk->flag_two_unique_paths) ||
> (blk->number_inputs_L1_gate == 3) ||
> ((way_select) && (dec->num_in_signals == 3))) {
> //this means that way_select is driving NAND3 in decoder.
> width_nand3_path_n[0] = g_tp.min_w_nmos_;
> width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
> F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
> number_gates_nand3_path = logical_effort(
> min_number_gates,
> 1,
> F,
> width_nand3_path_n,
> width_nand3_path_p,
> c_load_nand3_path_out,
> p_to_n_sz_ratio,
> is_dram_, false, g_tp.max_w_nmos_);
> }
1202d1121
< }
1207,1214c1126,1132
< void PredecBlkDrv::compute_area()
< {
< double area_nand2_path = 0;
< double area_nand3_path = 0;
< double leak_nand2_path = 0;
< double leak_nand3_path = 0;
< double gate_leak_nand2_path = 0;
< double gate_leak_nand3_path = 0;
---
> void PredecBlkDrv::compute_area() {
> double area_nand2_path = 0;
> double area_nand3_path = 0;
> double leak_nand2_path = 0;
> double leak_nand3_path = 0;
> double gate_leak_nand2_path = 0;
> double gate_leak_nand3_path = 0;
1216,1230c1134,1147
< if (flag_driver_exists)
< { // first check whether a predecoder block driver is needed
< for (int i = 0; i < number_gates_nand2_path; ++i)
< {
< area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def);
< leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
< gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
< }
< area_nand2_path *= (num_buffers_driving_1_nand2_load +
< num_buffers_driving_2_nand2_load +
< num_buffers_driving_4_nand2_load);
< leak_nand2_path *= (num_buffers_driving_1_nand2_load +
< num_buffers_driving_2_nand2_load +
< num_buffers_driving_4_nand2_load);
< gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
---
> if (flag_driver_exists) {
> // first check whether a predecoder block driver is needed
> for (int i = 0; i < number_gates_nand2_path; ++i) {
> area_nand2_path +=
> compute_gate_area(INV, 1, width_nand2_path_p[i],
> width_nand2_path_n[i], g_tp.cell_h_def);
> leak_nand2_path +=
> cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
> 1, inv, is_dram_);
> gate_leak_nand2_path +=
> cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
> 1, inv, is_dram_);
> }
> area_nand2_path *= (num_buffers_driving_1_nand2_load +
1232a1150,1155
> leak_nand2_path *= (num_buffers_driving_1_nand2_load +
> num_buffers_driving_2_nand2_load +
> num_buffers_driving_4_nand2_load);
> gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
> num_buffers_driving_2_nand2_load +
> num_buffers_driving_4_nand2_load);
1234,1242c1157,1170
< for (int i = 0; i < number_gates_nand3_path; ++i)
< {
< area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def);
< leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
< gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
< }
< area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
< leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
< gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
---
> for (int i = 0; i < number_gates_nand3_path; ++i) {
> area_nand3_path +=
> compute_gate_area(INV, 1, width_nand3_path_p[i],
> width_nand3_path_n[i], g_tp.cell_h_def);
> leak_nand3_path +=
> cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
> 1, inv, is_dram_);
> gate_leak_nand3_path +=
> cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
> 1, inv, is_dram_);
> }
> area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
> leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
> gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
1244,1249c1172,1177
< power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
< power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
< power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
< power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
< area.set_area(area_nand2_path + area_nand3_path);
< }
---
> power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
> power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
> power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
> power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
> area.set_area(area_nand2_path + area_nand3_path);
> }
1256,1263c1184,1190
< double inrisetime_nand3_path)
< {
< pair<double, double> ret_val;
< ret_val.first = 0; // outrisetime_nand2_path
< ret_val.second = 0; // outrisetime_nand3_path
< int i;
< double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
< double Vdd = g_tp.peri_global.Vdd;
---
> double inrisetime_nand3_path) {
> pair<double, double> ret_val;
> ret_val.first = 0; // outrisetime_nand2_path
> ret_val.second = 0; // outrisetime_nand3_path
> int i;
> double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
> double Vdd = g_tp.peri_global.Vdd;
1265,1278c1192,1203
< if (flag_driver_exists)
< {
< for (i = 0; i < number_gates_nand2_path - 1; ++i)
< {
< rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
< c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
< c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_gate_load);
< this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
< delay_nand2_path += this_delay;
< inrisetime_nand2_path = this_delay / (1.0 - 0.5);
< power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
< }
---
> if (flag_driver_exists) {
> for (i = 0; i < number_gates_nand2_path - 1; ++i) {
> rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
> c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
> c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_gate_load);
> this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
> delay_nand2_path += this_delay;
> inrisetime_nand2_path = this_delay / (1.0 - 0.5);
> power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
> }
1280,1292c1205,1216
< // Final inverter drives the predecoder block or the decoder output load
< if (number_gates_nand2_path != 0)
< {
< i = number_gates_nand2_path - 1;
< rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
< c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< c_load = c_load_nand2_path_out;
< tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out*c_load/ 2;
< this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
< delay_nand2_path += this_delay;
< ret_val.first = this_delay / (1.0 - 0.5);
< power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
---
> // Final inverter drives the predecoder block or the decoder output load
> if (number_gates_nand2_path != 0) {
> i = number_gates_nand2_path - 1;
> rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
> c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> c_load = c_load_nand2_path_out;
> tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out * c_load / 2;
> this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
> delay_nand2_path += this_delay;
> ret_val.first = this_delay / (1.0 - 0.5);
> power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
1294c1218
< }
---
> }
1296,1307c1220,1230
< for (i = 0; i < number_gates_nand3_path - 1; ++i)
< {
< rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
< c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
< c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_gate_load);
< this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
< delay_nand3_path += this_delay;
< inrisetime_nand3_path = this_delay / (1.0 - 0.5);
< power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
< }
---
> for (i = 0; i < number_gates_nand3_path - 1; ++i) {
> rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
> c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
> c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_gate_load);
> this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
> delay_nand3_path += this_delay;
> inrisetime_nand3_path = this_delay / (1.0 - 0.5);
> power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
> }
1309,1321c1232,1244
< // Final inverter drives the predecoder block or the decoder output load
< if (number_gates_nand3_path != 0)
< {
< i = number_gates_nand3_path - 1;
< rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
< c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< c_load = c_load_nand3_path_out;
< tf = rd*(c_intrinsic + c_load) + r_load_nand3_path_out*c_load / 2;
< this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
< delay_nand3_path += this_delay;
< ret_val.second = this_delay / (1.0 - 0.5);
< power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
---
> // Final inverter drives the predecoder block or the decoder output load
> if (number_gates_nand3_path != 0) {
> i = number_gates_nand3_path - 1;
> rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
> c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> c_load = c_load_nand3_path_out;
> tf = rd * (c_intrinsic + c_load) + r_load_nand3_path_out * c_load / 2;
> this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
> delay_nand3_path += this_delay;
> ret_val.second = this_delay / (1.0 - 0.5);
> power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
> }
1323,1324c1246
< }
< return ret_val;
---
> return ret_val;
1328,1331c1250,1252
< double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir)
< {
< return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
< num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
---
> double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir) {
> return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
> num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
1339,1351c1260,1271
< :blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_)
< {
< driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
< drv1->power_nand3_path.readOp.leakage +
< drv2->power_nand2_path.readOp.leakage +
< drv2->power_nand3_path.readOp.leakage;
< block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
< blk1->power_nand3_path.readOp.leakage +
< blk1->power_L2.readOp.leakage +
< blk2->power_nand2_path.readOp.leakage +
< blk2->power_nand3_path.readOp.leakage +
< blk2->power_L2.readOp.leakage;
< power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
---
> : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) {
> driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
> drv1->power_nand3_path.readOp.leakage +
> drv2->power_nand2_path.readOp.leakage +
> drv2->power_nand3_path.readOp.leakage;
> block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
> blk1->power_nand3_path.readOp.leakage +
> blk1->power_L2.readOp.leakage +
> blk2->power_nand2_path.readOp.leakage +
> blk2->power_nand3_path.readOp.leakage +
> blk2->power_L2.readOp.leakage;
> power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
1353,1363c1273,1283
< driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
< drv1->power_nand3_path.readOp.gate_leakage +
< drv2->power_nand2_path.readOp.gate_leakage +
< drv2->power_nand3_path.readOp.gate_leakage;
< block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
< blk1->power_nand3_path.readOp.gate_leakage +
< blk1->power_L2.readOp.gate_leakage +
< blk2->power_nand2_path.readOp.gate_leakage +
< blk2->power_nand3_path.readOp.gate_leakage +
< blk2->power_L2.readOp.gate_leakage;
< power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
---
> driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
> drv1->power_nand3_path.readOp.gate_leakage +
> drv2->power_nand2_path.readOp.gate_leakage +
> drv2->power_nand3_path.readOp.gate_leakage;
> block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
> blk1->power_nand3_path.readOp.gate_leakage +
> blk1->power_L2.readOp.gate_leakage +
> blk2->power_nand2_path.readOp.gate_leakage +
> blk2->power_nand3_path.readOp.gate_leakage +
> blk2->power_L2.readOp.gate_leakage;
> power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
1402,1410c1322,1329
< double Predec::compute_delays(double inrisetime)
< {
< // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
< pair<double, double> tmp_pair1, tmp_pair2;
< tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
< tmp_pair1 = blk1->compute_delays(tmp_pair1);
< tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
< tmp_pair2 = blk2->compute_delays(tmp_pair2);
< tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
---
> double Predec::compute_delays(double inrisetime) {
> // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
> pair<double, double> tmp_pair1, tmp_pair2;
> tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
> tmp_pair1 = blk1->compute_delays(tmp_pair1);
> tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
> tmp_pair2 = blk2->compute_delays(tmp_pair2);
> tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
1412,1416c1331,1335
< driver_power.readOp.dynamic =
< drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
< drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
< drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
< drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
---
> driver_power.readOp.dynamic =
> drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
> drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
> drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
> drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
1418,1424c1337,1343
< block_power.readOp.dynamic =
< blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
< blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
< blk1->power_L2.readOp.dynamic +
< blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
< blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
< blk2->power_L2.readOp.dynamic;
---
> block_power.readOp.dynamic =
> blk1->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path +
> blk1->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path +
> blk1->power_L2.readOp.dynamic +
> blk2->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path +
> blk2->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path +
> blk2->power_L2.readOp.dynamic;
1426c1345
< power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
---
> power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
1428,1429c1347,1348
< delay = tmp_pair1.first;
< return tmp_pair1.second;
---
> delay = tmp_pair1.first;
> return tmp_pair1.second;
1432d1350
<
1468,1471c1386,1388
< pair<double, double> input_pair2)
< {
< pair<double, double> ret_val;
< double delay;
---
> pair<double, double> input_pair2) {
> pair<double, double> ret_val;
> double delay;
1473,1478c1390
< delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
< ret_val.first = delay;
< ret_val.second = input_pair1.first;
< delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
< if (ret_val.first < delay)
< {
---
> delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
1480,1493c1392,1407
< ret_val.second = input_pair1.second;
< }
< delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
< if (ret_val.first < delay)
< {
< ret_val.first = delay;
< ret_val.second = input_pair2.first;
< }
< delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
< if (ret_val.first < delay)
< {
< ret_val.first = delay;
< ret_val.second = input_pair2.second;
< }
---
> ret_val.second = input_pair1.first;
> delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
> if (ret_val.first < delay) {
> ret_val.first = delay;
> ret_val.second = input_pair1.second;
> }
> delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
> if (ret_val.first < delay) {
> ret_val.first = delay;
> ret_val.second = input_pair2.first;
> }
> delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
> if (ret_val.first < delay) {
> ret_val.first = delay;
> ret_val.second = input_pair2.second;
> }
1495c1409
< return ret_val;
---
> return ret_val;
1500,1514c1414,1427
< Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram)
< :number_gates(0),
< min_number_gates(2),
< c_gate_load(c_gate_load_),
< c_wire_load(c_wire_load_),
< r_wire_load(r_wire_load_),
< delay(0),
< power(),
< is_dram_(is_dram)
< {
< for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
< {
< width_n[i] = 0;
< width_p[i] = 0;
< }
---
> Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_,
> bool is_dram)
> : number_gates(0),
> min_number_gates(2),
> c_gate_load(c_gate_load_),
> c_wire_load(c_wire_load_),
> r_wire_load(r_wire_load_),
> delay(0),
> power(),
> is_dram_(is_dram) {
> for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
> width_n[i] = 0;
> width_p[i] = 0;
> }
1516c1429
< compute_widths();
---
> compute_widths();
1520,1525c1433,1437
< void Driver::compute_widths()
< {
< double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
< double c_load = c_gate_load + c_wire_load;
< width_n[0] = g_tp.min_w_nmos_;
< width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
---
> void Driver::compute_widths() {
> double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
> double c_load = c_gate_load + c_wire_load;
> width_n[0] = g_tp.min_w_nmos_;
> width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
1527,1537c1439,1449
< double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
< number_gates = logical_effort(
< min_number_gates,
< 1,
< F,
< width_n,
< width_p,
< c_load,
< p_to_n_sz_ratio,
< is_dram_, false,
< g_tp.max_w_nmos_);
---
> double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
> number_gates = logical_effort(
> min_number_gates,
> 1,
> F,
> width_n,
> width_p,
> c_load,
> p_to_n_sz_ratio,
> is_dram_, false,
> g_tp.max_w_nmos_);
1542,1546c1454,1457
< double Driver::compute_delay(double inrisetime)
< {
< int i;
< double rd, c_load, c_intrinsic, tf;
< double this_delay = 0;
---
> double Driver::compute_delay(double inrisetime) {
> int i;
> double rd, c_load, c_intrinsic, tf;
> double this_delay = 0;
1548,1549c1459,1479
< for (i = 0; i < number_gates - 1; ++i)
< {
---
> for (i = 0; i < number_gates - 1; ++i) {
> rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
> c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
> c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
> drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load);
> this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
> delay += this_delay;
> inrisetime = this_delay / (1.0 - 0.5);
> power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
> g_tp.peri_global.Vdd;
> power.readOp.leakage +=
> cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
> g_tp.peri_global.Vdd;
> power.readOp.gate_leakage +=
> cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
> g_tp.peri_global.Vdd;
> }
>
> i = number_gates - 1;
> c_load = c_gate_load + c_wire_load;
1551d1480
< c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
1553,1554c1482,1484
< drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load);
---
> drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
> tf = rd * (c_intrinsic + c_load) + r_wire_load *
> (c_wire_load / 2 + c_gate_load);
1557,1561c1487,1494
< inrisetime = this_delay / (1.0 - 0.5);
< power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
< power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd;
< power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
< }
---
> power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
> g_tp.peri_global.Vdd;
> power.readOp.leakage +=
> cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
> g_tp.peri_global.Vdd;
> power.readOp.gate_leakage +=
> cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
> g_tp.peri_global.Vdd;
1563,1575c1496
< i = number_gates - 1;
< c_load = c_gate_load + c_wire_load;
< rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
< c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
< drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
< tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load);
< this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
< delay += this_delay;
< power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
< power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd;
< power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
<
< return this_delay / (1.0 - 0.5);
---
> return this_delay / (1.0 - 0.5);