decoder.cc (10152:52c552138ba1) | decoder.cc (10234:5cb711fa6176) |
---|---|
1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. | 1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. |
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. |
|
5 * All Rights Reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer; 11 * redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the --- 7 unchanged lines hidden (view full) --- 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 6 * All Rights Reserved 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are 10 * met: redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer; 12 * redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the --- 7 unchanged lines hidden (view full) --- 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” | 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 * 30 ***************************************************************************/ 31 32 33 34#include <cassert> 35#include <cmath> 36#include <iostream> --- 9 unchanged lines hidden (view full) --- 46 int _num_dec_signals, 47 bool flag_way_select, 48 double _C_ld_dec_out, 49 double _R_wire_dec_out, 50 bool fully_assoc_, 51 bool is_dram_, 52 bool is_wl_tr_, 53 const Area & cell_) | 30 * 31 ***************************************************************************/ 32 33 34 35#include <cassert> 36#include <cmath> 37#include <iostream> --- 9 unchanged lines hidden (view full) --- 47 int _num_dec_signals, 48 bool flag_way_select, 49 double _C_ld_dec_out, 50 double _R_wire_dec_out, 51 bool fully_assoc_, 52 bool is_dram_, 53 bool is_wl_tr_, 54 const Area & cell_) |
54:exist(false), 55 C_ld_dec_out(_C_ld_dec_out), 56 R_wire_dec_out(_R_wire_dec_out), 57 num_gates(0), num_gates_min(2), 58 delay(0), 59 //power(), 60 fully_assoc(fully_assoc_), is_dram(is_dram_), 61 is_wl_tr(is_wl_tr_), cell(cell_) 62{ | 55 : exist(false), 56 C_ld_dec_out(_C_ld_dec_out), 57 R_wire_dec_out(_R_wire_dec_out), 58 num_gates(0), num_gates_min(2), 59 delay(0), 60 //power(), 61 fully_assoc(fully_assoc_), is_dram(is_dram_), 62 is_wl_tr(is_wl_tr_), cell(cell_) { |
63 | 63 |
64 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) 65 { 66 w_dec_n[i] = 0; 67 w_dec_p[i] = 0; 68 } | 64 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { 65 w_dec_n[i] = 0; 66 w_dec_p[i] = 0; 67 } |
69 | 68 |
70 /* 71 * _num_dec_signals is the number of decoded signal as output 72 * num_addr_bits_dec is the number of signal to be decoded 73 * as the decoders input. 74 */ 75 int num_addr_bits_dec = _log2(_num_dec_signals); | 69 /* 70 * _num_dec_signals is the number of decoded signal as output 71 * num_addr_bits_dec is the number of signal to be decoded 72 * as the decoders input. 73 */ 74 int num_addr_bits_dec = _log2(_num_dec_signals); |
76 | 75 |
77 if (num_addr_bits_dec < 4) 78 { 79 if (flag_way_select) 80 { 81 exist = true; 82 num_in_signals = 2; 83 } 84 else 85 { 86 num_in_signals = 0; 87 } 88 } 89 else 90 { 91 exist = true; | 76 if (num_addr_bits_dec < 4) { 77 if (flag_way_select) { 78 exist = true; 79 num_in_signals = 2; 80 } else { 81 num_in_signals = 0; 82 } 83 } else { 84 exist = true; |
92 | 85 |
93 if (flag_way_select) 94 { 95 num_in_signals = 3; | 86 if (flag_way_select) { 87 num_in_signals = 3; 88 } else { 89 num_in_signals = 2; 90 } |
96 } | 91 } |
97 else 98 { 99 num_in_signals = 2; 100 } 101 } | |
102 | 92 |
103 assert(cell.h>0); 104 assert(cell.w>0); 105 // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; 106 //area.h = 4 * cell.h; 107 area.h = g_tp.h_dec * cell.h; | 93 assert(cell.h > 0); 94 assert(cell.w > 0); 95 // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; 96 //area.h = 4 * cell.h; 97 area.h = g_tp.h_dec * cell.h; |
108 | 98 |
109 compute_widths(); 110 compute_area(); | 99 compute_widths(); 100 compute_area(); |
111} 112 113 114 | 101} 102 103 104 |
115void Decoder::compute_widths() 116{ 117 double F; 118 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr); 119 double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); 120 double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); | 105void Decoder::compute_widths() { 106 double F; 107 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr); 108 double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); 109 double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); |
121 | 110 |
122 if (exist) 123 { 124 if (num_in_signals == 2 || fully_assoc) 125 { 126 w_dec_n[0] = 2 * g_tp.min_w_nmos_; 127 w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 128 F = gnand2; 129 } 130 else 131 { 132 w_dec_n[0] = 3 * g_tp.min_w_nmos_; 133 w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 134 F = gnand3; 135 } | 111 if (exist) { 112 if (num_in_signals == 2 || fully_assoc) { 113 w_dec_n[0] = 2 * g_tp.min_w_nmos_; 114 w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 115 F = gnand2; 116 } else { 117 w_dec_n[0] = 3 * g_tp.min_w_nmos_; 118 w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 119 F = gnand3; 120 } |
136 | 121 |
137 F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) + 138 gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)); 139 num_gates = logical_effort( 140 num_gates_min, 141 num_in_signals == 2 ? gnand2 : gnand3, 142 F, 143 w_dec_n, 144 w_dec_p, 145 C_ld_dec_out, 146 p_to_n_sz_ratio, 147 is_dram, 148 is_wl_tr, 149 g_tp.max_w_nmos_dec); 150 } | 122 F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) + 123 gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)); 124 num_gates = logical_effort( 125 num_gates_min, 126 num_in_signals == 2 ? gnand2 : gnand3, 127 F, 128 w_dec_n, 129 w_dec_p, 130 C_ld_dec_out, 131 p_to_n_sz_ratio, 132 is_dram, 133 is_wl_tr, 134 g_tp.max_w_nmos_dec); 135 } |
151} 152 153 154 | 136} 137 138 139 |
155void Decoder::compute_area() 156{ 157 double cumulative_area = 0; 158 double cumulative_curr = 0; // cumulative leakage current 159 double cumulative_curr_Ig = 0; // cumulative leakage current | 140void Decoder::compute_area() { 141 double cumulative_area = 0; 142 double cumulative_curr = 0; // cumulative leakage current 143 double cumulative_curr_Ig = 0; // cumulative leakage current |
160 | 144 |
161 if (exist) 162 { // First check if this decoder exists 163 if (num_in_signals == 2) 164 { 165 cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h); 166 cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); 167 cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); 168 } 169 else if (num_in_signals == 3) 170 { 171 cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h); 172 cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);; 173 cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); 174 } | 145 if (exist) { // First check if this decoder exists 146 if (num_in_signals == 2) { 147 cumulative_area = 148 compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h); 149 cumulative_curr = 150 cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram); 151 cumulative_curr_Ig = 152 cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram); 153 } else if (num_in_signals == 3) { 154 cumulative_area = 155 compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h); 156 cumulative_curr = 157 cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);; 158 cumulative_curr_Ig = 159 cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); 160 } |
175 | 161 |
176 for (int i = 1; i < num_gates; i++) 177 { 178 cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h); 179 cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); 180 cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); 181 } 182 power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; 183 power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; | 162 for (int i = 1; i < num_gates; i++) { 163 cumulative_area += 164 compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h); 165 cumulative_curr += 166 cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); 167 cumulative_curr_Ig = 168 cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); 169 } 170 power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; 171 power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; |
184 | 172 |
185 area.w = (cumulative_area / area.h); 186 } | 173 area.w = (cumulative_area / area.h); 174 } |
187} 188 189 190 | 175} 176 177 178 |
191double Decoder::compute_delays(double inrisetime) 192{ 193 if (exist) 194 { 195 double ret_val = 0; // outrisetime 196 int i; 197 double rd, tf, this_delay, c_load, c_intrinsic, Vpp; 198 double Vdd = g_tp.peri_global.Vdd; | 179double Decoder::compute_delays(double inrisetime) { 180 if (exist) { 181 double ret_val = 0; // outrisetime 182 int i; 183 double rd, tf, this_delay, c_load, c_intrinsic, Vpp; 184 double Vdd = g_tp.peri_global.Vdd; |
199 | 185 |
200 if ((is_wl_tr) && (is_dram)) 201 { 202 Vpp = g_tp.vpp; 203 } 204 else if (is_wl_tr) 205 { 206 Vpp = g_tp.sram_cell.Vdd; 207 } 208 else 209 { 210 Vpp = g_tp.peri_global.Vdd; 211 } | 186 if ((is_wl_tr) && (is_dram)) { 187 Vpp = g_tp.vpp; 188 } else if (is_wl_tr) { 189 Vpp = g_tp.sram_cell.Vdd; 190 } else { 191 Vpp = g_tp.peri_global.Vdd; 192 } |
212 | 193 |
213 // first check whether a decoder is required at all 214 rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr); 215 c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr); 216 c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals + 217 drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr); 218 tf = rd * (c_intrinsic + c_load); 219 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 220 delay += this_delay; 221 inrisetime = this_delay / (1.0 - 0.5); 222 power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; | 194 // first check whether a decoder is required at all 195 rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr); 196 c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr); 197 c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals + 198 drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr); 199 tf = rd * (c_intrinsic + c_load); 200 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 201 delay += this_delay; 202 inrisetime = this_delay / (1.0 - 0.5); 203 power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; |
223 | 204 |
224 for (i = 1; i < num_gates - 1; ++i) 225 { 226 rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); 227 c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr); 228 c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + 229 drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); 230 tf = rd * (c_intrinsic + c_load); 231 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 232 delay += this_delay; 233 inrisetime = this_delay / (1.0 - 0.5); 234 power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; 235 } | 205 for (i = 1; i < num_gates - 1; ++i) { 206 rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); 207 c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr); 208 c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + 209 drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); 210 tf = rd * (c_intrinsic + c_load); 211 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 212 delay += this_delay; 213 inrisetime = this_delay / (1.0 - 0.5); 214 power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; 215 } |
236 | 216 |
237 // add delay of final inverter that drives the wordline 238 i = num_gates - 1; 239 c_load = C_ld_dec_out; 240 rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); 241 c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + 242 drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); 243 tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2; 244 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 245 delay += this_delay; 246 ret_val = this_delay / (1.0 - 0.5); 247 power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd; | 217 // add delay of final inverter that drives the wordline 218 i = num_gates - 1; 219 c_load = C_ld_dec_out; 220 rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); 221 c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + 222 drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); 223 tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2; 224 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 225 delay += this_delay; 226 ret_val = this_delay / (1.0 - 0.5); 227 power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd; |
248 | 228 |
249 return ret_val; 250 } 251 else 252 { 253 return 0.0; 254 } | 229 return ret_val; 230 } else { 231 return 0.0; 232 } |
255} 256 257void Decoder::leakage_feedback(double temperature) 258{ 259 double cumulative_curr = 0; // cumulative leakage current 260 double cumulative_curr_Ig = 0; // cumulative leakage current 261 262 if (exist) --- 23 unchanged lines hidden (view full) --- 286PredecBlk::PredecBlk( 287 int num_dec_signals, 288 Decoder * dec_, 289 double C_wire_predec_blk_out, 290 double R_wire_predec_blk_out_, 291 int num_dec_per_predec, 292 bool is_dram, 293 bool is_blk1) | 233} 234 235void Decoder::leakage_feedback(double temperature) 236{ 237 double cumulative_curr = 0; // cumulative leakage current 238 double cumulative_curr_Ig = 0; // cumulative leakage current 239 240 if (exist) --- 23 unchanged lines hidden (view full) --- 264PredecBlk::PredecBlk( 265 int num_dec_signals, 266 Decoder * dec_, 267 double C_wire_predec_blk_out, 268 double R_wire_predec_blk_out_, 269 int num_dec_per_predec, 270 bool is_dram, 271 bool is_blk1) |
294 :dec(dec_), 295 exist(false), 296 number_input_addr_bits(0), 297 C_ld_predec_blk_out(0), 298 R_wire_predec_blk_out(0), 299 branch_effort_nand2_gate_output(1), 300 branch_effort_nand3_gate_output(1), 301 flag_two_unique_paths(false), 302 flag_L2_gate(0), 303 number_inputs_L1_gate(0), 304 number_gates_L1_nand2_path(0), 305 number_gates_L1_nand3_path(0), 306 number_gates_L2(0), 307 min_number_gates_L1(2), 308 min_number_gates_L2(2), 309 num_L1_active_nand2_path(0), 310 num_L1_active_nand3_path(0), 311 delay_nand2_path(0), 312 delay_nand3_path(0), 313 power_nand2_path(), 314 power_nand3_path(), 315 power_L2(), 316 is_dram_(is_dram) 317{ 318 int branch_effort_predec_out; 319 double C_ld_dec_gate; 320 int num_addr_bits_dec = _log2(num_dec_signals); 321 int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2; 322 int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits; | 272 : dec(dec_), 273 exist(false), 274 number_input_addr_bits(0), 275 C_ld_predec_blk_out(0), 276 R_wire_predec_blk_out(0), 277 branch_effort_nand2_gate_output(1), 278 branch_effort_nand3_gate_output(1), 279 flag_two_unique_paths(false), 280 flag_L2_gate(0), 281 number_inputs_L1_gate(0), 282 number_gates_L1_nand2_path(0), 283 number_gates_L1_nand3_path(0), 284 number_gates_L2(0), 285 min_number_gates_L1(2), 286 min_number_gates_L2(2), 287 num_L1_active_nand2_path(0), 288 num_L1_active_nand3_path(0), 289 delay_nand2_path(0), 290 delay_nand3_path(0), 291 power_nand2_path(), 292 power_nand3_path(), 293 power_L2(), 294 is_dram_(is_dram) { 295 int branch_effort_predec_out; 296 double C_ld_dec_gate; 297 int num_addr_bits_dec = _log2(num_dec_signals); 298 int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2; 299 int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits; |
323 | 300 |
324 w_L1_nand2_n[0] = 0; 325 w_L1_nand2_p[0] = 0; 326 w_L1_nand3_n[0] = 0; 327 w_L1_nand3_p[0] = 0; | 301 w_L1_nand2_n[0] = 0; 302 w_L1_nand2_p[0] = 0; 303 w_L1_nand3_n[0] = 0; 304 w_L1_nand3_p[0] = 0; |
328 | 305 |
329 if (is_blk1 == true) 330 { 331 if (num_addr_bits_dec <= 0) 332 { 333 return; | 306 if (is_blk1 == true) { 307 if (num_addr_bits_dec <= 0) { 308 return; 309 } else if (num_addr_bits_dec < 4) { 310 // Just one predecoder block is required with NAND2 gates. No decoder required. 311 // The first level of predecoding directly drives the decoder output load 312 exist = true; 313 number_input_addr_bits = num_addr_bits_dec; 314 R_wire_predec_blk_out = dec->R_wire_dec_out; 315 C_ld_predec_blk_out = dec->C_ld_dec_out; 316 } else { 317 exist = true; 318 number_input_addr_bits = blk1_num_input_addr_bits; 319 branch_effort_predec_out = (1 << blk2_num_input_addr_bits); 320 C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); 321 R_wire_predec_blk_out = R_wire_predec_blk_out_; 322 C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; 323 } 324 } else { 325 if (num_addr_bits_dec >= 4) { 326 exist = true; 327 number_input_addr_bits = blk2_num_input_addr_bits; 328 branch_effort_predec_out = (1 << blk1_num_input_addr_bits); 329 C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); 330 R_wire_predec_blk_out = R_wire_predec_blk_out_; 331 C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; 332 } |
334 } | 333 } |
335 else if (num_addr_bits_dec < 4) 336 { 337 // Just one predecoder block is required with NAND2 gates. No decoder required. 338 // The first level of predecoding directly drives the decoder output load 339 exist = true; 340 number_input_addr_bits = num_addr_bits_dec; 341 R_wire_predec_blk_out = dec->R_wire_dec_out; 342 C_ld_predec_blk_out = dec->C_ld_dec_out; 343 } 344 else 345 { 346 exist = true; 347 number_input_addr_bits = blk1_num_input_addr_bits; 348 branch_effort_predec_out = (1 << blk2_num_input_addr_bits); 349 C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); 350 R_wire_predec_blk_out = R_wire_predec_blk_out_; 351 C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; 352 } 353 } 354 else 355 { 356 if (num_addr_bits_dec >= 4) 357 { 358 exist = true; 359 number_input_addr_bits = blk2_num_input_addr_bits; 360 branch_effort_predec_out = (1 << blk1_num_input_addr_bits); 361 C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); 362 R_wire_predec_blk_out = R_wire_predec_blk_out_; 363 C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; 364 } 365 } | |
366 | 334 |
367 compute_widths(); 368 compute_area(); | 335 compute_widths(); 336 compute_area(); |
369} 370 371 372 | 337} 338 339 340 |
373void PredecBlk::compute_widths() 374{ 375 double F, c_load_nand3_path, c_load_nand2_path; 376 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); 377 double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); 378 double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); | 341void PredecBlk::compute_widths() { 342 double F, c_load_nand3_path, c_load_nand2_path; 343 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); 344 double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); 345 double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); |
379 | 346 |
380 if (exist == false) return; | 347 if (exist == false) return; |
381 382 | 348 349 |
383 switch (number_input_addr_bits) 384 { | 350 switch (number_input_addr_bits) { |
385 case 1: | 351 case 1: |
386 flag_two_unique_paths = false; 387 number_inputs_L1_gate = 2; 388 flag_L2_gate = 0; 389 break; | 352 flag_two_unique_paths = false; 353 number_inputs_L1_gate = 2; 354 flag_L2_gate = 0; 355 break; |
390 case 2: | 356 case 2: |
391 flag_two_unique_paths = false; 392 number_inputs_L1_gate = 2; 393 flag_L2_gate = 0; 394 break; | 357 flag_two_unique_paths = false; 358 number_inputs_L1_gate = 2; 359 flag_L2_gate = 0; 360 break; |
395 case 3: | 361 case 3: |
396 flag_two_unique_paths = false; 397 number_inputs_L1_gate = 3; 398 flag_L2_gate = 0; 399 break; | 362 flag_two_unique_paths = false; 363 number_inputs_L1_gate = 3; 364 flag_L2_gate = 0; 365 break; |
400 case 4: | 366 case 4: |
401 flag_two_unique_paths = false; 402 number_inputs_L1_gate = 2; 403 flag_L2_gate = 2; 404 branch_effort_nand2_gate_output = 4; 405 break; | 367 flag_two_unique_paths = false; 368 number_inputs_L1_gate = 2; 369 flag_L2_gate = 2; 370 branch_effort_nand2_gate_output = 4; 371 break; |
406 case 5: | 372 case 5: |
407 flag_two_unique_paths = true; 408 flag_L2_gate = 2; 409 branch_effort_nand2_gate_output = 8; 410 branch_effort_nand3_gate_output = 4; 411 break; | 373 flag_two_unique_paths = true; 374 flag_L2_gate = 2; 375 branch_effort_nand2_gate_output = 8; 376 branch_effort_nand3_gate_output = 4; 377 break; |
412 case 6: | 378 case 6: |
413 flag_two_unique_paths = false; 414 number_inputs_L1_gate = 3; 415 flag_L2_gate = 2; 416 branch_effort_nand3_gate_output = 8; 417 break; | 379 flag_two_unique_paths = false; 380 number_inputs_L1_gate = 3; 381 flag_L2_gate = 2; 382 branch_effort_nand3_gate_output = 8; 383 break; |
418 case 7: | 384 case 7: |
419 flag_two_unique_paths = true; 420 flag_L2_gate = 3; 421 branch_effort_nand2_gate_output = 32; 422 branch_effort_nand3_gate_output = 16; 423 break; | 385 flag_two_unique_paths = true; 386 flag_L2_gate = 3; 387 branch_effort_nand2_gate_output = 32; 388 branch_effort_nand3_gate_output = 16; 389 break; |
424 case 8: | 390 case 8: |
425 flag_two_unique_paths = true; 426 flag_L2_gate = 3; 427 branch_effort_nand2_gate_output = 64; 428 branch_effort_nand3_gate_output = 32; 429 break; | 391 flag_two_unique_paths = true; 392 flag_L2_gate = 3; 393 branch_effort_nand2_gate_output = 64; 394 branch_effort_nand3_gate_output = 32; 395 break; |
430 case 9: | 396 case 9: |
431 flag_two_unique_paths = false; 432 number_inputs_L1_gate = 3; 433 flag_L2_gate = 3; 434 branch_effort_nand3_gate_output = 64; 435 break; | 397 flag_two_unique_paths = false; 398 number_inputs_L1_gate = 3; 399 flag_L2_gate = 3; 400 branch_effort_nand3_gate_output = 64; 401 break; |
436 default: | 402 default: |
437 assert(0); 438 break; 439 } 440 441 // find the number of gates and sizing in second level of predecoder (if there is a second level) 442 if (flag_L2_gate) 443 { 444 if (flag_L2_gate == 2) 445 { // 2nd level is a NAND2 gate 446 w_L2_n[0] = 2 * g_tp.min_w_nmos_; 447 F = gnand2; | 403 assert(0); 404 break; |
448 } | 405 } |
449 else 450 { // 2nd level is a NAND3 gate 451 w_L2_n[0] = 3 * g_tp.min_w_nmos_; 452 F = gnand3; 453 } 454 w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 455 F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); 456 number_gates_L2 = logical_effort( 457 min_number_gates_L2, 458 flag_L2_gate == 2 ? gnand2 : gnand3, 459 F, 460 w_L2_n, 461 w_L2_p, 462 C_ld_predec_blk_out, 463 p_to_n_sz_ratio, 464 is_dram_, false, 465 g_tp.max_w_nmos_); | |
466 | 406 |
467 // Now find the number of gates and widths in first level of predecoder 468 if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2)) 469 { // Whenever flag_two_unique_paths is true, it means first level of decoder employs 470 // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, it means 471 // a NAND2 gate is used in the first level of the predecoder 472 c_load_nand2_path = branch_effort_nand2_gate_output * 473 (gate_C(w_L2_n[0], 0, is_dram_) + 474 gate_C(w_L2_p[0], 0, is_dram_)); 475 w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; 476 w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 477 F = gnand2 * c_load_nand2_path / 478 (gate_C(w_L1_nand2_n[0], 0, is_dram_) + 479 gate_C(w_L1_nand2_p[0], 0, is_dram_)); 480 number_gates_L1_nand2_path = logical_effort( 481 min_number_gates_L1, 482 gnand2, 483 F, 484 w_L1_nand2_n, 485 w_L1_nand2_p, 486 c_load_nand2_path, 487 p_to_n_sz_ratio, 488 is_dram_, false, 489 g_tp.max_w_nmos_); 490 } | 407 // find the number of gates and sizing in second level of predecoder (if there is a second level) 408 if (flag_L2_gate) { 409 if (flag_L2_gate == 2) { // 2nd level is a NAND2 gate 410 w_L2_n[0] = 2 * g_tp.min_w_nmos_; 411 F = gnand2; 412 } else { // 2nd level is a NAND3 gate 413 w_L2_n[0] = 3 * g_tp.min_w_nmos_; 414 F = gnand3; 415 } 416 w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 417 F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); 418 number_gates_L2 = logical_effort( 419 min_number_gates_L2, 420 flag_L2_gate == 2 ? gnand2 : gnand3, 421 F, 422 w_L2_n, 423 w_L2_p, 424 C_ld_predec_blk_out, 425 p_to_n_sz_ratio, 426 is_dram_, false, 427 g_tp.max_w_nmos_); |
491 | 428 |
492 //Now find widths of gates along path in which first gate is a NAND3 493 if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3)) 494 { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs 495 // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means 496 // a NAND3 gate is used in the first level of the predecoder 497 c_load_nand3_path = branch_effort_nand3_gate_output * 498 (gate_C(w_L2_n[0], 0, is_dram_) + 499 gate_C(w_L2_p[0], 0, is_dram_)); 500 w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; 501 w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 502 F = gnand3 * c_load_nand3_path / 503 (gate_C(w_L1_nand3_n[0], 0, is_dram_) + 504 gate_C(w_L1_nand3_p[0], 0, is_dram_)); 505 number_gates_L1_nand3_path = logical_effort( 506 min_number_gates_L1, 507 gnand3, 508 F, 509 w_L1_nand3_n, 510 w_L1_nand3_p, 511 c_load_nand3_path, 512 p_to_n_sz_ratio, 513 is_dram_, false, 514 g_tp.max_w_nmos_); | 429 // Now find the number of gates and widths in first level of predecoder 430 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) { 431 // Whenever flag_two_unique_paths is true, it means first level of 432 // decoder employs 433 // both NAND2 and NAND3 gates. 
Or when number_inputs_L1_gate is 2, 434 // it means 435 // a NAND2 gate is used in the first level of the predecoder 436 c_load_nand2_path = branch_effort_nand2_gate_output * 437 (gate_C(w_L2_n[0], 0, is_dram_) + 438 gate_C(w_L2_p[0], 0, is_dram_)); 439 w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; 440 w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 441 F = gnand2 * c_load_nand2_path / 442 (gate_C(w_L1_nand2_n[0], 0, is_dram_) + 443 gate_C(w_L1_nand2_p[0], 0, is_dram_)); 444 number_gates_L1_nand2_path = logical_effort( 445 min_number_gates_L1, 446 gnand2, 447 F, 448 w_L1_nand2_n, 449 w_L1_nand2_p, 450 c_load_nand2_path, 451 p_to_n_sz_ratio, 452 is_dram_, false, 453 g_tp.max_w_nmos_); 454 } 455 456 //Now find widths of gates along path in which first gate is a NAND3 457 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs 458 // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means 459 // a NAND3 gate is used in the first level of the predecoder 460 c_load_nand3_path = branch_effort_nand3_gate_output * 461 (gate_C(w_L2_n[0], 0, is_dram_) + 462 gate_C(w_L2_p[0], 0, is_dram_)); 463 w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; 464 w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 465 F = gnand3 * c_load_nand3_path / 466 (gate_C(w_L1_nand3_n[0], 0, is_dram_) + 467 gate_C(w_L1_nand3_p[0], 0, is_dram_)); 468 number_gates_L1_nand3_path = logical_effort( 469 min_number_gates_L1, 470 gnand3, 471 F, 472 w_L1_nand3_n, 473 w_L1_nand3_p, 474 c_load_nand3_path, 475 p_to_n_sz_ratio, 476 is_dram_, false, 477 g_tp.max_w_nmos_); 478 } 479 } else { // find number of gates and widths in first level of predecoder block when there is no second level 480 if (number_inputs_L1_gate == 2) { 481 w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; 482 w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 483 F = gnand2 * C_ld_predec_blk_out / 484 (gate_C(w_L1_nand2_n[0], 0, is_dram_) + 485 
gate_C(w_L1_nand2_p[0], 0, is_dram_)); 486 number_gates_L1_nand2_path = logical_effort( 487 min_number_gates_L1, 488 gnand2, 489 F, 490 w_L1_nand2_n, 491 w_L1_nand2_p, 492 C_ld_predec_blk_out, 493 p_to_n_sz_ratio, 494 is_dram_, false, 495 g_tp.max_w_nmos_); 496 } else if (number_inputs_L1_gate == 3) { 497 w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; 498 w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 499 F = gnand3 * C_ld_predec_blk_out / 500 (gate_C(w_L1_nand3_n[0], 0, is_dram_) + 501 gate_C(w_L1_nand3_p[0], 0, is_dram_)); 502 number_gates_L1_nand3_path = logical_effort( 503 min_number_gates_L1, 504 gnand3, 505 F, 506 w_L1_nand3_n, 507 w_L1_nand3_p, 508 C_ld_predec_blk_out, 509 p_to_n_sz_ratio, 510 is_dram_, false, 511 g_tp.max_w_nmos_); 512 } |
515 } | 513 } |
516 } 517 else 518 { // find number of gates and widths in first level of predecoder block when there is no second level 519 if (number_inputs_L1_gate == 2) 520 { 521 w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; 522 w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 523 F = gnand2*C_ld_predec_blk_out / 524 (gate_C(w_L1_nand2_n[0], 0, is_dram_) + 525 gate_C(w_L1_nand2_p[0], 0, is_dram_)); 526 number_gates_L1_nand2_path = logical_effort( 527 min_number_gates_L1, 528 gnand2, 529 F, 530 w_L1_nand2_n, 531 w_L1_nand2_p, 532 C_ld_predec_blk_out, 533 p_to_n_sz_ratio, 534 is_dram_, false, 535 g_tp.max_w_nmos_); 536 } 537 else if (number_inputs_L1_gate == 3) 538 { 539 w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; 540 w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 541 F = gnand3*C_ld_predec_blk_out / 542 (gate_C(w_L1_nand3_n[0], 0, is_dram_) + 543 gate_C(w_L1_nand3_p[0], 0, is_dram_)); 544 number_gates_L1_nand3_path = logical_effort( 545 min_number_gates_L1, 546 gnand3, 547 F, 548 w_L1_nand3_n, 549 w_L1_nand3_p, 550 C_ld_predec_blk_out, 551 p_to_n_sz_ratio, 552 is_dram_, false, 553 g_tp.max_w_nmos_); 554 } 555 } | |
556} 557 558 559 | 514} 515 516 517 |
560void PredecBlk::compute_area() 561{ 562 if (exist) 563 { // First check whether a predecoder block is needed 564 int num_L1_nand2 = 0; 565 int num_L1_nand3 = 0; 566 int num_L2 = 0; 567 double tot_area_L1_nand3 =0; 568 double leak_L1_nand3 =0; 569 double gate_leak_L1_nand3 =0; | 518void PredecBlk::compute_area() { 519 if (exist) { // First check whether a predecoder block is needed 520 int num_L1_nand2 = 0; 521 int num_L1_nand3 = 0; 522 int num_L2 = 0; 523 double tot_area_L1_nand3 = 0; 524 double leak_L1_nand3 = 0; 525 double gate_leak_L1_nand3 = 0; |
570 | 526 |
571 double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def); 572 double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); 573 double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); 574 if (number_inputs_L1_gate != 3) { 575 tot_area_L1_nand3 = 0; 576 leak_L1_nand3 = 0; 577 gate_leak_L1_nand3 =0; 578 } 579 else { 580 tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def); 581 leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); 582 gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); 583 } | 527 double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def); 528 double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); 529 double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); 530 if (number_inputs_L1_gate != 3) { 531 tot_area_L1_nand3 = 0; 532 leak_L1_nand3 = 0; 533 gate_leak_L1_nand3 = 0; 534 } else { 535 tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def); 536 leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); 537 gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); 538 } |
584 | 539 |
585 switch (number_input_addr_bits) 586 { 587 case 1: //2 NAND2 gates 588 num_L1_nand2 = 2; 589 num_L2 = 0; 590 num_L1_active_nand2_path =1; 591 num_L1_active_nand3_path =0; 592 break; 593 case 2: //4 NAND2 gates 594 num_L1_nand2 = 4; 595 num_L2 = 0; 596 num_L1_active_nand2_path =1; 597 num_L1_active_nand3_path =0; 598 break; 599 case 3: //8 NAND3 gates 600 num_L1_nand3 = 8; 601 num_L2 = 0; 602 num_L1_active_nand2_path =0; 603 num_L1_active_nand3_path =1; 604 break; 605 case 4: //4 + 4 NAND2 gates 606 num_L1_nand2 = 8; 607 num_L2 = 16; 608 num_L1_active_nand2_path =2; 609 num_L1_active_nand3_path =0; 610 break; 611 case 5: //4 NAND2 gates, 8 NAND3 gates 612 num_L1_nand2 = 4; 613 num_L1_nand3 = 8; 614 num_L2 = 32; 615 num_L1_active_nand2_path =1; 616 num_L1_active_nand3_path =1; 617 break; 618 case 6: //8 + 8 NAND3 gates 619 num_L1_nand3 = 16; 620 num_L2 = 64; 621 num_L1_active_nand2_path =0; 622 num_L1_active_nand3_path =2; 623 break; 624 case 7: //4 + 4 NAND2 gates, 8 NAND3 gates 625 num_L1_nand2 = 8; 626 num_L1_nand3 = 8; 627 num_L2 = 128; 628 num_L1_active_nand2_path =2; 629 num_L1_active_nand3_path =1; 630 break; 631 case 8: //4 NAND2 gates, 8 + 8 NAND3 gates 632 num_L1_nand2 = 4; 633 num_L1_nand3 = 16; 634 num_L2 = 256; 635 num_L1_active_nand2_path =2; 636 num_L1_active_nand3_path =2; 637 break; 638 case 9: //8 + 8 + 8 NAND3 gates 639 num_L1_nand3 = 24; 640 num_L2 = 512; 641 num_L1_active_nand2_path =0; 642 num_L1_active_nand3_path =3; 643 break; 644 default: 645 break; 646 } | 540 switch (number_input_addr_bits) { 541 case 1: //2 NAND2 gates 542 num_L1_nand2 = 2; 543 num_L2 = 0; 544 num_L1_active_nand2_path = 1; 545 num_L1_active_nand3_path = 0; 546 break; 547 case 2: //4 NAND2 gates 548 num_L1_nand2 = 4; 549 num_L2 = 0; 550 num_L1_active_nand2_path = 1; 551 num_L1_active_nand3_path = 0; 552 break; 553 case 3: //8 NAND3 gates 554 num_L1_nand3 = 8; 555 num_L2 = 0; 556 num_L1_active_nand2_path = 0; 557 num_L1_active_nand3_path = 1; 558 break; 559 case 4: //4 + 
4 NAND2 gates 560 num_L1_nand2 = 8; 561 num_L2 = 16; 562 num_L1_active_nand2_path = 2; 563 num_L1_active_nand3_path = 0; 564 break; 565 case 5: //4 NAND2 gates, 8 NAND3 gates 566 num_L1_nand2 = 4; 567 num_L1_nand3 = 8; 568 num_L2 = 32; 569 num_L1_active_nand2_path = 1; 570 num_L1_active_nand3_path = 1; 571 break; 572 case 6: //8 + 8 NAND3 gates 573 num_L1_nand3 = 16; 574 num_L2 = 64; 575 num_L1_active_nand2_path = 0; 576 num_L1_active_nand3_path = 2; 577 break; 578 case 7: //4 + 4 NAND2 gates, 8 NAND3 gates 579 num_L1_nand2 = 8; 580 num_L1_nand3 = 8; 581 num_L2 = 128; 582 num_L1_active_nand2_path = 2; 583 num_L1_active_nand3_path = 1; 584 break; 585 case 8: //4 NAND2 gates, 8 + 8 NAND3 gates 586 num_L1_nand2 = 4; 587 num_L1_nand3 = 16; 588 num_L2 = 256; 589 num_L1_active_nand2_path = 2; 590 num_L1_active_nand3_path = 2; 591 break; 592 case 9: //8 + 8 + 8 NAND3 gates 593 num_L1_nand3 = 24; 594 num_L2 = 512; 595 num_L1_active_nand2_path = 0; 596 num_L1_active_nand3_path = 3; 597 break; 598 default: 599 break; 600 } |
647 | 601 |
648 for (int i = 1; i < number_gates_L1_nand2_path; ++i) 649 { 650 tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def); 651 leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); 652 gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); 653 } 654 tot_area_L1_nand2 *= num_L1_nand2; 655 leak_L1_nand2 *= num_L1_nand2; 656 gate_leak_L1_nand2 *= num_L1_nand2; | 602 for (int i = 1; i < number_gates_L1_nand2_path; ++i) { 603 tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def); 604 leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); 605 gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); 606 } 607 tot_area_L1_nand2 *= num_L1_nand2; 608 leak_L1_nand2 *= num_L1_nand2; 609 gate_leak_L1_nand2 *= num_L1_nand2; |
657 | 610 |
658 for (int i = 1; i < number_gates_L1_nand3_path; ++i) 659 { 660 tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def); 661 leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); 662 gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); 663 } 664 tot_area_L1_nand3 *= num_L1_nand3; 665 leak_L1_nand3 *= num_L1_nand3; 666 gate_leak_L1_nand3 *= num_L1_nand3; | 611 for (int i = 1; i < number_gates_L1_nand3_path; ++i) { 612 tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def); 613 leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); 614 gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); 615 } 616 tot_area_L1_nand3 *= num_L1_nand3; 617 leak_L1_nand3 *= num_L1_nand3; 618 gate_leak_L1_nand3 *= num_L1_nand3; |
667 | 619 |
668 double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3; 669 double cumulative_area_L2 = 0.0; 670 double leakage_L2 = 0.0; 671 double gate_leakage_L2 = 0.0; | 620 double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3; 621 double cumulative_area_L2 = 0.0; 622 double leakage_L2 = 0.0; 623 double gate_leakage_L2 = 0.0; |
672 | 624 |
673 if (flag_L2_gate == 2) 674 { 675 cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); 676 leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); 677 gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); 678 } 679 else if (flag_L2_gate == 3) 680 { 681 cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); 682 leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); 683 gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); 684 } | 625 if (flag_L2_gate == 2) { 626 cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); 627 leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); 628 gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); 629 } else if (flag_L2_gate == 3) { 630 cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); 631 leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); 632 gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); 633 } |
685 | 634 |
686 for (int i = 1; i < number_gates_L2; ++i) 687 { 688 cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def); 689 leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); 690 gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); 691 } 692 cumulative_area_L2 *= num_L2; 693 leakage_L2 *= num_L2; 694 gate_leakage_L2 *= num_L2; | 635 for (int i = 1; i < number_gates_L2; ++i) { 636 cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def); 637 leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); 638 gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); 639 } 640 cumulative_area_L2 *= num_L2; 641 leakage_L2 *= num_L2; 642 gate_leakage_L2 *= num_L2; |
695 | 643 |
696 power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd; 697 power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd; 698 power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd; 699 area.set_area(cumulative_area_L1 + cumulative_area_L2); 700 power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd; 701 power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd; 702 power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; 703 } | 644 power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd; 645 power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd; 646 power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd; 647 area.set_area(cumulative_area_L1 + cumulative_area_L2); 648 power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd; 649 power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd; 650 power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; 651 } |
704} 705 706 707 708pair<double, double> PredecBlk::compute_delays( | 652} 653 654 655 656pair<double, double> PredecBlk::compute_delays( |
709 pair 710{ 711 pair<double, double> ret_val; 712 ret_val.first = 0; // outrisetime_nand2_path 713 ret_val.second = 0; // outrisetime_nand3_path | 657 pair<double, double> inrisetime) { // <nand2, nand3> 658 pair<double, double> ret_val; 659 ret_val.first = 0; // outrisetime_nand2_path 660 ret_val.second = 0; // outrisetime_nand3_path |
714 | 661 |
715 double inrisetime_nand2_path = inrisetime.first; 716 double inrisetime_nand3_path = inrisetime.second; 717 int i; 718 double rd, c_load, c_intrinsic, tf, this_delay; 719 double Vdd = g_tp.peri_global.Vdd; | 662 double inrisetime_nand2_path = inrisetime.first; 663 double inrisetime_nand3_path = inrisetime.second; 664 int i; 665 double rd, c_load, c_intrinsic, tf, this_delay; 666 double Vdd = g_tp.peri_global.Vdd; |
720 | 667 |
721 // TODO: following delay calculation part can be greatly simplified. 722 // first check whether a predecoder block is required 723 if (exist) 724 { 725 //Find delay in first level of predecoder block 726 //First find delay in path 727 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) 728 { 729 //First gate is a NAND2 gate 730 rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_); 731 c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_); 732 c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 733 drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); 734 tf = rd * (c_intrinsic + c_load); 735 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 736 delay_nand2_path += this_delay; 737 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 738 power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; | 668 // TODO: following delay calculation part can be greatly simplified. 669 // first check whether a predecoder block is required 670 if (exist) { 671 //Find delay in first level of predecoder block 672 //First find delay in path 673 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) { 674 //First gate is a NAND2 gate 675 rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_); 676 c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_); 677 c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 678 drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); 679 tf = rd * (c_intrinsic + c_load); 680 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 681 delay_nand2_path += this_delay; 682 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 683 power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; |
739 | 684 |
740 //Add delays of all but the last inverter in the chain 741 for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) 742 { 743 rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); 744 c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_); 745 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 746 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 747 tf = rd * (c_intrinsic + c_load); 748 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 749 delay_nand2_path += this_delay; 750 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 751 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 752 } | 685 //Add delays of all but the last inverter in the chain 686 for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) { 687 rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); 688 c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_); 689 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 690 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 691 tf = rd * (c_intrinsic + c_load); 692 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 693 delay_nand2_path += this_delay; 694 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 695 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 696 } |
753 | 697 |
754 //Add delay of the last inverter 755 i = number_gates_L1_nand2_path - 1; 756 rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); 757 if (flag_L2_gate) 758 { 759 c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); 760 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 761 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 762 tf = rd * (c_intrinsic + c_load); 763 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 764 delay_nand2_path += this_delay; 765 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 766 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 767 } 768 else 769 { //First level directly drives decoder output load 770 c_load = C_ld_predec_blk_out; 771 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 772 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 773 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; 774 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 775 delay_nand2_path += this_delay; 776 ret_val.first = this_delay / (1.0 - 0.5); 777 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 778 } 779 } | 698 //Add delay of the last inverter 699 i = number_gates_L1_nand2_path - 1; 700 rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); 701 if (flag_L2_gate) { 702 c_load = branch_effort_nand2_gate_output * 703 (gate_C(w_L2_n[0], 0, is_dram_) + 704 gate_C(w_L2_p[0], 0, is_dram_)); 705 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 706 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 707 tf = rd * (c_intrinsic + c_load); 708 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 709 delay_nand2_path += this_delay; 710 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 711 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 712 } else { //First level 
directly drives decoder output load 713 c_load = C_ld_predec_blk_out; 714 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 715 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 716 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; 717 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 718 delay_nand2_path += this_delay; 719 ret_val.first = this_delay / (1.0 - 0.5); 720 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 721 } 722 } |
780 | 723 |
781 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) 782 { //Check if the number of gates in the first level is more than 1. 783 //First gate is a NAND3 gate 784 rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_); 785 c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_); 786 c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 787 drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); 788 tf = rd * (c_intrinsic + c_load); 789 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 790 delay_nand3_path += this_delay; 791 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 792 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; | 724 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { 725 //Check if the number of gates in the first level is more than 1. 726 //First gate is a NAND3 gate 727 rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_); 728 c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_); 729 c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 730 drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); 731 tf = rd * (c_intrinsic + c_load); 732 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 733 delay_nand3_path += this_delay; 734 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 735 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; |
793 | 736 |
794 //Add delays of all but the last inverter in the chain 795 for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) 796 { 797 rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); 798 c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_); 799 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 800 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 801 tf = rd * (c_intrinsic + c_load); 802 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 803 delay_nand3_path += this_delay; 804 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 805 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 806 } | 737 //Add delays of all but the last inverter in the chain 738 for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) { 739 rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); 740 c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_); 741 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 742 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 743 tf = rd * (c_intrinsic + c_load); 744 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 745 delay_nand3_path += this_delay; 746 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 747 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 748 } |
807 | 749 |
808 //Add delay of the last inverter 809 i = number_gates_L1_nand3_path - 1; 810 rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); 811 if (flag_L2_gate) 812 { 813 c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); 814 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 815 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 816 tf = rd * (c_intrinsic + c_load); 817 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 818 delay_nand3_path += this_delay; 819 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 820 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 821 } 822 else 823 { //First level directly drives decoder output load 824 c_load = C_ld_predec_blk_out; 825 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 826 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 827 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; 828 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 829 delay_nand3_path += this_delay; 830 ret_val.second = this_delay / (1.0 - 0.5); 831 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 832 } 833 } | 750 //Add delay of the last inverter 751 i = number_gates_L1_nand3_path - 1; 752 rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); 753 if (flag_L2_gate) { 754 c_load = branch_effort_nand3_gate_output * 755 (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, 756 is_dram_)); 757 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 758 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 759 tf = rd * (c_intrinsic + c_load); 760 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 761 delay_nand3_path += this_delay; 762 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 763 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 764 } else { //First level 
directly drives decoder output load 765 c_load = C_ld_predec_blk_out; 766 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 767 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 768 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; 769 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 770 delay_nand3_path += this_delay; 771 ret_val.second = this_delay / (1.0 - 0.5); 772 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 773 } 774 } |
834 | 775 |
835 // Find delay through second level 836 if (flag_L2_gate) 837 { 838 if (flag_L2_gate == 2) 839 { 840 rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_); 841 c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); 842 c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 843 drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); 844 tf = rd * (c_intrinsic + c_load); 845 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 846 delay_nand2_path += this_delay; 847 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 848 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 849 } 850 else 851 { // flag_L2_gate = 3 852 rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_); 853 c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); 854 c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 855 drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); 856 tf = rd * (c_intrinsic + c_load); 857 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 858 delay_nand3_path += this_delay; 859 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 860 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 861 } | 776 // Find delay through second level 777 if (flag_L2_gate) { 778 if (flag_L2_gate == 2) { 779 rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_); 780 c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); 781 c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 782 drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); 783 tf = rd * (c_intrinsic + c_load); 784 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 785 delay_nand2_path += this_delay; 786 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 787 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 788 } else { // flag_L2_gate = 3 789 rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_); 790 c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); 791 c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, 
is_dram_) + 792 drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); 793 tf = rd * (c_intrinsic + c_load); 794 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 795 delay_nand3_path += this_delay; 796 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 797 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 798 } |
862 | 799 |
863 for (i = 1; i < number_gates_L2 - 1; ++i) 864 { 865 rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); 866 c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_); 867 c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 868 drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 869 tf = rd * (c_intrinsic + c_load); 870 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 871 delay_nand2_path += this_delay; 872 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 873 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 874 delay_nand3_path += this_delay; 875 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 876 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 877 } | 800 for (i = 1; i < number_gates_L2 - 1; ++i) { 801 rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); 802 c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_); 803 c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 804 drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 805 tf = rd * (c_intrinsic + c_load); 806 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 807 delay_nand2_path += this_delay; 808 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 809 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 810 delay_nand3_path += this_delay; 811 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 812 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 813 } |
878 | 814 |
879 //Add delay of final inverter that drives the wordline decoders 880 i = number_gates_L2 - 1; 881 c_load = C_ld_predec_blk_out; 882 rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); 883 c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 884 drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 885 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; 886 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 887 delay_nand2_path += this_delay; 888 ret_val.first = this_delay / (1.0 - 0.5); 889 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 890 delay_nand3_path += this_delay; 891 ret_val.second = this_delay / (1.0 - 0.5); 892 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; | 815 //Add delay of final inverter that drives the wordline decoders 816 i = number_gates_L2 - 1; 817 c_load = C_ld_predec_blk_out; 818 rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); 819 c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 820 drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 821 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; 822 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 823 delay_nand2_path += this_delay; 824 ret_val.first = this_delay / (1.0 - 0.5); 825 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 826 delay_nand3_path += this_delay; 827 ret_val.second = this_delay / (1.0 - 0.5); 828 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 829 } |
893 } | 830 } |
894 } | |
895 | 831 |
896 delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second; 897 return ret_val; | 832 delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second; 833 return ret_val; |
898} 899 900void PredecBlk::leakage_feedback(double temperature) 901{ 902 if (exist) 903 { // First check whether a predecoder block is needed 904 int num_L1_nand2 = 0; 905 int num_L1_nand3 = 0; --- 122 unchanged lines hidden (view full) --- 1028 power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; 1029 } 1030} 1031 1032PredecBlkDrv::PredecBlkDrv( 1033 int way_select_, 1034 PredecBlk * blk_, 1035 bool is_dram) | 834} 835 836void PredecBlk::leakage_feedback(double temperature) 837{ 838 if (exist) 839 { // First check whether a predecoder block is needed 840 int num_L1_nand2 = 0; 841 int num_L1_nand3 = 0; --- 122 unchanged lines hidden (view full) --- 964 power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; 965 } 966} 967 968PredecBlkDrv::PredecBlkDrv( 969 int way_select_, 970 PredecBlk * blk_, 971 bool is_dram) |
1036 :flag_driver_exists(0), 1037 number_gates_nand2_path(0), 1038 number_gates_nand3_path(0), 1039 min_number_gates(2), 1040 num_buffers_driving_1_nand2_load(0), 1041 num_buffers_driving_2_nand2_load(0), 1042 num_buffers_driving_4_nand2_load(0), 1043 num_buffers_driving_2_nand3_load(0), 1044 num_buffers_driving_8_nand3_load(0), 1045 num_buffers_nand3_path(0), 1046 c_load_nand2_path_out(0), 1047 c_load_nand3_path_out(0), 1048 r_load_nand2_path_out(0), 1049 r_load_nand3_path_out(0), 1050 delay_nand2_path(0), 1051 delay_nand3_path(0), 1052 power_nand2_path(), 1053 power_nand3_path(), 1054 blk(blk_), dec(blk->dec), 1055 is_dram_(is_dram), 1056 way_select(way_select_) 1057{ 1058 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) 1059 { 1060 width_nand2_path_n[i] = 0; 1061 width_nand2_path_p[i] = 0; 1062 width_nand3_path_n[i] = 0; 1063 width_nand3_path_p[i] = 0; 1064 } | 972 : flag_driver_exists(0), 973 number_gates_nand2_path(0), 974 number_gates_nand3_path(0), 975 min_number_gates(2), 976 num_buffers_driving_1_nand2_load(0), 977 num_buffers_driving_2_nand2_load(0), 978 num_buffers_driving_4_nand2_load(0), 979 num_buffers_driving_2_nand3_load(0), 980 num_buffers_driving_8_nand3_load(0), 981 num_buffers_nand3_path(0), 982 c_load_nand2_path_out(0), 983 c_load_nand3_path_out(0), 984 r_load_nand2_path_out(0), 985 r_load_nand3_path_out(0), 986 delay_nand2_path(0), 987 delay_nand3_path(0), 988 power_nand2_path(), 989 power_nand3_path(), 990 blk(blk_), dec(blk->dec), 991 is_dram_(is_dram), 992 way_select(way_select_) { 993 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { 994 width_nand2_path_n[i] = 0; 995 width_nand2_path_p[i] = 0; 996 width_nand3_path_n[i] = 0; 997 width_nand3_path_p[i] = 0; 998 } |
1065 | 999 |
1066 number_input_addr_bits = blk->number_input_addr_bits; | 1000 number_input_addr_bits = blk->number_input_addr_bits; |
1067 | 1001 |
1068 if (way_select > 1) 1069 { 1070 flag_driver_exists = 1; 1071 number_input_addr_bits = way_select; 1072 if (dec->num_in_signals == 2) 1073 { 1074 c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); 1075 num_buffers_driving_2_nand2_load = number_input_addr_bits; | 1002 if (way_select > 1) { 1003 flag_driver_exists = 1; 1004 number_input_addr_bits = way_select; 1005 if (dec->num_in_signals == 2) { 1006 c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); 1007 num_buffers_driving_2_nand2_load = number_input_addr_bits; 1008 } else if (dec->num_in_signals == 3) { 1009 c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); 1010 num_buffers_driving_2_nand3_load = number_input_addr_bits; 1011 } 1012 } else if (way_select == 0) { 1013 if (blk->exist) { 1014 flag_driver_exists = 1; 1015 } |
1076 } | 1016 } |
1077 else if (dec->num_in_signals == 3) 1078 { 1079 c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); 1080 num_buffers_driving_2_nand3_load = number_input_addr_bits; 1081 } 1082 } 1083 else if (way_select == 0) 1084 { 1085 if (blk->exist) 1086 { 1087 flag_driver_exists = 1; 1088 } 1089 } | |
1090 | 1017 |
1091 compute_widths(); 1092 compute_area(); | 1018 compute_widths(); 1019 compute_area(); |
1093} 1094 1095 1096 | 1020} 1021 1022 1023 |
1097void PredecBlkDrv::compute_widths() 1098{ 1099 // The predecode block driver accepts as input the address bits from the h-tree network. For 1100 // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of 1101 // inversion to generate addrbar and simply treat addrbar as addr. | 1024void PredecBlkDrv::compute_widths() { 1025 // The predecode block driver accepts as input the address bits from the h-tree network. For 1026 // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of 1027 // inversion to generate addrbar and simply treat addrbar as addr. |
1102 | 1028 |
1103 double F; 1104 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); | 1029 double F; 1030 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); |
1105 | 1031 |
1106 if (flag_driver_exists) 1107 { 1108 double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_); 1109 double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_); | 1032 if (flag_driver_exists) { 1033 double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_); 1034 double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_); |
1110 | 1035 |
1111 if (way_select == 0) 1112 { 1113 if (blk->number_input_addr_bits == 1) 1114 { //2 NAND2 gates 1115 num_buffers_driving_2_nand2_load = 1; 1116 c_load_nand2_path_out = 2 * C_nand2_gate_blk; 1117 } 1118 else if (blk->number_input_addr_bits == 2) 1119 { //4 NAND2 gates one 2-4 decoder 1120 num_buffers_driving_4_nand2_load = 2; 1121 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1122 } 1123 else if (blk->number_input_addr_bits == 3) 1124 { //8 NAND3 gates one 3-8 decoder 1125 num_buffers_driving_8_nand3_load = 3; 1126 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1127 } 1128 else if (blk->number_input_addr_bits == 4) 1129 { //4 + 4 NAND2 gates two 2-4 decoder 1130 num_buffers_driving_4_nand2_load = 4; 1131 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1132 } 1133 else if (blk->number_input_addr_bits == 5) 1134 { //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder 1135 num_buffers_driving_4_nand2_load = 2; 1136 num_buffers_driving_8_nand3_load = 3; 1137 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1138 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1139 } 1140 else if (blk->number_input_addr_bits == 6) 1141 { //8 + 8 NAND3 gates two 3-8 decoder 1142 num_buffers_driving_8_nand3_load = 6; 1143 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1144 } 1145 else if (blk->number_input_addr_bits == 7) 1146 { //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder 1147 num_buffers_driving_4_nand2_load = 4; 1148 num_buffers_driving_8_nand3_load = 3; 1149 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1150 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1151 } 1152 else if (blk->number_input_addr_bits == 8) 1153 { //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder 1154 num_buffers_driving_4_nand2_load = 2; 1155 num_buffers_driving_8_nand3_load = 6; 1156 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1157 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1158 } 1159 else if (blk->number_input_addr_bits == 9) 1160 { //8 + 8 + 8 NAND3 gates 
three 3-8 decoder 1161 num_buffers_driving_8_nand3_load = 9; 1162 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1163 } 1164 } | 1036 if (way_select == 0) { 1037 if (blk->number_input_addr_bits == 1) { 1038 //2 NAND2 gates 1039 num_buffers_driving_2_nand2_load = 1; 1040 c_load_nand2_path_out = 2 * C_nand2_gate_blk; 1041 } else if (blk->number_input_addr_bits == 2) { 1042 //4 NAND2 gates one 2-4 decoder 1043 num_buffers_driving_4_nand2_load = 2; 1044 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1045 } else if (blk->number_input_addr_bits == 3) { 1046 //8 NAND3 gates one 3-8 decoder 1047 num_buffers_driving_8_nand3_load = 3; 1048 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1049 } else if (blk->number_input_addr_bits == 4) { 1050 //4 + 4 NAND2 gates two 2-4 decoder 1051 num_buffers_driving_4_nand2_load = 4; 1052 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1053 } else if (blk->number_input_addr_bits == 5) { 1054 //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 1055 //decoder 1056 num_buffers_driving_4_nand2_load = 2; 1057 num_buffers_driving_8_nand3_load = 3; 1058 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1059 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1060 } else if (blk->number_input_addr_bits == 6) { 1061 //8 + 8 NAND3 gates two 3-8 decoder 1062 num_buffers_driving_8_nand3_load = 6; 1063 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1064 } else if (blk->number_input_addr_bits == 7) { 1065 //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 1066 //decoder 1067 num_buffers_driving_4_nand2_load = 4; 1068 num_buffers_driving_8_nand3_load = 3; 1069 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1070 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1071 } else if (blk->number_input_addr_bits == 8) { 1072 //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 1073 //decoder 1074 num_buffers_driving_4_nand2_load = 2; 1075 num_buffers_driving_8_nand3_load = 6; 1076 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1077 c_load_nand3_path_out = 
8 * C_nand3_gate_blk; 1078 } else if (blk->number_input_addr_bits == 9) { 1079 //8 + 8 + 8 NAND3 gates three 3-8 decoder 1080 num_buffers_driving_8_nand3_load = 9; 1081 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1082 } 1083 } |
1165 | 1084 |
1166 if ((blk->flag_two_unique_paths) || 1167 (blk->number_inputs_L1_gate == 2) || 1168 (number_input_addr_bits == 0) || 1169 ((way_select)&&(dec->num_in_signals == 2))) 1170 { //this means that way_select is driving NAND2 in decoder. 1171 width_nand2_path_n[0] = g_tp.min_w_nmos_; 1172 width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0]; 1173 F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_); 1174 number_gates_nand2_path = logical_effort( 1175 min_number_gates, 1176 1, 1177 F, 1178 width_nand2_path_n, 1179 width_nand2_path_p, 1180 c_load_nand2_path_out, 1181 p_to_n_sz_ratio, 1182 is_dram_, false, g_tp.max_w_nmos_); 1183 } | 1085 if ((blk->flag_two_unique_paths) || 1086 (blk->number_inputs_L1_gate == 2) || 1087 (number_input_addr_bits == 0) || 1088 ((way_select) && (dec->num_in_signals == 2))) { 1089 //this means that way_select is driving NAND2 in decoder. 1090 width_nand2_path_n[0] = g_tp.min_w_nmos_; 1091 width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0]; 1092 F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_); 1093 number_gates_nand2_path = logical_effort( 1094 min_number_gates, 1095 1, 1096 F, 1097 width_nand2_path_n, 1098 width_nand2_path_p, 1099 c_load_nand2_path_out, 1100 p_to_n_sz_ratio, 1101 is_dram_, false, g_tp.max_w_nmos_); 1102 } |
1184 | 1103 |
1185 if ((blk->flag_two_unique_paths) || 1186 (blk->number_inputs_L1_gate == 3) || 1187 ((way_select)&&(dec->num_in_signals == 3))) 1188 { //this means that way_select is driving NAND3 in decoder. 1189 width_nand3_path_n[0] = g_tp.min_w_nmos_; 1190 width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0]; 1191 F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_); 1192 number_gates_nand3_path = logical_effort( 1193 min_number_gates, 1194 1, 1195 F, 1196 width_nand3_path_n, 1197 width_nand3_path_p, 1198 c_load_nand3_path_out, 1199 p_to_n_sz_ratio, 1200 is_dram_, false, g_tp.max_w_nmos_); | 1104 if ((blk->flag_two_unique_paths) || 1105 (blk->number_inputs_L1_gate == 3) || 1106 ((way_select) && (dec->num_in_signals == 3))) { 1107 //this means that way_select is driving NAND3 in decoder. 1108 width_nand3_path_n[0] = g_tp.min_w_nmos_; 1109 width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0]; 1110 F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_); 1111 number_gates_nand3_path = logical_effort( 1112 min_number_gates, 1113 1, 1114 F, 1115 width_nand3_path_n, 1116 width_nand3_path_p, 1117 c_load_nand3_path_out, 1118 p_to_n_sz_ratio, 1119 is_dram_, false, g_tp.max_w_nmos_); 1120 } |
1201 } | 1121 } |
1202 } | |
1203} 1204 1205 1206 | 1122} 1123 1124 1125 |
1207void PredecBlkDrv::compute_area() 1208{ 1209 double area_nand2_path = 0; 1210 double area_nand3_path = 0; 1211 double leak_nand2_path = 0; 1212 double leak_nand3_path = 0; 1213 double gate_leak_nand2_path = 0; 1214 double gate_leak_nand3_path = 0; | 1126void PredecBlkDrv::compute_area() { 1127 double area_nand2_path = 0; 1128 double area_nand3_path = 0; 1129 double leak_nand2_path = 0; 1130 double leak_nand3_path = 0; 1131 double gate_leak_nand2_path = 0; 1132 double gate_leak_nand3_path = 0; |
1215 | 1133 |
1216 if (flag_driver_exists) 1217 { // first check whether a predecoder block driver is needed 1218 for (int i = 0; i < number_gates_nand2_path; ++i) 1219 { 1220 area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def); 1221 leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); 1222 gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); 1223 } 1224 area_nand2_path *= (num_buffers_driving_1_nand2_load + 1225 num_buffers_driving_2_nand2_load + 1226 num_buffers_driving_4_nand2_load); 1227 leak_nand2_path *= (num_buffers_driving_1_nand2_load + 1228 num_buffers_driving_2_nand2_load + 1229 num_buffers_driving_4_nand2_load); 1230 gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load + | 1134 if (flag_driver_exists) { 1135 // first check whether a predecoder block driver is needed 1136 for (int i = 0; i < number_gates_nand2_path; ++i) { 1137 area_nand2_path += 1138 compute_gate_area(INV, 1, width_nand2_path_p[i], 1139 width_nand2_path_n[i], g_tp.cell_h_def); 1140 leak_nand2_path += 1141 cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1142 1, inv, is_dram_); 1143 gate_leak_nand2_path += 1144 cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1145 1, inv, is_dram_); 1146 } 1147 area_nand2_path *= (num_buffers_driving_1_nand2_load + |
1231 num_buffers_driving_2_nand2_load + 1232 num_buffers_driving_4_nand2_load); | 1148 num_buffers_driving_2_nand2_load + 1149 num_buffers_driving_4_nand2_load); |
1150 leak_nand2_path *= (num_buffers_driving_1_nand2_load + 1151 num_buffers_driving_2_nand2_load + 1152 num_buffers_driving_4_nand2_load); 1153 gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load + 1154 num_buffers_driving_2_nand2_load + 1155 num_buffers_driving_4_nand2_load); |
|
1233 | 1156 |
1234 for (int i = 0; i < number_gates_nand3_path; ++i) 1235 { 1236 area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def); 1237 leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); 1238 gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); 1239 } 1240 area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); 1241 leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); 1242 gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); | 1157 for (int i = 0; i < number_gates_nand3_path; ++i) { 1158 area_nand3_path += 1159 compute_gate_area(INV, 1, width_nand3_path_p[i], 1160 width_nand3_path_n[i], g_tp.cell_h_def); 1161 leak_nand3_path += 1162 cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1163 1, inv, is_dram_); 1164 gate_leak_nand3_path += 1165 cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1166 1, inv, is_dram_); 1167 } 1168 area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); 1169 leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); 1170 gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); |
1243 | 1171 |
1244 power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; 1245 power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; 1246 power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; 1247 power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; 1248 area.set_area(area_nand2_path + area_nand3_path); 1249 } | 1172 power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; 1173 power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; 1174 power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; 1175 power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; 1176 area.set_area(area_nand2_path + area_nand3_path); 1177 } |
1250} 1251 1252 1253 1254pair<double, double> PredecBlkDrv::compute_delays( 1255 double inrisetime_nand2_path, | 1178} 1179 1180 1181 1182pair<double, double> PredecBlkDrv::compute_delays( 1183 double inrisetime_nand2_path, |
1256 double inrisetime_nand3_path) 1257{ 1258 pair<double, double> ret_val; 1259 ret_val.first = 0; // outrisetime_nand2_path 1260 ret_val.second = 0; // outrisetime_nand3_path 1261 int i; 1262 double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay; 1263 double Vdd = g_tp.peri_global.Vdd; | 1184 double inrisetime_nand3_path) { 1185 pair<double, double> ret_val; 1186 ret_val.first = 0; // outrisetime_nand2_path 1187 ret_val.second = 0; // outrisetime_nand3_path 1188 int i; 1189 double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay; 1190 double Vdd = g_tp.peri_global.Vdd; |
1264 | 1191 |
1265 if (flag_driver_exists) 1266 { 1267 for (i = 0; i < number_gates_nand2_path - 1; ++i) 1268 { 1269 rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); 1270 c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_); 1271 c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1272 drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1273 tf = rd * (c_intrinsic + c_gate_load); 1274 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 1275 delay_nand2_path += this_delay; 1276 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 1277 power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd; 1278 } | 1192 if (flag_driver_exists) { 1193 for (i = 0; i < number_gates_nand2_path - 1; ++i) { 1194 rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); 1195 c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_); 1196 c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1197 drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1198 tf = rd * (c_intrinsic + c_gate_load); 1199 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 1200 delay_nand2_path += this_delay; 1201 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 1202 power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd; 1203 } |
1279 | 1204 |
1280 // Final inverter drives the predecoder block or the decoder output load 1281 if (number_gates_nand2_path != 0) 1282 { 1283 i = number_gates_nand2_path - 1; 1284 rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); 1285 c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1286 drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1287 c_load = c_load_nand2_path_out; 1288 tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out*c_load/ 2; 1289 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 1290 delay_nand2_path += this_delay; 1291 ret_val.first = this_delay / (1.0 - 0.5); 1292 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd; | 1205 // Final inverter drives the predecoder block or the decoder output load 1206 if (number_gates_nand2_path != 0) { 1207 i = number_gates_nand2_path - 1; 1208 rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); 1209 c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1210 drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1211 c_load = c_load_nand2_path_out; 1212 tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out * c_load / 2; 1213 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 1214 delay_nand2_path += this_delay; 1215 ret_val.first = this_delay / (1.0 - 0.5); 1216 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd; |
1293// cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <<endl; | 1217// cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <<endl; |
1294 } | 1218 } |
1295 | 1219 |
1296 for (i = 0; i < number_gates_nand3_path - 1; ++i) 1297 { 1298 rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_); 1299 c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_); 1300 c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1301 drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1302 tf = rd * (c_intrinsic + c_gate_load); 1303 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 1304 delay_nand3_path += this_delay; 1305 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 1306 power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd; 1307 } | 1220 for (i = 0; i < number_gates_nand3_path - 1; ++i) { 1221 rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_); 1222 c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_); 1223 c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1224 drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1225 tf = rd * (c_intrinsic + c_gate_load); 1226 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 1227 delay_nand3_path += this_delay; 1228 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 1229 power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd; 1230 } |
1308 | 1231 |
1309 // Final inverter drives the predecoder block or the decoder output load 1310 if (number_gates_nand3_path != 0) 1311 { 1312 i = number_gates_nand3_path - 1; 1313 rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_); 1314 c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1315 drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1316 c_load = c_load_nand3_path_out; 1317 tf = rd*(c_intrinsic + c_load) + r_load_nand3_path_out*c_load / 2; 1318 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 1319 delay_nand3_path += this_delay; 1320 ret_val.second = this_delay / (1.0 - 0.5); 1321 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd; | 1232 // Final inverter drives the predecoder block or the decoder output load 1233 if (number_gates_nand3_path != 0) { 1234 i = number_gates_nand3_path - 1; 1235 rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_); 1236 c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1237 drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1238 c_load = c_load_nand3_path_out; 1239 tf = rd * (c_intrinsic + c_load) + r_load_nand3_path_out * c_load / 2; 1240 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 1241 delay_nand3_path += this_delay; 1242 ret_val.second = this_delay / (1.0 - 0.5); 1243 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd; 1244 } |
1322 } | 1245 } |
1323 } 1324 return ret_val; | 1246 return ret_val; |
1325} 1326 1327 | 1247} 1248 1249 |
1328double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir) 1329{ 1330 return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic + 1331 num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir; | 1250double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir) { 1251 return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic + 1252 num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir; |
1332} 1333 1334 1335 1336Predec::Predec( 1337 PredecBlkDrv * drv1_, 1338 PredecBlkDrv * drv2_) | 1253} 1254 1255 1256 1257Predec::Predec( 1258 PredecBlkDrv * drv1_, 1259 PredecBlkDrv * drv2_) |
1339:blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) 1340{ 1341 driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + 1342 drv1->power_nand3_path.readOp.leakage + 1343 drv2->power_nand2_path.readOp.leakage + 1344 drv2->power_nand3_path.readOp.leakage; 1345 block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + 1346 blk1->power_nand3_path.readOp.leakage + 1347 blk1->power_L2.readOp.leakage + 1348 blk2->power_nand2_path.readOp.leakage + 1349 blk2->power_nand3_path.readOp.leakage + 1350 blk2->power_L2.readOp.leakage; 1351 power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; | 1260 : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) { 1261 driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + 1262 drv1->power_nand3_path.readOp.leakage + 1263 drv2->power_nand2_path.readOp.leakage + 1264 drv2->power_nand3_path.readOp.leakage; 1265 block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + 1266 blk1->power_nand3_path.readOp.leakage + 1267 blk1->power_L2.readOp.leakage + 1268 blk2->power_nand2_path.readOp.leakage + 1269 blk2->power_nand3_path.readOp.leakage + 1270 blk2->power_L2.readOp.leakage; 1271 power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; |
1352 | 1272 |
1353 driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + 1354 drv1->power_nand3_path.readOp.gate_leakage + 1355 drv2->power_nand2_path.readOp.gate_leakage + 1356 drv2->power_nand3_path.readOp.gate_leakage; 1357 block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + 1358 blk1->power_nand3_path.readOp.gate_leakage + 1359 blk1->power_L2.readOp.gate_leakage + 1360 blk2->power_nand2_path.readOp.gate_leakage + 1361 blk2->power_nand3_path.readOp.gate_leakage + 1362 blk2->power_L2.readOp.gate_leakage; 1363 power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; | 1273 driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + 1274 drv1->power_nand3_path.readOp.gate_leakage + 1275 drv2->power_nand2_path.readOp.gate_leakage + 1276 drv2->power_nand3_path.readOp.gate_leakage; 1277 block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + 1278 blk1->power_nand3_path.readOp.gate_leakage + 1279 blk1->power_L2.readOp.gate_leakage + 1280 blk2->power_nand2_path.readOp.gate_leakage + 1281 blk2->power_nand3_path.readOp.gate_leakage + 1282 blk2->power_L2.readOp.gate_leakage; 1283 power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; |
1364} 1365 1366void PredecBlkDrv::leakage_feedback(double temperature) 1367{ 1368 double leak_nand2_path = 0; 1369 double leak_nand3_path = 0; 1370 double gate_leak_nand2_path = 0; 1371 double gate_leak_nand3_path = 0; --- 22 unchanged lines hidden (view full) --- 1394 1395 power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; 1396 power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; 1397 power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; 1398 power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; 1399 } 1400} 1401 | 1284} 1285 1286void PredecBlkDrv::leakage_feedback(double temperature) 1287{ 1288 double leak_nand2_path = 0; 1289 double leak_nand3_path = 0; 1290 double gate_leak_nand2_path = 0; 1291 double gate_leak_nand3_path = 0; --- 22 unchanged lines hidden (view full) --- 1314 1315 power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; 1316 power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; 1317 power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; 1318 power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; 1319 } 1320} 1321 |
1402double Predec::compute_delays(double inrisetime) 1403{ 1404 // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block. 1405 pair<double, double> tmp_pair1, tmp_pair2; 1406 tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime); 1407 tmp_pair1 = blk1->compute_delays(tmp_pair1); 1408 tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime); 1409 tmp_pair2 = blk2->compute_delays(tmp_pair2); 1410 tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2); | 1322double Predec::compute_delays(double inrisetime) { 1323 // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block. 1324 pair<double, double> tmp_pair1, tmp_pair2; 1325 tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime); 1326 tmp_pair1 = blk1->compute_delays(tmp_pair1); 1327 tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime); 1328 tmp_pair2 = blk2->compute_delays(tmp_pair2); 1329 tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2); |
1411 | 1330 |
1412 driver_power.readOp.dynamic = 1413 drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + 1414 drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + 1415 drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + 1416 drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; | 1331 driver_power.readOp.dynamic = 1332 drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + 1333 drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + 1334 drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + 1335 drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; |
1417 | 1336 |
1418 block_power.readOp.dynamic = 1419 blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + 1420 blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + 1421 blk1->power_L2.readOp.dynamic + 1422 blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + 1423 blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + 1424 blk2->power_L2.readOp.dynamic; | 1337 block_power.readOp.dynamic = 1338 blk1->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path + 1339 blk1->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path + 1340 blk1->power_L2.readOp.dynamic + 1341 blk2->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path + 1342 blk2->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path + 1343 blk2->power_L2.readOp.dynamic; |
1425 | 1344 |
1426 power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic; | 1345 power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic; |
1427 | 1346 |
1428 delay = tmp_pair1.first; 1429 return tmp_pair1.second; | 1347 delay = tmp_pair1.first; 1348 return tmp_pair1.second; |
1430} 1431 | 1349} 1350 |
1432 | |
1433void Predec::leakage_feedback(double temperature) 1434{ 1435 drv1->leakage_feedback(temperature); 1436 drv2->leakage_feedback(temperature); 1437 blk1->leakage_feedback(temperature); 1438 blk2->leakage_feedback(temperature); 1439 1440 driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + --- 19 unchanged lines hidden (view full) --- 1460 blk2->power_nand3_path.readOp.gate_leakage + 1461 blk2->power_L2.readOp.gate_leakage; 1462 power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; 1463} 1464 1465// returns <delay, risetime> 1466pair<double, double> Predec::get_max_delay_before_decoder( 1467 pair<double, double> input_pair1, | 1351void Predec::leakage_feedback(double temperature) 1352{ 1353 drv1->leakage_feedback(temperature); 1354 drv2->leakage_feedback(temperature); 1355 blk1->leakage_feedback(temperature); 1356 blk2->leakage_feedback(temperature); 1357 1358 driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + --- 19 unchanged lines hidden (view full) --- 1378 blk2->power_nand3_path.readOp.gate_leakage + 1379 blk2->power_L2.readOp.gate_leakage; 1380 power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; 1381} 1382 1383// returns <delay, risetime> 1384pair<double, double> Predec::get_max_delay_before_decoder( 1385 pair<double, double> input_pair1, |
1468 pair<double, double> input_pair2) 1469{ 1470 pair<double, double> ret_val; 1471 double delay; | 1386 pair<double, double> input_pair2) { 1387 pair<double, double> ret_val; 1388 double delay; |
1472 | 1389 |
1473 delay = drv1->delay_nand2_path + blk1->delay_nand2_path; 1474 ret_val.first = delay; 1475 ret_val.second = input_pair1.first; 1476 delay = drv1->delay_nand3_path + blk1->delay_nand3_path; 1477 if (ret_val.first < delay) 1478 { | 1390 delay = drv1->delay_nand2_path + blk1->delay_nand2_path; |
1479 ret_val.first = delay; | 1391 ret_val.first = delay; |
1480 ret_val.second = input_pair1.second; 1481 } 1482 delay = drv2->delay_nand2_path + blk2->delay_nand2_path; 1483 if (ret_val.first < delay) 1484 { 1485 ret_val.first = delay; 1486 ret_val.second = input_pair2.first; 1487 } 1488 delay = drv2->delay_nand3_path + blk2->delay_nand3_path; 1489 if (ret_val.first < delay) 1490 { 1491 ret_val.first = delay; 1492 ret_val.second = input_pair2.second; 1493 } | 1392 ret_val.second = input_pair1.first; 1393 delay = drv1->delay_nand3_path + blk1->delay_nand3_path; 1394 if (ret_val.first < delay) { 1395 ret_val.first = delay; 1396 ret_val.second = input_pair1.second; 1397 } 1398 delay = drv2->delay_nand2_path + blk2->delay_nand2_path; 1399 if (ret_val.first < delay) { 1400 ret_val.first = delay; 1401 ret_val.second = input_pair2.first; 1402 } 1403 delay = drv2->delay_nand3_path + blk2->delay_nand3_path; 1404 if (ret_val.first < delay) { 1405 ret_val.first = delay; 1406 ret_val.second = input_pair2.second; 1407 } |
1494 | 1408 |
1495 return ret_val; | 1409 return ret_val; |
1496} 1497 1498 1499 | 1410} 1411 1412 1413 |
1500Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram) 1501:number_gates(0), 1502 min_number_gates(2), 1503 c_gate_load(c_gate_load_), 1504 c_wire_load(c_wire_load_), 1505 r_wire_load(r_wire_load_), 1506 delay(0), 1507 power(), 1508 is_dram_(is_dram) 1509{ 1510 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) 1511 { 1512 width_n[i] = 0; 1513 width_p[i] = 0; 1514 } | 1414Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, 1415 bool is_dram) 1416 : number_gates(0), 1417 min_number_gates(2), 1418 c_gate_load(c_gate_load_), 1419 c_wire_load(c_wire_load_), 1420 r_wire_load(r_wire_load_), 1421 delay(0), 1422 power(), 1423 is_dram_(is_dram) { 1424 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { 1425 width_n[i] = 0; 1426 width_p[i] = 0; 1427 } |
1515 | 1428 |
1516 compute_widths(); | 1429 compute_widths(); |
1517} 1518 1519 | 1430} 1431 1432 |
1520void Driver::compute_widths() 1521{ 1522 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); 1523 double c_load = c_gate_load + c_wire_load; 1524 width_n[0] = g_tp.min_w_nmos_; 1525 width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; | 1433void Driver::compute_widths() { 1434 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); 1435 double c_load = c_gate_load + c_wire_load; 1436 width_n[0] = g_tp.min_w_nmos_; 1437 width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; |
1526 | 1438 |
1527 double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_); 1528 number_gates = logical_effort( 1529 min_number_gates, 1530 1, 1531 F, 1532 width_n, 1533 width_p, 1534 c_load, 1535 p_to_n_sz_ratio, 1536 is_dram_, false, 1537 g_tp.max_w_nmos_); | 1439 double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_); 1440 number_gates = logical_effort( 1441 min_number_gates, 1442 1, 1443 F, 1444 width_n, 1445 width_p, 1446 c_load, 1447 p_to_n_sz_ratio, 1448 is_dram_, false, 1449 g_tp.max_w_nmos_); |
1538} 1539 1540 1541 | 1450} 1451 1452 1453 |
1542double Driver::compute_delay(double inrisetime) 1543{ 1544 int i; 1545 double rd, c_load, c_intrinsic, tf; 1546 double this_delay = 0; | 1454double Driver::compute_delay(double inrisetime) { 1455 int i; 1456 double rd, c_load, c_intrinsic, tf; 1457 double this_delay = 0; |
1547 | 1458 |
1548 for (i = 0; i < number_gates - 1; ++i) 1549 { | 1459 for (i = 0; i < number_gates - 1; ++i) { 1460 rd = tr_R_on(width_n[i], NCH, 1, is_dram_); 1461 c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_); 1462 c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1463 drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1464 tf = rd * (c_intrinsic + c_load); 1465 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1466 delay += this_delay; 1467 inrisetime = this_delay / (1.0 - 0.5); 1468 power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * 1469 g_tp.peri_global.Vdd; 1470 power.readOp.leakage += 1471 cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * 1472 g_tp.peri_global.Vdd; 1473 power.readOp.gate_leakage += 1474 cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * 1475 g_tp.peri_global.Vdd; 1476 } 1477 1478 i = number_gates - 1; 1479 c_load = c_gate_load + c_wire_load; |
1550 rd = tr_R_on(width_n[i], NCH, 1, is_dram_); | 1480 rd = tr_R_on(width_n[i], NCH, 1, is_dram_); |
1551 c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_); | |
1552 c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + | 1481 c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + |
1553 drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1554 tf = rd * (c_intrinsic + c_load); | 1482 drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1483 tf = rd * (c_intrinsic + c_load) + r_wire_load * 1484 (c_wire_load / 2 + c_gate_load); |
1555 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1556 delay += this_delay; | 1485 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1486 delay += this_delay; |
1557 inrisetime = this_delay / (1.0 - 0.5); 1558 power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 1559 power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd; 1560 power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; 1561 } | 1487 power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * 1488 g_tp.peri_global.Vdd; 1489 power.readOp.leakage += 1490 cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * 1491 g_tp.peri_global.Vdd; 1492 power.readOp.gate_leakage += 1493 cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * 1494 g_tp.peri_global.Vdd; |
1562 | 1495 |
1563 i = number_gates - 1; 1564 c_load = c_gate_load + c_wire_load; 1565 rd = tr_R_on(width_n[i], NCH, 1, is_dram_); 1566 c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1567 drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1568 tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load); 1569 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1570 delay += this_delay; 1571 power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 1572 power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd; 1573 power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; 1574 1575 return this_delay / (1.0 - 0.5); | 1496 return this_delay / (1.0 - 0.5); |
1576} 1577 | 1497} 1498 |