// decoder.cc revision 10152
1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. 5 * All Rights Reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer; 11 * redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution; 14 * neither the name of the copyright holders nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 * 30 ***************************************************************************/ 31 32 33 34#include <cassert> 35#include <cmath> 36#include <iostream> 37 38#include "area.h" 39#include "decoder.h" 40#include "parameter.h" 41 42using namespace std; 43 44 45Decoder::Decoder( 46 int _num_dec_signals, 47 bool flag_way_select, 48 double _C_ld_dec_out, 49 double _R_wire_dec_out, 50 bool fully_assoc_, 51 bool is_dram_, 52 bool is_wl_tr_, 53 const Area & cell_) 54:exist(false), 55 C_ld_dec_out(_C_ld_dec_out), 56 R_wire_dec_out(_R_wire_dec_out), 57 num_gates(0), num_gates_min(2), 58 delay(0), 59 //power(), 60 fully_assoc(fully_assoc_), is_dram(is_dram_), 61 is_wl_tr(is_wl_tr_), cell(cell_) 62{ 63 64 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) 65 { 66 w_dec_n[i] = 0; 67 w_dec_p[i] = 0; 68 } 69 70 /* 71 * _num_dec_signals is the number of decoded signal as output 72 * num_addr_bits_dec is the number of signal to be decoded 73 * as the decoders input. 
74 */ 75 int num_addr_bits_dec = _log2(_num_dec_signals); 76 77 if (num_addr_bits_dec < 4) 78 { 79 if (flag_way_select) 80 { 81 exist = true; 82 num_in_signals = 2; 83 } 84 else 85 { 86 num_in_signals = 0; 87 } 88 } 89 else 90 { 91 exist = true; 92 93 if (flag_way_select) 94 { 95 num_in_signals = 3; 96 } 97 else 98 { 99 num_in_signals = 2; 100 } 101 } 102 103 assert(cell.h>0); 104 assert(cell.w>0); 105 // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; 106 //area.h = 4 * cell.h; 107 area.h = g_tp.h_dec * cell.h; 108 109 compute_widths(); 110 compute_area(); 111} 112 113 114 115void Decoder::compute_widths() 116{ 117 double F; 118 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr); 119 double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); 120 double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); 121 122 if (exist) 123 { 124 if (num_in_signals == 2 || fully_assoc) 125 { 126 w_dec_n[0] = 2 * g_tp.min_w_nmos_; 127 w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 128 F = gnand2; 129 } 130 else 131 { 132 w_dec_n[0] = 3 * g_tp.min_w_nmos_; 133 w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 134 F = gnand3; 135 } 136 137 F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) + 138 gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)); 139 num_gates = logical_effort( 140 num_gates_min, 141 num_in_signals == 2 ? 
gnand2 : gnand3, 142 F, 143 w_dec_n, 144 w_dec_p, 145 C_ld_dec_out, 146 p_to_n_sz_ratio, 147 is_dram, 148 is_wl_tr, 149 g_tp.max_w_nmos_dec); 150 } 151} 152 153 154 155void Decoder::compute_area() 156{ 157 double cumulative_area = 0; 158 double cumulative_curr = 0; // cumulative leakage current 159 double cumulative_curr_Ig = 0; // cumulative leakage current 160 161 if (exist) 162 { // First check if this decoder exists 163 if (num_in_signals == 2) 164 { 165 cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h); 166 cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); 167 cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); 168 } 169 else if (num_in_signals == 3) 170 { 171 cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h); 172 cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);; 173 cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); 174 } 175 176 for (int i = 1; i < num_gates; i++) 177 { 178 cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h); 179 cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); 180 cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); 181 } 182 power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; 183 power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; 184 185 area.w = (cumulative_area / area.h); 186 } 187} 188 189 190 191double Decoder::compute_delays(double inrisetime) 192{ 193 if (exist) 194 { 195 double ret_val = 0; // outrisetime 196 int i; 197 double rd, tf, this_delay, c_load, c_intrinsic, Vpp; 198 double Vdd = g_tp.peri_global.Vdd; 199 200 if ((is_wl_tr) && (is_dram)) 201 { 202 Vpp = g_tp.vpp; 203 } 204 else if (is_wl_tr) 205 { 206 Vpp = g_tp.sram_cell.Vdd; 207 } 208 else 209 { 210 Vpp = g_tp.peri_global.Vdd; 211 } 212 213 // first check whether a decoder is required at all 214 
rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr); 215 c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr); 216 c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals + 217 drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr); 218 tf = rd * (c_intrinsic + c_load); 219 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 220 delay += this_delay; 221 inrisetime = this_delay / (1.0 - 0.5); 222 power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; 223 224 for (i = 1; i < num_gates - 1; ++i) 225 { 226 rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); 227 c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr); 228 c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + 229 drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); 230 tf = rd * (c_intrinsic + c_load); 231 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 232 delay += this_delay; 233 inrisetime = this_delay / (1.0 - 0.5); 234 power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; 235 } 236 237 // add delay of final inverter that drives the wordline 238 i = num_gates - 1; 239 c_load = C_ld_dec_out; 240 rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); 241 c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + 242 drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); 243 tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2; 244 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 245 delay += this_delay; 246 ret_val = this_delay / (1.0 - 0.5); 247 power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd; 248 249 return ret_val; 250 } 251 else 252 { 253 return 0.0; 254 } 255} 256 257void Decoder::leakage_feedback(double temperature) 258{ 259 double cumulative_curr = 0; // cumulative leakage current 260 double cumulative_curr_Ig = 0; // cumulative 
leakage current 261 262 if (exist) 263 { // First check if this decoder exists 264 if (num_in_signals == 2) 265 { 266 cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); 267 cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); 268 } 269 else if (num_in_signals == 3) 270 { 271 cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);; 272 cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); 273 } 274 275 for (int i = 1; i < num_gates; i++) 276 { 277 cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); 278 cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); 279 } 280 281 power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; 282 power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; 283 } 284} 285 286PredecBlk::PredecBlk( 287 int num_dec_signals, 288 Decoder * dec_, 289 double C_wire_predec_blk_out, 290 double R_wire_predec_blk_out_, 291 int num_dec_per_predec, 292 bool is_dram, 293 bool is_blk1) 294 :dec(dec_), 295 exist(false), 296 number_input_addr_bits(0), 297 C_ld_predec_blk_out(0), 298 R_wire_predec_blk_out(0), 299 branch_effort_nand2_gate_output(1), 300 branch_effort_nand3_gate_output(1), 301 flag_two_unique_paths(false), 302 flag_L2_gate(0), 303 number_inputs_L1_gate(0), 304 number_gates_L1_nand2_path(0), 305 number_gates_L1_nand3_path(0), 306 number_gates_L2(0), 307 min_number_gates_L1(2), 308 min_number_gates_L2(2), 309 num_L1_active_nand2_path(0), 310 num_L1_active_nand3_path(0), 311 delay_nand2_path(0), 312 delay_nand3_path(0), 313 power_nand2_path(), 314 power_nand3_path(), 315 power_L2(), 316 is_dram_(is_dram) 317{ 318 int branch_effort_predec_out; 319 double C_ld_dec_gate; 320 int num_addr_bits_dec = _log2(num_dec_signals); 321 int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2; 322 int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits; 323 324 
w_L1_nand2_n[0] = 0; 325 w_L1_nand2_p[0] = 0; 326 w_L1_nand3_n[0] = 0; 327 w_L1_nand3_p[0] = 0; 328 329 if (is_blk1 == true) 330 { 331 if (num_addr_bits_dec <= 0) 332 { 333 return; 334 } 335 else if (num_addr_bits_dec < 4) 336 { 337 // Just one predecoder block is required with NAND2 gates. No decoder required. 338 // The first level of predecoding directly drives the decoder output load 339 exist = true; 340 number_input_addr_bits = num_addr_bits_dec; 341 R_wire_predec_blk_out = dec->R_wire_dec_out; 342 C_ld_predec_blk_out = dec->C_ld_dec_out; 343 } 344 else 345 { 346 exist = true; 347 number_input_addr_bits = blk1_num_input_addr_bits; 348 branch_effort_predec_out = (1 << blk2_num_input_addr_bits); 349 C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); 350 R_wire_predec_blk_out = R_wire_predec_blk_out_; 351 C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; 352 } 353 } 354 else 355 { 356 if (num_addr_bits_dec >= 4) 357 { 358 exist = true; 359 number_input_addr_bits = blk2_num_input_addr_bits; 360 branch_effort_predec_out = (1 << blk1_num_input_addr_bits); 361 C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); 362 R_wire_predec_blk_out = R_wire_predec_blk_out_; 363 C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; 364 } 365 } 366 367 compute_widths(); 368 compute_area(); 369} 370 371 372 373void PredecBlk::compute_widths() 374{ 375 double F, c_load_nand3_path, c_load_nand2_path; 376 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); 377 double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); 378 double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); 379 380 if (exist == false) return; 381 382 383 switch (number_input_addr_bits) 384 { 385 case 1: 386 flag_two_unique_paths = false; 387 number_inputs_L1_gate = 2; 388 flag_L2_gate = 0; 389 break; 390 case 2: 391 
flag_two_unique_paths = false; 392 number_inputs_L1_gate = 2; 393 flag_L2_gate = 0; 394 break; 395 case 3: 396 flag_two_unique_paths = false; 397 number_inputs_L1_gate = 3; 398 flag_L2_gate = 0; 399 break; 400 case 4: 401 flag_two_unique_paths = false; 402 number_inputs_L1_gate = 2; 403 flag_L2_gate = 2; 404 branch_effort_nand2_gate_output = 4; 405 break; 406 case 5: 407 flag_two_unique_paths = true; 408 flag_L2_gate = 2; 409 branch_effort_nand2_gate_output = 8; 410 branch_effort_nand3_gate_output = 4; 411 break; 412 case 6: 413 flag_two_unique_paths = false; 414 number_inputs_L1_gate = 3; 415 flag_L2_gate = 2; 416 branch_effort_nand3_gate_output = 8; 417 break; 418 case 7: 419 flag_two_unique_paths = true; 420 flag_L2_gate = 3; 421 branch_effort_nand2_gate_output = 32; 422 branch_effort_nand3_gate_output = 16; 423 break; 424 case 8: 425 flag_two_unique_paths = true; 426 flag_L2_gate = 3; 427 branch_effort_nand2_gate_output = 64; 428 branch_effort_nand3_gate_output = 32; 429 break; 430 case 9: 431 flag_two_unique_paths = false; 432 number_inputs_L1_gate = 3; 433 flag_L2_gate = 3; 434 branch_effort_nand3_gate_output = 64; 435 break; 436 default: 437 assert(0); 438 break; 439 } 440 441 // find the number of gates and sizing in second level of predecoder (if there is a second level) 442 if (flag_L2_gate) 443 { 444 if (flag_L2_gate == 2) 445 { // 2nd level is a NAND2 gate 446 w_L2_n[0] = 2 * g_tp.min_w_nmos_; 447 F = gnand2; 448 } 449 else 450 { // 2nd level is a NAND3 gate 451 w_L2_n[0] = 3 * g_tp.min_w_nmos_; 452 F = gnand3; 453 } 454 w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 455 F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); 456 number_gates_L2 = logical_effort( 457 min_number_gates_L2, 458 flag_L2_gate == 2 ? 
gnand2 : gnand3, 459 F, 460 w_L2_n, 461 w_L2_p, 462 C_ld_predec_blk_out, 463 p_to_n_sz_ratio, 464 is_dram_, false, 465 g_tp.max_w_nmos_); 466 467 // Now find the number of gates and widths in first level of predecoder 468 if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2)) 469 { // Whenever flag_two_unique_paths is true, it means first level of decoder employs 470 // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, it means 471 // a NAND2 gate is used in the first level of the predecoder 472 c_load_nand2_path = branch_effort_nand2_gate_output * 473 (gate_C(w_L2_n[0], 0, is_dram_) + 474 gate_C(w_L2_p[0], 0, is_dram_)); 475 w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; 476 w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 477 F = gnand2 * c_load_nand2_path / 478 (gate_C(w_L1_nand2_n[0], 0, is_dram_) + 479 gate_C(w_L1_nand2_p[0], 0, is_dram_)); 480 number_gates_L1_nand2_path = logical_effort( 481 min_number_gates_L1, 482 gnand2, 483 F, 484 w_L1_nand2_n, 485 w_L1_nand2_p, 486 c_load_nand2_path, 487 p_to_n_sz_ratio, 488 is_dram_, false, 489 g_tp.max_w_nmos_); 490 } 491 492 //Now find widths of gates along path in which first gate is a NAND3 493 if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3)) 494 { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs 495 // both NAND2 and NAND3 gates. 
Or when number_inputs_L1_gate is 3, it means 496 // a NAND3 gate is used in the first level of the predecoder 497 c_load_nand3_path = branch_effort_nand3_gate_output * 498 (gate_C(w_L2_n[0], 0, is_dram_) + 499 gate_C(w_L2_p[0], 0, is_dram_)); 500 w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; 501 w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 502 F = gnand3 * c_load_nand3_path / 503 (gate_C(w_L1_nand3_n[0], 0, is_dram_) + 504 gate_C(w_L1_nand3_p[0], 0, is_dram_)); 505 number_gates_L1_nand3_path = logical_effort( 506 min_number_gates_L1, 507 gnand3, 508 F, 509 w_L1_nand3_n, 510 w_L1_nand3_p, 511 c_load_nand3_path, 512 p_to_n_sz_ratio, 513 is_dram_, false, 514 g_tp.max_w_nmos_); 515 } 516 } 517 else 518 { // find number of gates and widths in first level of predecoder block when there is no second level 519 if (number_inputs_L1_gate == 2) 520 { 521 w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; 522 w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 523 F = gnand2*C_ld_predec_blk_out / 524 (gate_C(w_L1_nand2_n[0], 0, is_dram_) + 525 gate_C(w_L1_nand2_p[0], 0, is_dram_)); 526 number_gates_L1_nand2_path = logical_effort( 527 min_number_gates_L1, 528 gnand2, 529 F, 530 w_L1_nand2_n, 531 w_L1_nand2_p, 532 C_ld_predec_blk_out, 533 p_to_n_sz_ratio, 534 is_dram_, false, 535 g_tp.max_w_nmos_); 536 } 537 else if (number_inputs_L1_gate == 3) 538 { 539 w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; 540 w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 541 F = gnand3*C_ld_predec_blk_out / 542 (gate_C(w_L1_nand3_n[0], 0, is_dram_) + 543 gate_C(w_L1_nand3_p[0], 0, is_dram_)); 544 number_gates_L1_nand3_path = logical_effort( 545 min_number_gates_L1, 546 gnand3, 547 F, 548 w_L1_nand3_n, 549 w_L1_nand3_p, 550 C_ld_predec_blk_out, 551 p_to_n_sz_ratio, 552 is_dram_, false, 553 g_tp.max_w_nmos_); 554 } 555 } 556} 557 558 559 560void PredecBlk::compute_area() 561{ 562 if (exist) 563 { // First check whether a predecoder block is needed 564 int num_L1_nand2 = 0; 565 int num_L1_nand3 = 0; 566 int 
num_L2 = 0; 567 double tot_area_L1_nand3 =0; 568 double leak_L1_nand3 =0; 569 double gate_leak_L1_nand3 =0; 570 571 double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def); 572 double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); 573 double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); 574 if (number_inputs_L1_gate != 3) { 575 tot_area_L1_nand3 = 0; 576 leak_L1_nand3 = 0; 577 gate_leak_L1_nand3 =0; 578 } 579 else { 580 tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def); 581 leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); 582 gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); 583 } 584 585 switch (number_input_addr_bits) 586 { 587 case 1: //2 NAND2 gates 588 num_L1_nand2 = 2; 589 num_L2 = 0; 590 num_L1_active_nand2_path =1; 591 num_L1_active_nand3_path =0; 592 break; 593 case 2: //4 NAND2 gates 594 num_L1_nand2 = 4; 595 num_L2 = 0; 596 num_L1_active_nand2_path =1; 597 num_L1_active_nand3_path =0; 598 break; 599 case 3: //8 NAND3 gates 600 num_L1_nand3 = 8; 601 num_L2 = 0; 602 num_L1_active_nand2_path =0; 603 num_L1_active_nand3_path =1; 604 break; 605 case 4: //4 + 4 NAND2 gates 606 num_L1_nand2 = 8; 607 num_L2 = 16; 608 num_L1_active_nand2_path =2; 609 num_L1_active_nand3_path =0; 610 break; 611 case 5: //4 NAND2 gates, 8 NAND3 gates 612 num_L1_nand2 = 4; 613 num_L1_nand3 = 8; 614 num_L2 = 32; 615 num_L1_active_nand2_path =1; 616 num_L1_active_nand3_path =1; 617 break; 618 case 6: //8 + 8 NAND3 gates 619 num_L1_nand3 = 16; 620 num_L2 = 64; 621 num_L1_active_nand2_path =0; 622 num_L1_active_nand3_path =2; 623 break; 624 case 7: //4 + 4 NAND2 gates, 8 NAND3 gates 625 num_L1_nand2 = 8; 626 num_L1_nand3 = 8; 627 num_L2 = 128; 628 num_L1_active_nand2_path =2; 629 num_L1_active_nand3_path =1; 630 break; 631 case 8: //4 NAND2 gates, 
8 + 8 NAND3 gates 632 num_L1_nand2 = 4; 633 num_L1_nand3 = 16; 634 num_L2 = 256; 635 num_L1_active_nand2_path =2; 636 num_L1_active_nand3_path =2; 637 break; 638 case 9: //8 + 8 + 8 NAND3 gates 639 num_L1_nand3 = 24; 640 num_L2 = 512; 641 num_L1_active_nand2_path =0; 642 num_L1_active_nand3_path =3; 643 break; 644 default: 645 break; 646 } 647 648 for (int i = 1; i < number_gates_L1_nand2_path; ++i) 649 { 650 tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def); 651 leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); 652 gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); 653 } 654 tot_area_L1_nand2 *= num_L1_nand2; 655 leak_L1_nand2 *= num_L1_nand2; 656 gate_leak_L1_nand2 *= num_L1_nand2; 657 658 for (int i = 1; i < number_gates_L1_nand3_path; ++i) 659 { 660 tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def); 661 leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); 662 gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); 663 } 664 tot_area_L1_nand3 *= num_L1_nand3; 665 leak_L1_nand3 *= num_L1_nand3; 666 gate_leak_L1_nand3 *= num_L1_nand3; 667 668 double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3; 669 double cumulative_area_L2 = 0.0; 670 double leakage_L2 = 0.0; 671 double gate_leakage_L2 = 0.0; 672 673 if (flag_L2_gate == 2) 674 { 675 cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); 676 leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); 677 gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); 678 } 679 else if (flag_L2_gate == 3) 680 { 681 cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); 682 leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); 683 gate_leakage_L2 = 
cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); 684 } 685 686 for (int i = 1; i < number_gates_L2; ++i) 687 { 688 cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def); 689 leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); 690 gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); 691 } 692 cumulative_area_L2 *= num_L2; 693 leakage_L2 *= num_L2; 694 gate_leakage_L2 *= num_L2; 695 696 power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd; 697 power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd; 698 power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd; 699 area.set_area(cumulative_area_L1 + cumulative_area_L2); 700 power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd; 701 power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd; 702 power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; 703 } 704} 705 706 707 708pair<double, double> PredecBlk::compute_delays( 709 pair<double, double> inrisetime) // <nand2, nand3> 710{ 711 pair<double, double> ret_val; 712 ret_val.first = 0; // outrisetime_nand2_path 713 ret_val.second = 0; // outrisetime_nand3_path 714 715 double inrisetime_nand2_path = inrisetime.first; 716 double inrisetime_nand3_path = inrisetime.second; 717 int i; 718 double rd, c_load, c_intrinsic, tf, this_delay; 719 double Vdd = g_tp.peri_global.Vdd; 720 721 // TODO: following delay calculation part can be greatly simplified. 
722 // first check whether a predecoder block is required 723 if (exist) 724 { 725 //Find delay in first level of predecoder block 726 //First find delay in path 727 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) 728 { 729 //First gate is a NAND2 gate 730 rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_); 731 c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_); 732 c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 733 drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); 734 tf = rd * (c_intrinsic + c_load); 735 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 736 delay_nand2_path += this_delay; 737 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 738 power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; 739 740 //Add delays of all but the last inverter in the chain 741 for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) 742 { 743 rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); 744 c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_); 745 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 746 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 747 tf = rd * (c_intrinsic + c_load); 748 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 749 delay_nand2_path += this_delay; 750 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 751 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 752 } 753 754 //Add delay of the last inverter 755 i = number_gates_L1_nand2_path - 1; 756 rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); 757 if (flag_L2_gate) 758 { 759 c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); 760 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 761 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 762 tf = rd * (c_intrinsic + c_load); 763 this_delay = 
horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 764 delay_nand2_path += this_delay; 765 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 766 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 767 } 768 else 769 { //First level directly drives decoder output load 770 c_load = C_ld_predec_blk_out; 771 c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 772 drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 773 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; 774 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 775 delay_nand2_path += this_delay; 776 ret_val.first = this_delay / (1.0 - 0.5); 777 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 778 } 779 } 780 781 if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) 782 { //Check if the number of gates in the first level is more than 1. 783 //First gate is a NAND3 gate 784 rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_); 785 c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_); 786 c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 787 drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); 788 tf = rd * (c_intrinsic + c_load); 789 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 790 delay_nand3_path += this_delay; 791 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 792 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 793 794 //Add delays of all but the last inverter in the chain 795 for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) 796 { 797 rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); 798 c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_); 799 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 800 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 801 tf = rd * (c_intrinsic + c_load); 802 this_delay = 
horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 803 delay_nand3_path += this_delay; 804 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 805 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 806 } 807 808 //Add delay of the last inverter 809 i = number_gates_L1_nand3_path - 1; 810 rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); 811 if (flag_L2_gate) 812 { 813 c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); 814 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 815 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 816 tf = rd * (c_intrinsic + c_load); 817 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 818 delay_nand3_path += this_delay; 819 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 820 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 821 } 822 else 823 { //First level directly drives decoder output load 824 c_load = C_ld_predec_blk_out; 825 c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 826 drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 827 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; 828 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 829 delay_nand3_path += this_delay; 830 ret_val.second = this_delay / (1.0 - 0.5); 831 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 832 } 833 } 834 835 // Find delay through second level 836 if (flag_L2_gate) 837 { 838 if (flag_L2_gate == 2) 839 { 840 rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_); 841 c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); 842 c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 843 drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); 844 tf = rd * (c_intrinsic + c_load); 845 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 846 delay_nand2_path += this_delay; 847 
inrisetime_nand2_path = this_delay / (1.0 - 0.5); 848 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 849 } 850 else 851 { // flag_L2_gate = 3 852 rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_); 853 c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); 854 c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 855 drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); 856 tf = rd * (c_intrinsic + c_load); 857 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 858 delay_nand3_path += this_delay; 859 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 860 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 861 } 862 863 for (i = 1; i < number_gates_L2 - 1; ++i) 864 { 865 rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); 866 c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_); 867 c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 868 drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 869 tf = rd * (c_intrinsic + c_load); 870 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 871 delay_nand2_path += this_delay; 872 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 873 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 874 delay_nand3_path += this_delay; 875 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 876 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 877 } 878 879 //Add delay of final inverter that drives the wordline decoders 880 i = number_gates_L2 - 1; 881 c_load = C_ld_predec_blk_out; 882 rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); 883 c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 884 drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 885 tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; 886 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 887 delay_nand2_path += this_delay; 888 ret_val.first = this_delay / (1.0 - 0.5); 889 this_delay = 
horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 890 delay_nand3_path += this_delay; 891 ret_val.second = this_delay / (1.0 - 0.5); 892 power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; 893 } 894 } 895 896 delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second; 897 return ret_val; 898} 899 900void PredecBlk::leakage_feedback(double temperature) 901{ 902 if (exist) 903 { // First check whether a predecoder block is needed 904 int num_L1_nand2 = 0; 905 int num_L1_nand3 = 0; 906 int num_L2 = 0; 907 double leak_L1_nand3 =0; 908 double gate_leak_L1_nand3 =0; 909 910 double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); 911 double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); 912 if (number_inputs_L1_gate != 3) { 913 leak_L1_nand3 = 0; 914 gate_leak_L1_nand3 =0; 915 } 916 else { 917 leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); 918 gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); 919 } 920 921 switch (number_input_addr_bits) 922 { 923 case 1: //2 NAND2 gates 924 num_L1_nand2 = 2; 925 num_L2 = 0; 926 num_L1_active_nand2_path =1; 927 num_L1_active_nand3_path =0; 928 break; 929 case 2: //4 NAND2 gates 930 num_L1_nand2 = 4; 931 num_L2 = 0; 932 num_L1_active_nand2_path =1; 933 num_L1_active_nand3_path =0; 934 break; 935 case 3: //8 NAND3 gates 936 num_L1_nand3 = 8; 937 num_L2 = 0; 938 num_L1_active_nand2_path =0; 939 num_L1_active_nand3_path =1; 940 break; 941 case 4: //4 + 4 NAND2 gates 942 num_L1_nand2 = 8; 943 num_L2 = 16; 944 num_L1_active_nand2_path =2; 945 num_L1_active_nand3_path =0; 946 break; 947 case 5: //4 NAND2 gates, 8 NAND3 gates 948 num_L1_nand2 = 4; 949 num_L1_nand3 = 8; 950 num_L2 = 32; 951 num_L1_active_nand2_path =1; 952 num_L1_active_nand3_path =1; 953 break; 954 case 6: //8 + 8 NAND3 gates 955 num_L1_nand3 = 16; 956 num_L2 = 64; 957 num_L1_active_nand2_path =0; 958 
num_L1_active_nand3_path =2; 959 break; 960 case 7: //4 + 4 NAND2 gates, 8 NAND3 gates 961 num_L1_nand2 = 8; 962 num_L1_nand3 = 8; 963 num_L2 = 128; 964 num_L1_active_nand2_path =2; 965 num_L1_active_nand3_path =1; 966 break; 967 case 8: //4 NAND2 gates, 8 + 8 NAND3 gates 968 num_L1_nand2 = 4; 969 num_L1_nand3 = 16; 970 num_L2 = 256; 971 num_L1_active_nand2_path =2; 972 num_L1_active_nand3_path =2; 973 break; 974 case 9: //8 + 8 + 8 NAND3 gates 975 num_L1_nand3 = 24; 976 num_L2 = 512; 977 num_L1_active_nand2_path =0; 978 num_L1_active_nand3_path =3; 979 break; 980 default: 981 break; 982 } 983 984 for (int i = 1; i < number_gates_L1_nand2_path; ++i) 985 { 986 leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); 987 gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); 988 } 989 leak_L1_nand2 *= num_L1_nand2; 990 gate_leak_L1_nand2 *= num_L1_nand2; 991 992 for (int i = 1; i < number_gates_L1_nand3_path; ++i) 993 { 994 leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); 995 gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); 996 } 997 leak_L1_nand3 *= num_L1_nand3; 998 gate_leak_L1_nand3 *= num_L1_nand3; 999 1000 double leakage_L2 = 0.0; 1001 double gate_leakage_L2 = 0.0; 1002 1003 if (flag_L2_gate == 2) 1004 { 1005 leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); 1006 gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); 1007 } 1008 else if (flag_L2_gate == 3) 1009 { 1010 leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); 1011 gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); 1012 } 1013 1014 for (int i = 1; i < number_gates_L2; ++i) 1015 { 1016 leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); 1017 gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); 1018 } 1019 leakage_L2 *= 
num_L2; 1020 gate_leakage_L2 *= num_L2; 1021 1022 power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd; 1023 power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd; 1024 power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd; 1025 1026 power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd; 1027 power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd; 1028 power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; 1029 } 1030} 1031 1032PredecBlkDrv::PredecBlkDrv( 1033 int way_select_, 1034 PredecBlk * blk_, 1035 bool is_dram) 1036 :flag_driver_exists(0), 1037 number_gates_nand2_path(0), 1038 number_gates_nand3_path(0), 1039 min_number_gates(2), 1040 num_buffers_driving_1_nand2_load(0), 1041 num_buffers_driving_2_nand2_load(0), 1042 num_buffers_driving_4_nand2_load(0), 1043 num_buffers_driving_2_nand3_load(0), 1044 num_buffers_driving_8_nand3_load(0), 1045 num_buffers_nand3_path(0), 1046 c_load_nand2_path_out(0), 1047 c_load_nand3_path_out(0), 1048 r_load_nand2_path_out(0), 1049 r_load_nand3_path_out(0), 1050 delay_nand2_path(0), 1051 delay_nand3_path(0), 1052 power_nand2_path(), 1053 power_nand3_path(), 1054 blk(blk_), dec(blk->dec), 1055 is_dram_(is_dram), 1056 way_select(way_select_) 1057{ 1058 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) 1059 { 1060 width_nand2_path_n[i] = 0; 1061 width_nand2_path_p[i] = 0; 1062 width_nand3_path_n[i] = 0; 1063 width_nand3_path_p[i] = 0; 1064 } 1065 1066 number_input_addr_bits = blk->number_input_addr_bits; 1067 1068 if (way_select > 1) 1069 { 1070 flag_driver_exists = 1; 1071 number_input_addr_bits = way_select; 1072 if (dec->num_in_signals == 2) 1073 { 1074 c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); 1075 num_buffers_driving_2_nand2_load = number_input_addr_bits; 1076 } 1077 else if (dec->num_in_signals == 3) 1078 { 1079 c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, 
is_dram_); 1080 num_buffers_driving_2_nand3_load = number_input_addr_bits; 1081 } 1082 } 1083 else if (way_select == 0) 1084 { 1085 if (blk->exist) 1086 { 1087 flag_driver_exists = 1; 1088 } 1089 } 1090 1091 compute_widths(); 1092 compute_area(); 1093} 1094 1095 1096 1097void PredecBlkDrv::compute_widths() 1098{ 1099 // The predecode block driver accepts as input the address bits from the h-tree network. For 1100 // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of 1101 // inversion to generate addrbar and simply treat addrbar as addr. 1102 1103 double F; 1104 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); 1105 1106 if (flag_driver_exists) 1107 { 1108 double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_); 1109 double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_); 1110 1111 if (way_select == 0) 1112 { 1113 if (blk->number_input_addr_bits == 1) 1114 { //2 NAND2 gates 1115 num_buffers_driving_2_nand2_load = 1; 1116 c_load_nand2_path_out = 2 * C_nand2_gate_blk; 1117 } 1118 else if (blk->number_input_addr_bits == 2) 1119 { //4 NAND2 gates one 2-4 decoder 1120 num_buffers_driving_4_nand2_load = 2; 1121 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1122 } 1123 else if (blk->number_input_addr_bits == 3) 1124 { //8 NAND3 gates one 3-8 decoder 1125 num_buffers_driving_8_nand3_load = 3; 1126 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1127 } 1128 else if (blk->number_input_addr_bits == 4) 1129 { //4 + 4 NAND2 gates two 2-4 decoder 1130 num_buffers_driving_4_nand2_load = 4; 1131 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1132 } 1133 else if (blk->number_input_addr_bits == 5) 1134 { //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder 1135 num_buffers_driving_4_nand2_load = 2; 1136 num_buffers_driving_8_nand3_load = 3; 1137 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1138 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1139 } 1140 else 
if (blk->number_input_addr_bits == 6) 1141 { //8 + 8 NAND3 gates two 3-8 decoder 1142 num_buffers_driving_8_nand3_load = 6; 1143 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1144 } 1145 else if (blk->number_input_addr_bits == 7) 1146 { //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder 1147 num_buffers_driving_4_nand2_load = 4; 1148 num_buffers_driving_8_nand3_load = 3; 1149 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1150 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1151 } 1152 else if (blk->number_input_addr_bits == 8) 1153 { //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder 1154 num_buffers_driving_4_nand2_load = 2; 1155 num_buffers_driving_8_nand3_load = 6; 1156 c_load_nand2_path_out = 4 * C_nand2_gate_blk; 1157 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1158 } 1159 else if (blk->number_input_addr_bits == 9) 1160 { //8 + 8 + 8 NAND3 gates three 3-8 decoder 1161 num_buffers_driving_8_nand3_load = 9; 1162 c_load_nand3_path_out = 8 * C_nand3_gate_blk; 1163 } 1164 } 1165 1166 if ((blk->flag_two_unique_paths) || 1167 (blk->number_inputs_L1_gate == 2) || 1168 (number_input_addr_bits == 0) || 1169 ((way_select)&&(dec->num_in_signals == 2))) 1170 { //this means that way_select is driving NAND2 in decoder. 1171 width_nand2_path_n[0] = g_tp.min_w_nmos_; 1172 width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0]; 1173 F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_); 1174 number_gates_nand2_path = logical_effort( 1175 min_number_gates, 1176 1, 1177 F, 1178 width_nand2_path_n, 1179 width_nand2_path_p, 1180 c_load_nand2_path_out, 1181 p_to_n_sz_ratio, 1182 is_dram_, false, g_tp.max_w_nmos_); 1183 } 1184 1185 if ((blk->flag_two_unique_paths) || 1186 (blk->number_inputs_L1_gate == 3) || 1187 ((way_select)&&(dec->num_in_signals == 3))) 1188 { //this means that way_select is driving NAND3 in decoder. 
1189 width_nand3_path_n[0] = g_tp.min_w_nmos_; 1190 width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0]; 1191 F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_); 1192 number_gates_nand3_path = logical_effort( 1193 min_number_gates, 1194 1, 1195 F, 1196 width_nand3_path_n, 1197 width_nand3_path_p, 1198 c_load_nand3_path_out, 1199 p_to_n_sz_ratio, 1200 is_dram_, false, g_tp.max_w_nmos_); 1201 } 1202 } 1203} 1204 1205 1206 1207void PredecBlkDrv::compute_area() 1208{ 1209 double area_nand2_path = 0; 1210 double area_nand3_path = 0; 1211 double leak_nand2_path = 0; 1212 double leak_nand3_path = 0; 1213 double gate_leak_nand2_path = 0; 1214 double gate_leak_nand3_path = 0; 1215 1216 if (flag_driver_exists) 1217 { // first check whether a predecoder block driver is needed 1218 for (int i = 0; i < number_gates_nand2_path; ++i) 1219 { 1220 area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def); 1221 leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); 1222 gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); 1223 } 1224 area_nand2_path *= (num_buffers_driving_1_nand2_load + 1225 num_buffers_driving_2_nand2_load + 1226 num_buffers_driving_4_nand2_load); 1227 leak_nand2_path *= (num_buffers_driving_1_nand2_load + 1228 num_buffers_driving_2_nand2_load + 1229 num_buffers_driving_4_nand2_load); 1230 gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load + 1231 num_buffers_driving_2_nand2_load + 1232 num_buffers_driving_4_nand2_load); 1233 1234 for (int i = 0; i < number_gates_nand3_path; ++i) 1235 { 1236 area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def); 1237 leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); 1238 gate_leak_nand3_path += 
cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); 1239 } 1240 area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); 1241 leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); 1242 gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); 1243 1244 power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; 1245 power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; 1246 power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; 1247 power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; 1248 area.set_area(area_nand2_path + area_nand3_path); 1249 } 1250} 1251 1252 1253 1254pair<double, double> PredecBlkDrv::compute_delays( 1255 double inrisetime_nand2_path, 1256 double inrisetime_nand3_path) 1257{ 1258 pair<double, double> ret_val; 1259 ret_val.first = 0; // outrisetime_nand2_path 1260 ret_val.second = 0; // outrisetime_nand3_path 1261 int i; 1262 double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay; 1263 double Vdd = g_tp.peri_global.Vdd; 1264 1265 if (flag_driver_exists) 1266 { 1267 for (i = 0; i < number_gates_nand2_path - 1; ++i) 1268 { 1269 rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); 1270 c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_); 1271 c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1272 drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1273 tf = rd * (c_intrinsic + c_gate_load); 1274 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 1275 delay_nand2_path += this_delay; 1276 inrisetime_nand2_path = this_delay / (1.0 - 0.5); 1277 power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd; 1278 } 1279 1280 // Final inverter drives the predecoder block or the decoder output load 1281 
if (number_gates_nand2_path != 0) 1282 { 1283 i = number_gates_nand2_path - 1; 1284 rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); 1285 c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1286 drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1287 c_load = c_load_nand2_path_out; 1288 tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out*c_load/ 2; 1289 this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); 1290 delay_nand2_path += this_delay; 1291 ret_val.first = this_delay / (1.0 - 0.5); 1292 power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd; 1293// cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <<endl; 1294 } 1295 1296 for (i = 0; i < number_gates_nand3_path - 1; ++i) 1297 { 1298 rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_); 1299 c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_); 1300 c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1301 drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1302 tf = rd * (c_intrinsic + c_gate_load); 1303 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 1304 delay_nand3_path += this_delay; 1305 inrisetime_nand3_path = this_delay / (1.0 - 0.5); 1306 power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd; 1307 } 1308 1309 // Final inverter drives the predecoder block or the decoder output load 1310 if (number_gates_nand3_path != 0) 1311 { 1312 i = number_gates_nand3_path - 1; 1313 rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_); 1314 c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1315 drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1316 c_load = c_load_nand3_path_out; 1317 tf = rd*(c_intrinsic + c_load) + r_load_nand3_path_out*c_load / 2; 1318 this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); 
1319 delay_nand3_path += this_delay; 1320 ret_val.second = this_delay / (1.0 - 0.5); 1321 power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd; 1322 } 1323 } 1324 return ret_val; 1325} 1326 1327 1328double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir) 1329{ 1330 return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic + 1331 num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir; 1332} 1333 1334 1335 1336Predec::Predec( 1337 PredecBlkDrv * drv1_, 1338 PredecBlkDrv * drv2_) 1339:blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) 1340{ 1341 driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + 1342 drv1->power_nand3_path.readOp.leakage + 1343 drv2->power_nand2_path.readOp.leakage + 1344 drv2->power_nand3_path.readOp.leakage; 1345 block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + 1346 blk1->power_nand3_path.readOp.leakage + 1347 blk1->power_L2.readOp.leakage + 1348 blk2->power_nand2_path.readOp.leakage + 1349 blk2->power_nand3_path.readOp.leakage + 1350 blk2->power_L2.readOp.leakage; 1351 power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; 1352 1353 driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + 1354 drv1->power_nand3_path.readOp.gate_leakage + 1355 drv2->power_nand2_path.readOp.gate_leakage + 1356 drv2->power_nand3_path.readOp.gate_leakage; 1357 block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + 1358 blk1->power_nand3_path.readOp.gate_leakage + 1359 blk1->power_L2.readOp.gate_leakage + 1360 blk2->power_nand2_path.readOp.gate_leakage + 1361 blk2->power_nand3_path.readOp.gate_leakage + 1362 blk2->power_L2.readOp.gate_leakage; 1363 power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; 1364} 1365 1366void PredecBlkDrv::leakage_feedback(double temperature) 1367{ 1368 double leak_nand2_path = 0; 1369 double leak_nand3_path = 
0; 1370 double gate_leak_nand2_path = 0; 1371 double gate_leak_nand3_path = 0; 1372 1373 if (flag_driver_exists) 1374 { // first check whether a predecoder block driver is needed 1375 for (int i = 0; i < number_gates_nand2_path; ++i) 1376 { 1377 leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); 1378 gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); 1379 } 1380 leak_nand2_path *= (num_buffers_driving_1_nand2_load + 1381 num_buffers_driving_2_nand2_load + 1382 num_buffers_driving_4_nand2_load); 1383 gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load + 1384 num_buffers_driving_2_nand2_load + 1385 num_buffers_driving_4_nand2_load); 1386 1387 for (int i = 0; i < number_gates_nand3_path; ++i) 1388 { 1389 leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); 1390 gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); 1391 } 1392 leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); 1393 gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); 1394 1395 power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; 1396 power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; 1397 power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; 1398 power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; 1399 } 1400} 1401 1402double Predec::compute_delays(double inrisetime) 1403{ 1404 // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block. 
1405 pair<double, double> tmp_pair1, tmp_pair2; 1406 tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime); 1407 tmp_pair1 = blk1->compute_delays(tmp_pair1); 1408 tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime); 1409 tmp_pair2 = blk2->compute_delays(tmp_pair2); 1410 tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2); 1411 1412 driver_power.readOp.dynamic = 1413 drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + 1414 drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + 1415 drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + 1416 drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; 1417 1418 block_power.readOp.dynamic = 1419 blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + 1420 blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + 1421 blk1->power_L2.readOp.dynamic + 1422 blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + 1423 blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + 1424 blk2->power_L2.readOp.dynamic; 1425 1426 power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic; 1427 1428 delay = tmp_pair1.first; 1429 return tmp_pair1.second; 1430} 1431 1432 1433void Predec::leakage_feedback(double temperature) 1434{ 1435 drv1->leakage_feedback(temperature); 1436 drv2->leakage_feedback(temperature); 1437 blk1->leakage_feedback(temperature); 1438 blk2->leakage_feedback(temperature); 1439 1440 driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + 1441 drv1->power_nand3_path.readOp.leakage + 1442 drv2->power_nand2_path.readOp.leakage + 1443 drv2->power_nand3_path.readOp.leakage; 1444 block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + 1445 blk1->power_nand3_path.readOp.leakage + 1446 blk1->power_L2.readOp.leakage + 1447 blk2->power_nand2_path.readOp.leakage + 1448 blk2->power_nand3_path.readOp.leakage + 1449 
blk2->power_L2.readOp.leakage; 1450 power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; 1451 1452 driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + 1453 drv1->power_nand3_path.readOp.gate_leakage + 1454 drv2->power_nand2_path.readOp.gate_leakage + 1455 drv2->power_nand3_path.readOp.gate_leakage; 1456 block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + 1457 blk1->power_nand3_path.readOp.gate_leakage + 1458 blk1->power_L2.readOp.gate_leakage + 1459 blk2->power_nand2_path.readOp.gate_leakage + 1460 blk2->power_nand3_path.readOp.gate_leakage + 1461 blk2->power_L2.readOp.gate_leakage; 1462 power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; 1463} 1464 1465// returns <delay, risetime> 1466pair<double, double> Predec::get_max_delay_before_decoder( 1467 pair<double, double> input_pair1, 1468 pair<double, double> input_pair2) 1469{ 1470 pair<double, double> ret_val; 1471 double delay; 1472 1473 delay = drv1->delay_nand2_path + blk1->delay_nand2_path; 1474 ret_val.first = delay; 1475 ret_val.second = input_pair1.first; 1476 delay = drv1->delay_nand3_path + blk1->delay_nand3_path; 1477 if (ret_val.first < delay) 1478 { 1479 ret_val.first = delay; 1480 ret_val.second = input_pair1.second; 1481 } 1482 delay = drv2->delay_nand2_path + blk2->delay_nand2_path; 1483 if (ret_val.first < delay) 1484 { 1485 ret_val.first = delay; 1486 ret_val.second = input_pair2.first; 1487 } 1488 delay = drv2->delay_nand3_path + blk2->delay_nand3_path; 1489 if (ret_val.first < delay) 1490 { 1491 ret_val.first = delay; 1492 ret_val.second = input_pair2.second; 1493 } 1494 1495 return ret_val; 1496} 1497 1498 1499 1500Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram) 1501:number_gates(0), 1502 min_number_gates(2), 1503 c_gate_load(c_gate_load_), 1504 c_wire_load(c_wire_load_), 1505 r_wire_load(r_wire_load_), 1506 delay(0), 1507 
power(), 1508 is_dram_(is_dram) 1509{ 1510 for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) 1511 { 1512 width_n[i] = 0; 1513 width_p[i] = 0; 1514 } 1515 1516 compute_widths(); 1517} 1518 1519 1520void Driver::compute_widths() 1521{ 1522 double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); 1523 double c_load = c_gate_load + c_wire_load; 1524 width_n[0] = g_tp.min_w_nmos_; 1525 width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; 1526 1527 double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_); 1528 number_gates = logical_effort( 1529 min_number_gates, 1530 1, 1531 F, 1532 width_n, 1533 width_p, 1534 c_load, 1535 p_to_n_sz_ratio, 1536 is_dram_, false, 1537 g_tp.max_w_nmos_); 1538} 1539 1540 1541 1542double Driver::compute_delay(double inrisetime) 1543{ 1544 int i; 1545 double rd, c_load, c_intrinsic, tf; 1546 double this_delay = 0; 1547 1548 for (i = 0; i < number_gates - 1; ++i) 1549 { 1550 rd = tr_R_on(width_n[i], NCH, 1, is_dram_); 1551 c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_); 1552 c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1553 drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1554 tf = rd * (c_intrinsic + c_load); 1555 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1556 delay += this_delay; 1557 inrisetime = this_delay / (1.0 - 0.5); 1558 power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 1559 power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd; 1560 power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; 1561 } 1562 1563 i = number_gates - 1; 1564 c_load = c_gate_load + c_wire_load; 1565 rd = tr_R_on(width_n[i], NCH, 1, is_dram_); 1566 c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 1567 drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); 1568 tf = rd * (c_intrinsic + c_load) + r_wire_load * 
(c_wire_load / 2 + c_gate_load); 1569 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); 1570 delay += this_delay; 1571 power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; 1572 power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd; 1573 power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; 1574 1575 return this_delay / (1.0 - 0.5); 1576} 1577 1578