1/*****************************************************************************
2 *                                McPAT/CACTI
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
6 *                          All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 ***************************************************************************/
32
33
34
35
36#include <cassert>
37#include <cmath>
38#include <iostream>
39
40#include "basic_circuit.h"
41#include "parameter.h"
42
// Integer base-2 logarithm: returns floor(log2(num)), i.e. the number of
// times num can be halved before it reaches 1.
// A zero argument is fatal: "log0?" is written to stderr and the process
// exits with status 1.
uint32_t _log2(uint64_t num) {
    if (num == 0) {
        std::cerr << "log0?" << std::endl;
        exit(1);
    }

    uint32_t shifts = 0;
    for (uint64_t rest = num; rest > 1; rest >>= 1) {
        ++shifts;
    }
    return shifts;
}
58
59
// Returns true when val is a positive power of two (1, 2, 4, ...).
// Zero and negative values are never powers of two.
bool is_pow2(int64_t val) {
    // A positive power of two has exactly one bit set, so clearing its
    // lowest set bit (val & (val - 1)) leaves zero. This is a constant-time
    // test and does not depend on the looping _log2() helper.
    return val > 0 && (val & (val - 1)) == 0;
}
69
70
// Integer exponentiation: returns base raised to the n-th power using
// repeated multiplication. For n <= 0 the result is 1.
int powers (int base, int n) {
    int result = 1;
    for (int remaining = n; remaining > 0; --remaining) {
        result *= base;
    }
    return result;
}
79
80/*----------------------------------------------------------------------*/
81
// Base-2 logarithm of a strictly positive value, computed as ln(x) / ln(2).
// Asserts that x > 0.
double logtwo (double x) {
    assert(x > 0);
    const double ln_x = log (x);
    const double ln_2 = log (2.0);
    return ln_x / ln_2;
}
86
87/*----------------------------------------------------------------------*/
88
89
90double gate_C(
91    double width,
92    double wirelength,
93    bool   _is_dram,
94    bool   _is_cell,
95    bool   _is_wl_tr) {
96    const TechnologyParameter::DeviceType * dt;
97
98    if (_is_dram && _is_cell) {
99        dt = &g_tp.dram_acc;   //DRAM cell access transistor
100    } else if (_is_dram && _is_wl_tr) {
101        dt = &g_tp.dram_wl;    //DRAM wordline transistor
102    } else if (!_is_dram && _is_cell) {
103        dt = &g_tp.sram_cell;  // SRAM cell access transistor
104    } else {
105        dt = &g_tp.peri_global;
106    }
107
108    return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
109}
110
111
112// returns gate capacitance in Farads
113// actually this function is the same as gate_C() now
114double gate_C_pass(
115    double width,       // gate width in um (length is Lphy_periph_global)
116    double wirelength,  // poly wire length going to gate in lambda
117    bool   _is_dram,
118    bool   _is_cell,
119    bool   _is_wl_tr) {
120    // v5.0
121    const TechnologyParameter::DeviceType * dt;
122
123    if ((_is_dram) && (_is_cell)) {
124        dt = &g_tp.dram_acc;   //DRAM cell access transistor
125    } else if ((_is_dram) && (_is_wl_tr)) {
126        dt = &g_tp.dram_wl;    //DRAM wordline transistor
127    } else if ((!_is_dram) && _is_cell) {
128        dt = &g_tp.sram_cell;  // SRAM cell access transistor
129    } else {
130        dt = &g_tp.peri_global;
131    }
132
133    return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
134}
135
136
137
138double drain_C_(
139    double width,
140    int nchannel,
141    int stack,
142    int next_arg_thresh_folding_width_or_height_cell,
143    double fold_dimension,
144    bool _is_dram,
145    bool _is_cell,
146    bool _is_wl_tr) {
147    double w_folded_tr;
148    const  TechnologyParameter::DeviceType * dt;
149
150    if ((_is_dram) && (_is_cell)) {
151        dt = &g_tp.dram_acc;   // DRAM cell access transistor
152    } else if ((_is_dram) && (_is_wl_tr)) {
153        dt = &g_tp.dram_wl;    // DRAM wordline transistor
154    } else if ((!_is_dram) && _is_cell) {
155        dt = &g_tp.sram_cell;  // SRAM cell access transistor
156    } else {
157        dt = &g_tp.peri_global;
158    }
159
160    double c_junc_area = dt->C_junc;
161    double c_junc_sidewall = dt->C_junc_sidewall;
162    double c_fringe    = 2 * dt->C_fringe;
163    double c_overlap   = 2 * dt->C_overlap;
164    double drain_C_metal_connecting_folded_tr = 0;
165
166    // determine the width of the transistor after folding (if it is getting folded)
167    if (next_arg_thresh_folding_width_or_height_cell == 0) {
168        // interpret fold_dimension as the the folding width threshold
169        // i.e. the value of transistor width above which the transistor gets folded
170        w_folded_tr = fold_dimension;
171    } else { // interpret fold_dimension as the height of the cell that this transistor is part of.
172        double h_tr_region  = fold_dimension - 2 * g_tp.HPOWERRAIL;
173        // TODO : w_folded_tr must come from Component::compute_gate_area()
174        double ratio_p_to_n = 2.0 / (2.0 + 1.0);
175        if (nchannel) {
176            w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
177        } else {
178            w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
179        }
180    }
181    int num_folded_tr = (int) (ceil(width / w_folded_tr));
182
183    if (num_folded_tr < 2) {
184        w_folded_tr = width;
185    }
186
187    double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +  // only for drain
188                           (stack - 1) * g_tp.spacing_poly_to_poly;
189    double drain_h_for_sidewall = w_folded_tr;
190    double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1);
191    if (num_folded_tr > 1) {
192        total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +
193                         (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly);
194
195        if (num_folded_tr % 2 == 0) {
196            drain_h_for_sidewall = 0;
197        }
198        total_drain_height_for_cap_wrt_gate *= num_folded_tr;
199        drain_C_metal_connecting_folded_tr   = g_tp.wire_local.C_per_um * total_drain_w;
200    }
201
202    double drain_C_area     = c_junc_area * total_drain_w * w_folded_tr;
203    double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w);
204    double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate;
205
206    return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr);
207}
208
209
210double tr_R_on(
211    double width,
212    int nchannel,
213    int stack,
214    bool _is_dram,
215    bool _is_cell,
216    bool _is_wl_tr) {
217    const TechnologyParameter::DeviceType * dt;
218
219    if ((_is_dram) && (_is_cell)) {
220        dt = &g_tp.dram_acc;   //DRAM cell access transistor
221    } else if ((_is_dram) && (_is_wl_tr)) {
222        dt = &g_tp.dram_wl;    //DRAM wordline transistor
223    } else if ((!_is_dram) && _is_cell) {
224        dt = &g_tp.sram_cell;  // SRAM cell access transistor
225    } else {
226        dt = &g_tp.peri_global;
227    }
228
229    double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
230    return (stack * restrans / width);
231}
232
233
234/* This routine operates in reverse: given a resistance, it finds
235 * the transistor width that would have this R.  It is used in the
236 * data wordline to estimate the wordline driver size. */
237
238// returns width in um
239double R_to_w(
240    double res,
241    int   nchannel,
242    bool _is_dram,
243    bool _is_cell,
244    bool _is_wl_tr) {
245    const TechnologyParameter::DeviceType * dt;
246
247    if ((_is_dram) && (_is_cell)) {
248        dt = &g_tp.dram_acc;   //DRAM cell access transistor
249    } else if ((_is_dram) && (_is_wl_tr)) {
250        dt = &g_tp.dram_wl;    //DRAM wordline transistor
251    } else if ((!_is_dram) && (_is_cell)) {
252        dt = &g_tp.sram_cell;  // SRAM cell access transistor
253    } else {
254        dt = &g_tp.peri_global;
255    }
256
257    double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
258    return (restrans / res);
259}
260
261
262double pmos_to_nmos_sz_ratio(
263    bool _is_dram,
264    bool _is_wl_tr) {
265    double p_to_n_sizing_ratio;
266    if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
267        p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio;
268    } else { //DRAM or SRAM all other transistors
269        p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio;
270    }
271    return p_to_n_sizing_ratio;
272}
273
274
275// "Timing Models for MOS Circuits" by Mark Horowitz, 1984
276double horowitz(
277    double inputramptime, // input rise time
278    double tf,            // time constant of gate
279    double vs1,           // threshold voltage
280    double vs2,           // threshold voltage
281    int    rise) {        // whether input rises or fall
282    if (inputramptime == 0 && vs1 == vs2) {
283        return tf * (vs1 < 1 ? -log(vs1) : log(vs1));
284    }
285    double a, b, td;
286
287    a = inputramptime / tf;
288    if (rise == RISE) {
289        b = 0.5;
290        td = tf * sqrt(log(vs1) * log(vs1) + 2 * a * b * (1.0 - vs1)) +
291            tf * (log(vs1) - log(vs2));
292    } else {
293        b = 0.4;
294        td = tf * sqrt(log(1.0 - vs1) * log(1.0 - vs1) + 2 * a * b * (vs1)) +
295            tf * (log(1.0 - vs1) - log(1.0 - vs2));
296    }
297    return (td);
298}
299
300double cmos_Ileak(
301    double nWidth,
302    double pWidth,
303    bool _is_dram,
304    bool _is_cell,
305    bool _is_wl_tr) {
306    TechnologyParameter::DeviceType * dt;
307
308    if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
309        dt = &(g_tp.sram_cell);
310    } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
311        dt = &(g_tp.dram_wl);
312    } else { //DRAM or SRAM all other transistors
313        dt = &(g_tp.peri_global);
314    }
315    return nWidth*dt->I_off_n + pWidth*dt->I_off_p;
316}
317
318
319double simplified_nmos_leakage(
320    double nwidth,
321    bool _is_dram,
322    bool _is_cell,
323    bool _is_wl_tr) {
324    TechnologyParameter::DeviceType * dt;
325
326    if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
327        dt = &(g_tp.sram_cell);
328    } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
329        dt = &(g_tp.dram_wl);
330    } else { //DRAM or SRAM all other transistors
331        dt = &(g_tp.peri_global);
332    }
333    return nwidth * dt->I_off_n;
334}
335
// Product of the consecutive integers m, m + 1, ..., n.
// With m == 1 this is the ordinary factorial n!.
// NOTE(review): combination() calls this with a single argument, so a
// default for m is presumably supplied by the declaration in a header --
// verify there.
//
// An empty range (m > n) yields 1, the empty product. Returning m in that
// case made combination(n, n) evaluate to n + 1 instead of 1.
int factorial(int n, int m) {
    if (m > n) {
        return 1;
    }

    int product = m;
    for (int i = m + 1; i <= n; ++i) {
        product *= i;
    }
    return product;
}
342
// Binomial coefficient C(n, m): the number of ways to choose m items out
// of n. Returns 0 when m is outside [0, n].
//
// Uses the multiplicative formula instead of a ratio of factorials. The
// factorial ratio returned n + 1 for C(n, n) and overflowed int from
// n = 13 onwards; this form is exact whenever the result fits.
int combination(int n, int m) {
    if (m < 0 || m > n) {
        return 0;
    }

    // C(n, m) == C(n, n - m); iterate over the smaller of the two.
    const int k = (m < n - m) ? m : (n - m);

    long long result = 1;
    for (int i = 1; i <= k; ++i) {
        // Invariant: after this step result == C(n - k + i, i), so the
        // division is always exact.
        result = result * (n - k + i) / i;
    }
    return static_cast<int>(result);
}
348
349double simplified_pmos_leakage(
350    double pwidth,
351    bool _is_dram,
352    bool _is_cell,
353    bool _is_wl_tr) {
354    TechnologyParameter::DeviceType * dt;
355
356    if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
357        dt = &(g_tp.sram_cell);
358    } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
359        dt = &(g_tp.dram_wl);
360    } else { //DRAM or SRAM all other transistors
361        dt = &(g_tp.peri_global);
362    }
363    return pwidth * dt->I_off_p;
364}
365
366double cmos_Ig_n(
367    double nWidth,
368    bool _is_dram,
369    bool _is_cell,
370    bool _is_wl_tr) {
371    TechnologyParameter::DeviceType * dt;
372
373    if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
374        dt = &(g_tp.sram_cell);
375    } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
376        dt = &(g_tp.dram_wl);
377    } else { //DRAM or SRAM all other transistors
378        dt = &(g_tp.peri_global);
379    }
380    return nWidth*dt->I_g_on_n;
381}
382
383double cmos_Ig_p(
384    double pWidth,
385    bool _is_dram,
386    bool _is_cell,
387    bool _is_wl_tr) {
388    TechnologyParameter::DeviceType * dt;
389
390    if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
391        dt = &(g_tp.sram_cell);
392    } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
393        dt = &(g_tp.dram_wl);
394    } else { //DRAM or SRAM all other transistors
395        dt = &(g_tp.peri_global);
396    }
397    return pWidth*dt->I_g_on_p;
398}
399
400double cmos_Isub_leakage(
401    double nWidth,
402    double pWidth,
403    int    fanin,
404    enum Gate_type g_type,
405    bool _is_dram,
406    bool _is_cell,
407    bool _is_wl_tr,
408    enum Half_net_topology topo) {
409    assert (fanin >= 1);
410    double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr);
411    double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr);
412    double Isub = 0;
413    int    num_states;
414    int    num_off_tx;
415
416    num_states = int(pow(2.0, fanin));
417
418    switch (g_type) {
419    case nmos:
420        if (fanin == 1) {
421            Isub = nmos_leak / num_states;
422        } else {
423            if (topo == parallel) {
424                //only when all tx are off, leakage power is non-zero.
425                //The possibility of this state is 1/num_states
426                Isub = nmos_leak * fanin / num_states;
427            } else {
428                for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
429                    //when num_off_tx ==0 there is no leakage power
430                    Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR,
431                                            (num_off_tx - 1)) *
432                        combination(fanin, num_off_tx);
433                }
434                Isub /= num_states;
435            }
436
437        }
438        break;
439    case pmos:
440        if (fanin == 1) {
441            Isub = pmos_leak / num_states;
442        } else {
443            if (topo == parallel) {
444                //only when all tx are off, leakage power is non-zero.
445                //The possibility of this state is 1/num_states
446                Isub = pmos_leak * fanin / num_states;
447            } else {
448                for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
449                    //when num_off_tx ==0 there is no leakage power
450                    Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR,
451                                            (num_off_tx - 1)) *
452                        combination(fanin, num_off_tx);
453                }
454                Isub /= num_states;
455            }
456
457        }
458        break;
459    case inv:
460        Isub = (nmos_leak + pmos_leak) / 2;
461        break;
462    case nand:
463        Isub += fanin * pmos_leak;//the pullup network
464        for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
465            // the pulldown network
466            Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR,
467                                    (num_off_tx - 1)) *
468                combination(fanin, num_off_tx);
469        }
470        Isub /= num_states;
471        break;
472    case nor:
473        for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
474             // the pullup network
475            Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR,
476                                    (num_off_tx - 1)) *
477                combination(fanin, num_off_tx);
478        }
479        Isub += fanin * nmos_leak;//the pulldown network
480        Isub /= num_states;
481        break;
482    case tri:
483        Isub += (nmos_leak + pmos_leak) / 2;//enabled
484        //disabled upper bound of leakage power
485        Isub += nmos_leak * UNI_LEAK_STACK_FACTOR;
486        Isub /= 2;
487        break;
488    case tg:
489        Isub = (nmos_leak + pmos_leak) / 2;
490        break;
491    default:
492        assert(0);
493        break;
494    }
495
496    return Isub;
497}
498
499
500double cmos_Ig_leakage(
501    double nWidth,
502    double pWidth,
503    int    fanin,
504    enum Gate_type g_type,
505    bool _is_dram,
506    bool _is_cell,
507    bool _is_wl_tr,
508    enum Half_net_topology topo) {
509    assert (fanin >= 1);
510    double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr);
511    double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr);
512    double Ig_on = 0;
513    int    num_states;
514    int    num_on_tx;
515
516    num_states = int(pow(2.0, fanin));
517
518    switch (g_type) {
519    case nmos:
520        if (fanin == 1) {
521            Ig_on = nmos_leak / num_states;
522        } else {
523            if (topo == parallel) {
524                for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
525                    Ig_on += nmos_leak * combination(fanin, num_on_tx) *
526                        num_on_tx;
527                }
528            } else {
529                //pull down network when all TXs are on.
530                Ig_on += nmos_leak * fanin;
531                //num_on_tx is the number of on tx
532                for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
533                     //when num_on_tx=[1,n-1]
534                    //TODO: this is a approximation now, a precise computation
535                    //will be very complicated.
536                    Ig_on += nmos_leak * combination(fanin, num_on_tx) *
537                        num_on_tx / 2;
538                }
539                Ig_on /= num_states;
540            }
541        }
542        break;
543    case pmos:
544        if (fanin == 1) {
545            Ig_on = pmos_leak / num_states;
546        } else {
547            if (topo == parallel) {
548                for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
549                    Ig_on += pmos_leak * combination(fanin, num_on_tx) *
550                        num_on_tx;
551                }
552            } else {
553                //pull down network when all TXs are on.
554                Ig_on += pmos_leak * fanin;
555                //num_on_tx is the number of on tx
556                for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
557                    //when num_on_tx=[1,n-1]
558                    //TODO: this is a approximation now, a precise computation
559                    //will be very complicated.
560                    Ig_on += pmos_leak * combination(fanin, num_on_tx) *
561                        num_on_tx / 2;
562                }
563                Ig_on /= num_states;
564            }
565        }
566        break;
567
568    case inv:
569        Ig_on = (nmos_leak + pmos_leak) / 2;
570        break;
571    case nand:
572      //pull up network
573      //when num_on_tx=[1,n]
574      for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
575          Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx;
576      }
577
578      //pull down network
579      Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
580      //num_on_tx is the number of on tx
581      for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
582          //when num_on_tx=[1,n-1]
583          //TODO: this is a approximation now, a precise computation will be
584          //very complicated.
585          Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2;
586        }
587        Ig_on /= num_states;
588        break;
589    case nor:
590        // num_on_tx is the number of on tx in pull up network
591        Ig_on += pmos_leak * fanin;//pull up network when all TXs are on.
592        for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
593            Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2;
594
595        }
596        //pull down network
597        for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
598            //when num_on_tx=[1,n]
599            Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx;
600        }
601        Ig_on /= num_states;
602        break;
603    case tri:
604        Ig_on += (2 * nmos_leak + 2 * pmos_leak) / 2;//enabled
605        //disabled upper bound of leakage power
606        Ig_on += (nmos_leak + pmos_leak) / 2;
607        Ig_on /= 2;
608        break;
609    case tg:
610        Ig_on = (nmos_leak + pmos_leak) / 2;
611        break;
612    default:
613        assert(0);
614        break;
615    }
616
617    return Ig_on;
618}
619
// Simplified short-circuit estimate for a gate; the returned quantity is
// actually an energy, averaged over the charge and discharge transitions.
//
// vt, vdd              : threshold and supply voltage.
// velocity_index       : exponent parameter of the model.
// c_in, c_out          : input and output capacitance; fanout = c_out / c_in.
// i_on_n, i_on_p       : on currents of this stage.
// i_on_n_in, i_on_p_in : on currents of the stage these are compared with
//                        (fo_n = i_on_n / i_on_n_in, fo_p likewise).
// w_nmos, w_pmos       : accepted but not used by the current formula.
//
// Fix: the leading coefficient was written as `10 / 3`, which is integer
// division and evaluates to 3; it is now 10.0 / 3.0.
// Only the "low" formula contributes to the result. The "high" formula was
// computed but never used (the harmonic-mean combination was disabled), so
// it has been removed.
double shortcircuit_simple(
    double vt,
    double velocity_index,
    double c_in,
    double c_out,
    double w_nmos,
    double w_pmos,
    double i_on_n,
    double i_on_p,
    double i_on_n_in,
    double i_on_p_in,
    double vdd) {

    const double fo_n            = i_on_n / i_on_n_in;
    const double fo_p            = i_on_p / i_on_p_in;
    const double fanout          = c_out / c_in;
    const double beta_ratio      = i_on_p / i_on_n;
    const double vt_to_vdd_ratio = vt / vdd;

    // Factor shared by the charge and discharge expressions.
    const double common =
        10.0 / 3.0 * (pow(((vdd - vt) - vt_to_vdd_ratio), 3.0) /
                      pow(velocity_index, 2.0) /
                      pow(2.0, 3 * vt_to_vdd_ratio * vt_to_vdd_ratio));

    const double p_short_circuit_discharge =
        common * c_in * vdd * vdd * fo_p * fo_p / fanout / beta_ratio;
    const double p_short_circuit_charge =
        common * c_in * vdd * vdd * fo_n * fo_n / fanout * beta_ratio;

    return (p_short_circuit_discharge + p_short_circuit_charge) / 2;
}
680
// Detailed short-circuit estimate (the quantity is actually an energy).
//
// NOTE(review): as written this function always returns 0. The discharge
// term is computed but never assigned to the returned variable. That
// behaviour is preserved here on purpose: g_v_alpha and h_v_alpha raise
// (1 - velocity_index) to fractional powers, which looks like
// velocity_index standing in for a Vt/Vdd ratio, so simply returning the
// computed value could yield NaN. Confirm the intended formula against the
// model's source before wiring the result through.
//
// c_out, w_nmos and w_pmos are accepted but not used; fanout is fixed at 1.
double shortcircuit(
    double vt,
    double velocity_index,
    double c_in,
    double c_out,
    double w_nmos,
    double w_pmos,
    double i_on_n,
    double i_on_p,
    double i_on_n_in,
    double i_on_p_in,
    double vdd) {

    // this is actually energy
    const double p_short_circuit = 0;

    const double fo_n            = i_on_n / i_on_n_in;
    const double fo_p            = i_on_p / i_on_p_in;
    const double fanout          = 1;
    const double beta_ratio      = i_on_p / i_on_n;
    const double vt_to_vdd_ratio = vt / vdd;
    const double e               = 2.71828;

    const double alpha     = velocity_index;
    const double overdrive = vdd - vt;

    const double f_alpha =
        1 / (alpha + 2) - alpha / (2 * (alpha + 3)) +
        alpha / (alpha + 4) * (alpha / 2 - 1);
    const double k_v =
        0.9 / 0.8 + overdrive / 0.8 * log(10 * overdrive / e);
    const double g_v_alpha =
        (alpha + 1) *
        pow((1 - alpha), alpha) *
        pow((1 - alpha), alpha / 2) / f_alpha /
        pow((1 - alpha - alpha), (alpha / 2 + alpha + 2));
    const double h_v_alpha =
        pow(2, alpha) * (alpha + 1) *
        pow((1 - alpha), alpha) /
        pow((1 - alpha - alpha), (alpha + 1));

    // Computed but intentionally not returned; see the note above.
    const double p_short_circuit_discharge =
        k_v * vdd * vdd * c_in * fo_p * fo_p /
        (overdrive * g_v_alpha * fanout * beta_ratio / 2 / k_v +
         h_v_alpha * fo_p);

    (void) fo_n;
    (void) vt_to_vdd_ratio;
    (void) p_short_circuit_discharge;

    return (p_short_circuit);
}
745