4a5
> * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
28c29
< * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
---
> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42,46c43,46
< ):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt)
< {
< min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
< Vdd = dt->Vdd;
< CB_ADJ = 1;
---
> ): n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) {
> min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
> Vdd = dt->Vdd;
> CB_ADJ = 1;
49c49
< Crossbar::~Crossbar(){}
---
> Crossbar::~Crossbar() {}
51,52c51
< double Crossbar::output_buffer()
< {
---
> double Crossbar::output_buffer() {
54,62c53,62
< //Wire winit(4, 4);
< double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
< Wire w1(g_ip->wt, l_eff);
< //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
< double s1 = w1.repeater_size * (l_eff <w1.repeater_spacing? l_eff *ADJ/w1.repeater_spacing : ADJ);
< double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
< // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
< TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
< TriS2 = s1; //driver transistor
---
> //Wire winit(4, 4);
> double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
> Wire w1(g_ip->wt, l_eff);
> //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
> double s1 = w1.repeater_size * (l_eff < w1.repeater_spacing ?
> l_eff * ADJ / w1.repeater_spacing : ADJ);
> double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
> // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
> TriS1 = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size);
> TriS2 = s1; //driver transistor
64,65c64,65
< if (TriS1 < 1)
< TriS1 = 1;
---
> if (TriS1 < 1)
> TriS1 = 1;
67,68c67,68
< double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) +
< gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0);
---
> double input_cap = gate_C(TriS1 * (2 * min_w_pmos + g_tp.min_w_nmos_), 0) +
> gate_C(TriS1 * (min_w_pmos + 2 * g_tp.min_w_nmos_), 0);
75,83c75,84
< tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
< drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
< gate_C(TriS2*g_tp.min_w_nmos_, 0)+
< drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
< drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
< gate_C(TriS2*min_w_pmos, 0);
< double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
< drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
< double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0);
---
> tri_int_cap = drain_C_(TriS1 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
> drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 +
> gate_C(TriS2 * g_tp.min_w_nmos_, 0) +
> drain_C_(TriS1 * min_w_pmos, NCH, 1, 1, g_tp.cell_h_def) * 2 +
> drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
> gate_C(TriS2 * min_w_pmos, 0);
> double output_cap = drain_C_(TriS2 * g_tp.min_w_nmos_, NCH, 1, 1,
> g_tp.cell_h_def) +
> drain_C_(TriS2 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
> double ctr_cap = gate_C(TriS2 * (min_w_pmos + g_tp.min_w_nmos_), 0);
85,88c86,89
< tri_inp_cap = input_cap;
< tri_out_cap = output_cap;
< tri_ctr_cap = ctr_cap;
< return input_cap + output_cap + ctr_cap;
---
> tri_inp_cap = input_cap;
> tri_out_cap = output_cap;
> tri_ctr_cap = ctr_cap;
> return input_cap + output_cap + ctr_cap;
91,92c92
< void Crossbar::compute_power()
< {
---
> void Crossbar::compute_power() {
94,106c94,110
< Wire winit(4, 4);
< double tri_cap = output_buffer();
< assert(tri_cap > 0);
< //area of a tristate logic
< double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def);
< g_area *= 2; // to model area of output transistors
< g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def);
< g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def);
< double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def);
< // effective no. of tristate buffers that need to be laid side by side
< int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch));
< double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out);
< Wire w1(g_ip->wt, wire_len);
---
> Wire winit(4, 4);
> double tri_cap = output_buffer();
> assert(tri_cap > 0);
> //area of a tristate logic
> double g_area = compute_gate_area(INV, 1, TriS2 * g_tp.min_w_nmos_,
> TriS2 * min_w_pmos, g_tp.cell_h_def);
> g_area *= 2; // to model area of output transistors
> g_area += compute_gate_area (NAND, 2, TriS1 * 2 * g_tp.min_w_nmos_,
> TriS1 * min_w_pmos, g_tp.cell_h_def);
> g_area += compute_gate_area (NOR, 2, TriS1 * g_tp.min_w_nmos_,
> TriS1 * 2 * min_w_pmos, g_tp.cell_h_def);
> double width /*per tristate*/ = g_area / (CB_ADJ * g_tp.cell_h_def);
> // effective no. of tristate buffers that need to be laid side by side
> int ntri = (int)ceil(g_tp.cell_h_def / (g_tp.wire_outside_mat.pitch));
> double wire_len = MAX(width * ntri * n_out,
> flit_size * g_tp.wire_outside_mat.pitch * n_out);
> Wire w1(g_ip->wt, wire_len);
108,110c112,114
< area.w = wire_len;
< area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ;
< Wire w2(g_ip->wt, area.h);
---
> area.w = wire_len;
> area.h = g_tp.wire_outside_mat.pitch * n_inp * flit_size * CB_ADJ;
> Wire w2(g_ip->wt, area.h);
112,113c116,117
< double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp);
< if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb;
---
> double aspect_ratio_cb = (area.h / area.w) * (n_out / n_inp);
> if (aspect_ratio_cb > 1) aspect_ratio_cb = 1 / aspect_ratio_cb;
115,121c119,126
< if (aspect_ratio_cb < ASPECT_THRESHOLD) {
< if (n_out > 2 && n_inp > 2) {
< CB_ADJ+=0.2;
< //cout << "CB ADJ " << CB_ADJ << endl;
< if (CB_ADJ < 4) {
< this->compute_power();
< }
---
> if (aspect_ratio_cb < ASPECT_THRESHOLD) {
> if (n_out > 2 && n_inp > 2) {
> CB_ADJ += 0.2;
> //cout << "CB ADJ " << CB_ADJ << endl;
> if (CB_ADJ < 4) {
> this->compute_power();
> }
> }
123d127
< }
127,137c131,150
< power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size;
< power.readOp.leakage = n_inp * n_out * flit_size * (
< cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
< cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
< cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
< w1.power.readOp.leakage + w2.power.readOp.leakage);
< power.readOp.gate_leakage = n_inp * n_out * flit_size * (
< cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
< cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
< cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
< w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
---
> power.readOp.dynamic =
> (w1.power.readOp.dynamic + w2.power.readOp.dynamic +
> (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap +
> tri_int_cap) * Vdd * Vdd) * flit_size;
> power.readOp.leakage = n_inp * n_out * flit_size * (
> cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
> 1, inv) * Vdd +
> cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
> 2, nand) * Vdd +
> cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
> 2, nor) * Vdd +
> w1.power.readOp.leakage + w2.power.readOp.leakage);
> power.readOp.gate_leakage = n_inp * n_out * flit_size * (
> cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
> 1, inv) * Vdd +
> cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
> 2, nand) * Vdd +
> cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
> 2, nor) * Vdd +
> w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
139,144c152,160
< // delay calculation
< double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
< Wire wdriver(g_ip->wt, l_eff);
< double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1);
< double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap;
< delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
---
> // delay calculation
> double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
> Wire wdriver(g_ip->wt, l_eff);
> double res = g_tp.wire_outside_mat.R_per_um * (area.w + area.h) +
> tr_R_on(g_tp.min_w_nmos_ * wdriver.repeater_size, NCH, 1);
> double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out *
> tri_inp_cap + n_inp * tri_out_cap;
> delay = horowitz(w1.signal_rise_time(), res * cap, deviceType->Vth /
> deviceType->Vdd, deviceType->Vth / deviceType->Vdd, RISE);
146c162
< Wire wreset();
---
> Wire wreset();
149,158c165,176
< void Crossbar::print_crossbar()
< {
< cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
< cout << "Flit size : " << flit_size << " bits" << endl;
< cout << "Width : " << area.w << " u" << endl;
< cout << "Height : " << area.h << " u" << endl;
< cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl;
< cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
< cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl;
< cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
---
> void Crossbar::print_crossbar() {
> cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
> cout << "Flit size : " << flit_size << " bits" << endl;
> cout << "Width : " << area.w << " u" << endl;
> cout << "Height : " << area.h << " u" << endl;
> cout << "Dynamic Power : " << power.readOp.dynamic*1e9 *
> MIN(n_inp, n_out) << " (nJ)" << endl;
> cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)"
> << endl;
> cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3
> << " (mW)" << endl;
> cout << "Crossbar Delay : " << delay*1e12 << " ps\n";