1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
| 1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
|
| 5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
5 * All Rights Reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer; 11 * redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution; 14 * neither the name of the copyright holders nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
| 6 * All Rights Reserved 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are 10 * met: redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer; 12 * redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution; 15 * neither the name of the copyright holders nor the names of its 16 * contributors may be used to endorse or promote products derived from 17 * this software without specific prior written permission. 18 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
| 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29 * 30 ***************************************************************************/ 31 32#include "crossbar.h" 33 34#define ASPECT_THRESHOLD .8 35#define ADJ 1 36 37Crossbar::Crossbar( 38 double n_inp_, 39 double n_out_, 40 double flit_size_, 41 TechnologyParameter::DeviceType *dt
| 30 * 31 ***************************************************************************/ 32 33#include "crossbar.h" 34 35#define ASPECT_THRESHOLD .8 36#define ADJ 1 37 38Crossbar::Crossbar( 39 double n_inp_, 40 double n_out_, 41 double flit_size_, 42 TechnologyParameter::DeviceType *dt
|
42 ):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) 43{ 44 min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; 45 Vdd = dt->Vdd; 46 CB_ADJ = 1;
| 43): n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) { 44 min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; 45 Vdd = dt->Vdd; 46 CB_ADJ = 1;
|
47} 48
| 47} 48
|
49Crossbar::~Crossbar(){}
| 49Crossbar::~Crossbar() {}
|
50
| 50
|
51double Crossbar::output_buffer() 52{
| 51double Crossbar::output_buffer() {
|
53
| 52
|
54 //Wire winit(4, 4); 55 double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch; 56 Wire w1(g_ip->wt, l_eff); 57 //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing; 58 double s1 = w1.repeater_size * (l_eff <w1.repeater_spacing? l_eff *ADJ/w1.repeater_spacing : ADJ); 59 double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; 60 // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor 61 TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size); 62 TriS2 = s1; //driver transistor
| 53 //Wire winit(4, 4); 54 double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch; 55 Wire w1(g_ip->wt, l_eff); 56 //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing; 57 double s1 = w1.repeater_size * (l_eff < w1.repeater_spacing ? 58 l_eff * ADJ / w1.repeater_spacing : ADJ); 59 double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; 60 // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor 61 TriS1 = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size); 62 TriS2 = s1; //driver transistor
|
63
| 63
|
64 if (TriS1 < 1) 65 TriS1 = 1;
| 64 if (TriS1 < 1) 65 TriS1 = 1;
|
66
| 66
|
67 double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) + 68 gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0);
| 67 double input_cap = gate_C(TriS1 * (2 * min_w_pmos + g_tp.min_w_nmos_), 0) + 68 gate_C(TriS1 * (min_w_pmos + 2 * g_tp.min_w_nmos_), 0);
|
69// input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + 70// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + 71// gate_C(TriS2*g_tp.min_w_nmos_, 0)+ 72// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 + 73// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + 74// gate_C(TriS2*min_w_pmos, 0);
| 69// input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + 70// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + 71// gate_C(TriS2*g_tp.min_w_nmos_, 0)+ 72// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 + 73// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + 74// gate_C(TriS2*min_w_pmos, 0);
|
75 tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + 76 drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + 77 gate_C(TriS2*g_tp.min_w_nmos_, 0)+ 78 drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 + 79 drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + 80 gate_C(TriS2*min_w_pmos, 0); 81 double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + 82 drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def); 83 double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0);
| 75 tri_int_cap = drain_C_(TriS1 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + 76 drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 + 77 gate_C(TriS2 * g_tp.min_w_nmos_, 0) + 78 drain_C_(TriS1 * min_w_pmos, NCH, 1, 1, g_tp.cell_h_def) * 2 + 79 drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + 80 gate_C(TriS2 * min_w_pmos, 0); 81 double output_cap = drain_C_(TriS2 * g_tp.min_w_nmos_, NCH, 1, 1, 82 g_tp.cell_h_def) + 83 drain_C_(TriS2 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def); 84 double ctr_cap = gate_C(TriS2 * (min_w_pmos + g_tp.min_w_nmos_), 0);
|
84
| 85
|
85 tri_inp_cap = input_cap; 86 tri_out_cap = output_cap; 87 tri_ctr_cap = ctr_cap; 88 return input_cap + output_cap + ctr_cap;
| 86 tri_inp_cap = input_cap; 87 tri_out_cap = output_cap; 88 tri_ctr_cap = ctr_cap; 89 return input_cap + output_cap + ctr_cap;
|
89} 90
| 90} 91
|
91void Crossbar::compute_power() 92{
| 92void Crossbar::compute_power() {
|
93
| 93
|
94 Wire winit(4, 4); 95 double tri_cap = output_buffer(); 96 assert(tri_cap > 0); 97 //area of a tristate logic 98 double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def); 99 g_area *= 2; // to model area of output transistors 100 g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def); 101 g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def); 102 double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def); 103 // effective no. of tristate buffers that need to be laid side by side 104 int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch)); 105 double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out); 106 Wire w1(g_ip->wt, wire_len);
| 94 Wire winit(4, 4); 95 double tri_cap = output_buffer(); 96 assert(tri_cap > 0); 97 //area of a tristate logic 98 double g_area = compute_gate_area(INV, 1, TriS2 * g_tp.min_w_nmos_, 99 TriS2 * min_w_pmos, g_tp.cell_h_def); 100 g_area *= 2; // to model area of output transistors 101 g_area += compute_gate_area (NAND, 2, TriS1 * 2 * g_tp.min_w_nmos_, 102 TriS1 * min_w_pmos, g_tp.cell_h_def); 103 g_area += compute_gate_area (NOR, 2, TriS1 * g_tp.min_w_nmos_, 104 TriS1 * 2 * min_w_pmos, g_tp.cell_h_def); 105 double width /*per tristate*/ = g_area / (CB_ADJ * g_tp.cell_h_def); 106 // effective no. of tristate buffers that need to be laid side by side 107 int ntri = (int)ceil(g_tp.cell_h_def / (g_tp.wire_outside_mat.pitch)); 108 double wire_len = MAX(width * ntri * n_out, 109 flit_size * g_tp.wire_outside_mat.pitch * n_out); 110 Wire w1(g_ip->wt, wire_len);
|
107
| 111
|
108 area.w = wire_len; 109 area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ; 110 Wire w2(g_ip->wt, area.h);
| 112 area.w = wire_len; 113 area.h = g_tp.wire_outside_mat.pitch * n_inp * flit_size * CB_ADJ; 114 Wire w2(g_ip->wt, area.h);
|
111
| 115
|
112 double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp); 113 if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb;
| 116 double aspect_ratio_cb = (area.h / area.w) * (n_out / n_inp); 117 if (aspect_ratio_cb > 1) aspect_ratio_cb = 1 / aspect_ratio_cb;
|
114
| 118
|
115 if (aspect_ratio_cb < ASPECT_THRESHOLD) { 116 if (n_out > 2 && n_inp > 2) { 117 CB_ADJ+=0.2; 118 //cout << "CB ADJ " << CB_ADJ << endl; 119 if (CB_ADJ < 4) { 120 this->compute_power(); 121 }
| 119 if (aspect_ratio_cb < ASPECT_THRESHOLD) { 120 if (n_out > 2 && n_inp > 2) { 121 CB_ADJ += 0.2; 122 //cout << "CB ADJ " << CB_ADJ << endl; 123 if (CB_ADJ < 4) { 124 this->compute_power(); 125 } 126 }
|
122 }
| 127 }
|
123 }
| |
124 125 126
| 128 129 130
|
127 power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size; 128 power.readOp.leakage = n_inp * n_out * flit_size * ( 129 cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+ 130 cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+ 131 cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+ 132 w1.power.readOp.leakage + w2.power.readOp.leakage); 133 power.readOp.gate_leakage = n_inp * n_out * flit_size * ( 134 cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+ 135 cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+ 136 cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+ 137 w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
| 131 power.readOp.dynamic = 132 (w1.power.readOp.dynamic + w2.power.readOp.dynamic + 133 (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + 134 tri_int_cap) * Vdd * Vdd) * flit_size; 135 power.readOp.leakage = n_inp * n_out * flit_size * ( 136 cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2, 137 1, inv) * Vdd + 138 cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, 139 2, nand) * Vdd + 140 cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, 141 2, nor) * Vdd + 142 w1.power.readOp.leakage + w2.power.readOp.leakage); 143 power.readOp.gate_leakage = n_inp * n_out * flit_size * ( 144 cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2, 145 1, inv) * Vdd + 146 cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, 147 2, nand) * Vdd + 148 cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, 149 2, nor) * Vdd + 150 w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
|
138
| 151
|
139 // delay calculation 140 double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch; 141 Wire wdriver(g_ip->wt, l_eff); 142 double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1); 143 double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap; 144 delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
| 152 // delay calculation 153 double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch; 154 Wire wdriver(g_ip->wt, l_eff); 155 double res = g_tp.wire_outside_mat.R_per_um * (area.w + area.h) + 156 tr_R_on(g_tp.min_w_nmos_ * wdriver.repeater_size, NCH, 1); 157 double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out * 158 tri_inp_cap + n_inp * tri_out_cap; 159 delay = horowitz(w1.signal_rise_time(), res * cap, deviceType->Vth / 160 deviceType->Vdd, deviceType->Vth / deviceType->Vdd, RISE);
|
145
| 161
|
146 Wire wreset();
| 162 Wire wreset();
|
147} 148
| 163} 164
|
149void Crossbar::print_crossbar() 150{ 151 cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n"; 152 cout << "Flit size : " << flit_size << " bits" << endl; 153 cout << "Width : " << area.w << " u" << endl; 154 cout << "Height : " << area.h << " u" << endl; 155 cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl; 156 cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl; 157 cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl; 158 cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
| 165void Crossbar::print_crossbar() { 166 cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n"; 167 cout << "Flit size : " << flit_size << " bits" << endl; 168 cout << "Width : " << area.w << " u" << endl; 169 cout << "Height : " << area.h << " u" << endl; 170 cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * 171 MIN(n_inp, n_out) << " (nJ)" << endl; 172 cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" 173 << endl; 174 cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 175 << " (mW)" << endl; 176 cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
|
159} 160 161
| 177} 178 179
|