crossbar.cc (10152:52c552138ba1) crossbar.cc (10234:5cb711fa6176)
1/*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
1/*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
5 * All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
6 * All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 ***************************************************************************/
31
32#include "crossbar.h"
33
34#define ASPECT_THRESHOLD .8
35#define ADJ 1
36
37Crossbar::Crossbar(
38 double n_inp_,
39 double n_out_,
40 double flit_size_,
41 TechnologyParameter::DeviceType *dt
30 *
31 ***************************************************************************/
32
33#include "crossbar.h"
34
35#define ASPECT_THRESHOLD .8
36#define ADJ 1
37
38Crossbar::Crossbar(
39 double n_inp_,
40 double n_out_,
41 double flit_size_,
42 TechnologyParameter::DeviceType *dt
42 ):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt)
43{
44 min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
45 Vdd = dt->Vdd;
46 CB_ADJ = 1;
43): n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) {
44 min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
45 Vdd = dt->Vdd;
46 CB_ADJ = 1;
47}
48
47}
48
49Crossbar::~Crossbar(){}
49Crossbar::~Crossbar() {}
50
50
51double Crossbar::output_buffer()
52{
51double Crossbar::output_buffer() {
53
52
54 //Wire winit(4, 4);
55 double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
56 Wire w1(g_ip->wt, l_eff);
57 //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
58 double s1 = w1.repeater_size * (l_eff <w1.repeater_spacing? l_eff *ADJ/w1.repeater_spacing : ADJ);
59 double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
60 // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
61 TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
62 TriS2 = s1; //driver transistor
53 //Wire winit(4, 4);
54 double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
55 Wire w1(g_ip->wt, l_eff);
56 //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
57 double s1 = w1.repeater_size * (l_eff < w1.repeater_spacing ?
58 l_eff * ADJ / w1.repeater_spacing : ADJ);
59 double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
60 // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
61 TriS1 = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size);
62 TriS2 = s1; //driver transistor
63
63
64 if (TriS1 < 1)
65 TriS1 = 1;
64 if (TriS1 < 1)
65 TriS1 = 1;
66
66
67 double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) +
68 gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0);
67 double input_cap = gate_C(TriS1 * (2 * min_w_pmos + g_tp.min_w_nmos_), 0) +
68 gate_C(TriS1 * (min_w_pmos + 2 * g_tp.min_w_nmos_), 0);
69// input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
70// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
71// gate_C(TriS2*g_tp.min_w_nmos_, 0)+
72// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
73// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
74// gate_C(TriS2*min_w_pmos, 0);
69// input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
70// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
71// gate_C(TriS2*g_tp.min_w_nmos_, 0)+
72// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
73// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
74// gate_C(TriS2*min_w_pmos, 0);
75 tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
76 drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
77 gate_C(TriS2*g_tp.min_w_nmos_, 0)+
78 drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
79 drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
80 gate_C(TriS2*min_w_pmos, 0);
81 double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
82 drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
83 double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0);
75 tri_int_cap = drain_C_(TriS1 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
76 drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 +
77 gate_C(TriS2 * g_tp.min_w_nmos_, 0) +
78 drain_C_(TriS1 * min_w_pmos, NCH, 1, 1, g_tp.cell_h_def) * 2 +
79 drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
80 gate_C(TriS2 * min_w_pmos, 0);
81 double output_cap = drain_C_(TriS2 * g_tp.min_w_nmos_, NCH, 1, 1,
82 g_tp.cell_h_def) +
83 drain_C_(TriS2 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
84 double ctr_cap = gate_C(TriS2 * (min_w_pmos + g_tp.min_w_nmos_), 0);
84
85
85 tri_inp_cap = input_cap;
86 tri_out_cap = output_cap;
87 tri_ctr_cap = ctr_cap;
88 return input_cap + output_cap + ctr_cap;
86 tri_inp_cap = input_cap;
87 tri_out_cap = output_cap;
88 tri_ctr_cap = ctr_cap;
89 return input_cap + output_cap + ctr_cap;
89}
90
90}
91
91void Crossbar::compute_power()
92{
92void Crossbar::compute_power() {
93
93
94 Wire winit(4, 4);
95 double tri_cap = output_buffer();
96 assert(tri_cap > 0);
97 //area of a tristate logic
98 double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def);
99 g_area *= 2; // to model area of output transistors
100 g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def);
101 g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def);
102 double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def);
103 // effective no. of tristate buffers that need to be laid side by side
104 int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch));
105 double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out);
106 Wire w1(g_ip->wt, wire_len);
94 Wire winit(4, 4);
95 double tri_cap = output_buffer();
96 assert(tri_cap > 0);
97 //area of a tristate logic
98 double g_area = compute_gate_area(INV, 1, TriS2 * g_tp.min_w_nmos_,
99 TriS2 * min_w_pmos, g_tp.cell_h_def);
100 g_area *= 2; // to model area of output transistors
101 g_area += compute_gate_area (NAND, 2, TriS1 * 2 * g_tp.min_w_nmos_,
102 TriS1 * min_w_pmos, g_tp.cell_h_def);
103 g_area += compute_gate_area (NOR, 2, TriS1 * g_tp.min_w_nmos_,
104 TriS1 * 2 * min_w_pmos, g_tp.cell_h_def);
105 double width /*per tristate*/ = g_area / (CB_ADJ * g_tp.cell_h_def);
106 // effective no. of tristate buffers that need to be laid side by side
107 int ntri = (int)ceil(g_tp.cell_h_def / (g_tp.wire_outside_mat.pitch));
108 double wire_len = MAX(width * ntri * n_out,
109 flit_size * g_tp.wire_outside_mat.pitch * n_out);
110 Wire w1(g_ip->wt, wire_len);
107
111
108 area.w = wire_len;
109 area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ;
110 Wire w2(g_ip->wt, area.h);
112 area.w = wire_len;
113 area.h = g_tp.wire_outside_mat.pitch * n_inp * flit_size * CB_ADJ;
114 Wire w2(g_ip->wt, area.h);
111
115
112 double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp);
113 if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb;
116 double aspect_ratio_cb = (area.h / area.w) * (n_out / n_inp);
117 if (aspect_ratio_cb > 1) aspect_ratio_cb = 1 / aspect_ratio_cb;
114
118
115 if (aspect_ratio_cb < ASPECT_THRESHOLD) {
116 if (n_out > 2 && n_inp > 2) {
117 CB_ADJ+=0.2;
118 //cout << "CB ADJ " << CB_ADJ << endl;
119 if (CB_ADJ < 4) {
120 this->compute_power();
121 }
119 if (aspect_ratio_cb < ASPECT_THRESHOLD) {
120 if (n_out > 2 && n_inp > 2) {
121 CB_ADJ += 0.2;
122 //cout << "CB ADJ " << CB_ADJ << endl;
123 if (CB_ADJ < 4) {
124 this->compute_power();
125 }
126 }
122 }
127 }
123 }
124
125
126
128
129
130
127 power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size;
128 power.readOp.leakage = n_inp * n_out * flit_size * (
129 cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
130 cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
131 cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
132 w1.power.readOp.leakage + w2.power.readOp.leakage);
133 power.readOp.gate_leakage = n_inp * n_out * flit_size * (
134 cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
135 cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
136 cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
137 w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
131 power.readOp.dynamic =
132 (w1.power.readOp.dynamic + w2.power.readOp.dynamic +
133 (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap +
134 tri_int_cap) * Vdd * Vdd) * flit_size;
135 power.readOp.leakage = n_inp * n_out * flit_size * (
136 cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
137 1, inv) * Vdd +
138 cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
139 2, nand) * Vdd +
140 cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
141 2, nor) * Vdd +
142 w1.power.readOp.leakage + w2.power.readOp.leakage);
143 power.readOp.gate_leakage = n_inp * n_out * flit_size * (
144 cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
145 1, inv) * Vdd +
146 cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
147 2, nand) * Vdd +
148 cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
149 2, nor) * Vdd +
150 w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
138
151
139 // delay calculation
140 double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
141 Wire wdriver(g_ip->wt, l_eff);
142 double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1);
143 double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap;
144 delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
152 // delay calculation
153 double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
154 Wire wdriver(g_ip->wt, l_eff);
155 double res = g_tp.wire_outside_mat.R_per_um * (area.w + area.h) +
156 tr_R_on(g_tp.min_w_nmos_ * wdriver.repeater_size, NCH, 1);
157 double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out *
158 tri_inp_cap + n_inp * tri_out_cap;
159 delay = horowitz(w1.signal_rise_time(), res * cap, deviceType->Vth /
160 deviceType->Vdd, deviceType->Vth / deviceType->Vdd, RISE);
145
161
146 Wire wreset();
162 Wire wreset();
147}
148
163}
164
149void Crossbar::print_crossbar()
150{
151 cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
152 cout << "Flit size : " << flit_size << " bits" << endl;
153 cout << "Width : " << area.w << " u" << endl;
154 cout << "Height : " << area.h << " u" << endl;
155 cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl;
156 cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
157 cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl;
158 cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
165void Crossbar::print_crossbar() {
166 cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
167 cout << "Flit size : " << flit_size << " bits" << endl;
168 cout << "Width : " << area.w << " u" << endl;
169 cout << "Height : " << area.h << " u" << endl;
170 cout << "Dynamic Power : " << power.readOp.dynamic*1e9 *
171 MIN(n_inp, n_out) << " (nJ)" << endl;
172 cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)"
173 << endl;
174 cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3
175 << " (mW)" << endl;
176 cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
159}
160
161
177}
178
179