/*****************************************************************************
 *                                McPAT/CACTI
 *                      SOFTWARE LICENSE AGREEMENT
 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
 *                          All Rights Reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * 31 ***************************************************************************/ 32 33 34 35#include "router.h" 36 37Router::Router( 38 double flit_size_, 39 double vc_buf, /* vc size = vc_buffer_size * flit_size */ 40 double vc_c, 41 TechnologyParameter::DeviceType *dt, 42 double I_, 43 double O_, 44 double M_ |
45 ): flit_size(flit_size_), 46 deviceType(dt), 47 I(I_), 48 O(O_), 49 M(M_) { 50 vc_buffer_size = vc_buf; 51 vc_count = vc_c; 52 min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; 53 double technology = g_ip->F_sz_um; |
54 |
55 Vdd = dt->Vdd; |
56 |
57 /*Crossbar parameters. Transmisson gate is employed for connector*/ 58 NTtr = 10 * technology * 1e-6 / 2; /*Transmission gate's nmos tr. length*/ 59 PTtr = 20 * technology * 1e-6 / 2; /* pmos tr. length*/ 60 wt = 15 * technology * 1e-6 / 2; /*track width*/ 61 ht = 15 * technology * 1e-6 / 2; /*track height*/ |
62// I = 5; /*Number of crossbar input ports*/ 63// O = 5; /*Number of crossbar output ports*/ |
64 NTi = 12.5 * technology * 1e-6 / 2; 65 PTi = 25 * technology * 1e-6 / 2; |
66 |
67 NTid = 60 * technology * 1e-6 / 2; //m 68 PTid = 120 * technology * 1e-6 / 2; // m 69 NTod = 60 * technology * 1e-6 / 2; // m 70 PTod = 120 * technology * 1e-6 / 2; // m |
71 |
72 calc_router_parameters(); |
73} 74 |
75Router::~Router() {} |
76 77 78double //wire cap with triple spacing 79Router::Cw3(double length) { |
80 Wire wc(g_ip->wt, length, 1, 3, 3); 81 return (wc.wire_cap(length)); |
82} 83 84/*Function to calculate the gate capacitance*/ 85double 86Router::gate_cap(double w) { |
87 return (double) gate_C (w*1e6 /*u*/, 0); |
88} 89 90/*Function to calculate the diffusion capacitance*/ 91double 92Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, |
93 double s /*number of stacking transistors*/) { 94 return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def); |
95} 96 97 98/*crossbar related functions */ 99 100// Model for simple transmission gate 101double 102Router::transmission_buf_inpcap() { |
103 return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1); |
104} 105 106double 107Router::transmission_buf_outcap() { |
108 return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1); |
109} 110 111double 112Router::transmission_buf_ctrcap() { |
113 return gate_cap(NTtr) + gate_cap(PTtr); |
114} 115 116double 117Router::crossbar_inpline() { |
118 return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) + 119 gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1)); |
120} 121 122double 123Router::crossbar_outline() { |
124 return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) + 125 gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1)); |
126} 127 128double 129Router::crossbar_ctrline() { |
130 return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() + 131 diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) + 132 gate_cap(NTi) + gate_cap(PTi)); |
133} 134 135double 136Router::tr_crossbar_power() { |
137 return (crossbar_inpline()*Vdd*Vdd*flit_size / 2 + 138 crossbar_outline()*Vdd*Vdd*flit_size / 2) * 2; |
139} 140 |
141void Router::buffer_stats() { 142 DynamicParameter dyn_p; 143 dyn_p.is_tag = false; 144 dyn_p.pure_cam = false; 145 dyn_p.fully_assoc = false; 146 dyn_p.pure_ram = true; 147 dyn_p.is_dram = false; 148 dyn_p.is_main_mem = false; 149 dyn_p.num_subarrays = 1; 150 dyn_p.num_mats = 1; 151 dyn_p.Ndbl = 1; 152 dyn_p.Ndwl = 1; 153 dyn_p.Nspd = 1; 154 dyn_p.deg_bl_muxing = 1; 155 dyn_p.deg_senseamp_muxing_non_associativity = 1; 156 dyn_p.Ndsam_lev_1 = 1; 157 dyn_p.Ndsam_lev_2 = 1; 158 dyn_p.Ndcm = 1; 159 dyn_p.number_addr_bits_mat = 8; 160 dyn_p.number_way_select_signals_mat = 1; 161 dyn_p.number_subbanks_decode = 0; 162 dyn_p.num_act_mats_hor_dir = 1; 163 dyn_p.V_b_sense = Vdd; // FIXME check power calc. 164 dyn_p.ram_cell_tech_type = 0; 165 dyn_p.num_r_subarray = (int) vc_buffer_size; 166 dyn_p.num_c_subarray = (int) flit_size * (int) vc_count; 167 dyn_p.num_mats_h_dir = 1; 168 dyn_p.num_mats_v_dir = 1; 169 dyn_p.num_do_b_subbank = (int)flit_size; 170 dyn_p.num_di_b_subbank = (int)flit_size; 171 dyn_p.num_do_b_mat = (int) flit_size; 172 dyn_p.num_di_b_mat = (int) flit_size; 173 dyn_p.num_do_b_mat = (int) flit_size; 174 dyn_p.num_di_b_mat = (int) flit_size; 175 dyn_p.num_do_b_bank_per_port = (int) flit_size; 176 dyn_p.num_di_b_bank_per_port = (int) flit_size; 177 dyn_p.out_w = (int) flit_size; |
178 |
179 dyn_p.use_inp_params = 1; 180 dyn_p.num_wr_ports = (unsigned int) vc_count; 181 dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book 182 dyn_p.num_rw_ports = 0; 183 dyn_p.num_se_rd_ports = 0; 184 dyn_p.num_search_ports = 0; |
185 186 187 |
188 dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports + 189 dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports); 190 dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 + 191 (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) + 192 dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports; |
193 |
194 Mat buff(dyn_p); 195 buff.compute_delays(0); 196 buff.compute_power_energy(); 197 buffer.power.readOp = buff.power.readOp; 198 buffer.power.writeOp = buffer.power.readOp; //FIXME 199 buffer.area = buff.area; |
200} 201 202 203 |
204void 205Router::cb_stats () { 206 if (1) { 207 Crossbar c_b(I, O, flit_size); 208 c_b.compute_power(); 209 crossbar.delay = c_b.delay; 210 crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic; 211 crossbar.power.readOp.leakage = c_b.power.readOp.leakage; 212 crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage; 213 crossbar.area = c_b.area; |
214// c_b.print_crossbar(); |
215 } else { 216 crossbar.power.readOp.dynamic = tr_crossbar_power(); 217 crossbar.power.readOp.leakage = flit_size * I * O * 218 cmos_Isub_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg); 219 crossbar.power.readOp.gate_leakage = flit_size * I * O * 220 cmos_Ig_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg); 221 } |
222} 223 224void |
225Router::get_router_power() { 226 /* calculate buffer stats */ 227 buffer_stats(); |
228 |
229 /* calculate cross-bar stats */ 230 cb_stats(); |
231 |
232 /* calculate arbiter stats */ 233 Arbiter vcarb(vc_count, flit_size, buffer.area.w); 234 Arbiter cbarb(I, flit_size, crossbar.area.w); 235 vcarb.compute_power(); 236 cbarb.compute_power(); 237 arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I + 238 cbarb.power.readOp.dynamic * O; 239 arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I + 240 cbarb.power.readOp.leakage * O; 241 arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I + 242 cbarb.power.readOp.gate_leakage * O; |
243 244// arb_stats(); |
245 power.readOp.dynamic = ((buffer.power.readOp.dynamic + 246 buffer.power.writeOp.dynamic) + 247 crossbar.power.readOp.dynamic + 248 arbiter.power.readOp.dynamic) * MIN(I, O) * M; 249 double pppm_t[4] = {1, I, I, 1}; 250 power = power + (buffer.power * pppm_t + crossbar.power + arbiter.power) * 251 pppm_lkg; |
252 253} 254 |
255void 256Router::get_router_delay () { 257 FREQUENCY = 5; // move this to config file --TODO 258 cycle_time = (1 / (double)FREQUENCY) * 1e3; //ps 259 delay = 4; 260 max_cyc = 17 * g_tp.FO4; //s 261 max_cyc *= 1e12; //ps 262 if (cycle_time < max_cyc) { 263 FREQUENCY = (1 / max_cyc) * 1e3; //GHz 264 } |
265} 266 |
267void 268Router::get_router_area() { 269 area.h = I * buffer.area.h; 270 area.w = buffer.area.w + crossbar.area.w; |
271} 272 |
273void 274Router::calc_router_parameters() { 275 /* calculate router frequency and pipeline cycles */ 276 get_router_delay(); |
277 |
278 /* router power stats */ 279 get_router_power(); |
280 |
281 /* area stats */ 282 get_router_area(); |
283} 284 |
285void 286Router::print_router() { 287 cout << "\n\nRouter stats:\n"; 288 cout << "\tRouter Area - " << area.get_area()*1e-6 << "(mm^2)\n"; 289 cout << "\tMaximum possible network frequency - " << (1 / max_cyc)*1e3 290 << "GHz\n"; 291 cout << "\tNetwork frequency - " << FREQUENCY << " GHz\n"; 292 cout << "\tNo. of Virtual channels - " << vc_count << "\n"; 293 cout << "\tNo. of pipeline stages - " << delay << endl; 294 cout << "\tLink bandwidth - " << flit_size << " (bits)\n"; 295 cout << "\tNo. of buffer entries per virtual channel - " 296 << vc_buffer_size << "\n"; 297 cout << "\tSimple buffer Area - " << buffer.area.get_area()*1e-6 298 << "(mm^2)\n"; 299 cout << "\tSimple buffer access (Read) - " 300 << buffer.power.readOp.dynamic * 1e9 << " (nJ)\n"; 301 cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3 302 << " (mW)\n"; 303 cout << "\tCrossbar Area - " << crossbar.area.get_area()*1e-6 304 << "(mm^2)\n"; 305 cout << "\tCross bar access energy - " 306 << crossbar.power.readOp.dynamic * 1e9 << " (nJ)\n"; 307 cout << "\tCross bar leakage power - " 308 << crossbar.power.readOp.leakage * 1e3 << " (mW)\n"; 309 cout << "\tArbiter access energy (VC arb + Crossbar arb) - " 310 << arbiter.power.readOp.dynamic * 1e9 << " (nJ)\n"; 311 cout << "\tArbiter leakage (VC arb + Crossbar arb) - " 312 << arbiter.power.readOp.leakage * 1e3 << " (mW)\n"; |
313 314} 315 |