4a5
> * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
28c29
< * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
---
> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44,53c45,53
< ):flit_size(flit_size_),
< deviceType(dt),
< I(I_),
< O(O_),
< M(M_)
< {
< vc_buffer_size = vc_buf;
< vc_count = vc_c;
< min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
< double technology = g_ip->F_sz_um;
---
> ): flit_size(flit_size_),
> deviceType(dt),
> I(I_),
> O(O_),
> M(M_) {
> vc_buffer_size = vc_buf;
> vc_count = vc_c;
> min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
> double technology = g_ip->F_sz_um;
55c55
< Vdd = dt->Vdd;
---
> Vdd = dt->Vdd;
57,61c57,61
< /*Crossbar parameters. Transmisson gate is employed for connector*/
< NTtr = 10*technology*1e-6/2; /*Transmission gate's nmos tr. length*/
< PTtr = 20*technology*1e-6/2; /* pmos tr. length*/
< wt = 15*technology*1e-6/2; /*track width*/
< ht = 15*technology*1e-6/2; /*track height*/
---
> /*Crossbar parameters. Transmisson gate is employed for connector*/
> NTtr = 10 * technology * 1e-6 / 2; /*Transmission gate's nmos tr. length*/
> PTtr = 20 * technology * 1e-6 / 2; /* pmos tr. length*/
> wt = 15 * technology * 1e-6 / 2; /*track width*/
> ht = 15 * technology * 1e-6 / 2; /*track height*/
64,65c64,65
< NTi = 12.5*technology*1e-6/2;
< PTi = 25*technology*1e-6/2;
---
> NTi = 12.5 * technology * 1e-6 / 2;
> PTi = 25 * technology * 1e-6 / 2;
67,70c67,70
< NTid = 60*technology*1e-6/2; //m
< PTid = 120*technology*1e-6/2; // m
< NTod = 60*technology*1e-6/2; // m
< PTod = 120*technology*1e-6/2; // m
---
> NTid = 60 * technology * 1e-6 / 2; //m
> PTid = 120 * technology * 1e-6 / 2; // m
> NTod = 60 * technology * 1e-6 / 2; // m
> PTod = 120 * technology * 1e-6 / 2; // m
72c72
< calc_router_parameters();
---
> calc_router_parameters();
75c75
< Router::~Router(){}
---
> Router::~Router() {}
80,81c80,81
< Wire wc(g_ip->wt, length, 1, 3, 3);
< return (wc.wire_cap(length));
---
> Wire wc(g_ip->wt, length, 1, 3, 3);
> return (wc.wire_cap(length));
87c87
< return (double) gate_C (w*1e6 /*u*/, 0);
---
> return (double) gate_C (w*1e6 /*u*/, 0);
93,94c93,94
< double s /*number of stacking transistors*/) {
< return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def);
---
> double s /*number of stacking transistors*/) {
> return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def);
103c103
< return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
---
> return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1);
108c108
< return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
---
> return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1);
113c113
< return gate_cap(NTtr)+gate_cap(PTtr);
---
> return gate_cap(NTtr) + gate_cap(PTtr);
118,119c118,119
< return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) +
< gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1));
---
> return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) +
> gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1));
124,125c124,125
< return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) +
< gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1));
---
> return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) +
> gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1));
130,132c130,132
< return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() +
< diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) +
< gate_cap(NTi) + gate_cap(PTi));
---
> return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() +
> diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) +
> gate_cap(NTi) + gate_cap(PTi));
137,138c137,138
< return (crossbar_inpline()*Vdd*Vdd*flit_size/2 +
< crossbar_outline()*Vdd*Vdd*flit_size/2)*2;
---
> return (crossbar_inpline()*Vdd*Vdd*flit_size / 2 +
> crossbar_outline()*Vdd*Vdd*flit_size / 2) * 2;
141,178c141,177
< void Router::buffer_stats()
< {
< DynamicParameter dyn_p;
< dyn_p.is_tag = false;
< dyn_p.pure_cam = false;
< dyn_p.fully_assoc = false;
< dyn_p.pure_ram = true;
< dyn_p.is_dram = false;
< dyn_p.is_main_mem = false;
< dyn_p.num_subarrays = 1;
< dyn_p.num_mats = 1;
< dyn_p.Ndbl = 1;
< dyn_p.Ndwl = 1;
< dyn_p.Nspd = 1;
< dyn_p.deg_bl_muxing = 1;
< dyn_p.deg_senseamp_muxing_non_associativity = 1;
< dyn_p.Ndsam_lev_1 = 1;
< dyn_p.Ndsam_lev_2 = 1;
< dyn_p.Ndcm = 1;
< dyn_p.number_addr_bits_mat = 8;
< dyn_p.number_way_select_signals_mat = 1;
< dyn_p.number_subbanks_decode = 0;
< dyn_p.num_act_mats_hor_dir = 1;
< dyn_p.V_b_sense = Vdd; // FIXME check power calc.
< dyn_p.ram_cell_tech_type = 0;
< dyn_p.num_r_subarray = (int) vc_buffer_size;
< dyn_p.num_c_subarray = (int) flit_size * (int) vc_count;
< dyn_p.num_mats_h_dir = 1;
< dyn_p.num_mats_v_dir = 1;
< dyn_p.num_do_b_subbank = (int)flit_size;
< dyn_p.num_di_b_subbank = (int)flit_size;
< dyn_p.num_do_b_mat = (int) flit_size;
< dyn_p.num_di_b_mat = (int) flit_size;
< dyn_p.num_do_b_mat = (int) flit_size;
< dyn_p.num_di_b_mat = (int) flit_size;
< dyn_p.num_do_b_bank_per_port = (int) flit_size;
< dyn_p.num_di_b_bank_per_port = (int) flit_size;
< dyn_p.out_w = (int) flit_size;
---
> void Router::buffer_stats() {
> DynamicParameter dyn_p;
> dyn_p.is_tag = false;
> dyn_p.pure_cam = false;
> dyn_p.fully_assoc = false;
> dyn_p.pure_ram = true;
> dyn_p.is_dram = false;
> dyn_p.is_main_mem = false;
> dyn_p.num_subarrays = 1;
> dyn_p.num_mats = 1;
> dyn_p.Ndbl = 1;
> dyn_p.Ndwl = 1;
> dyn_p.Nspd = 1;
> dyn_p.deg_bl_muxing = 1;
> dyn_p.deg_senseamp_muxing_non_associativity = 1;
> dyn_p.Ndsam_lev_1 = 1;
> dyn_p.Ndsam_lev_2 = 1;
> dyn_p.Ndcm = 1;
> dyn_p.number_addr_bits_mat = 8;
> dyn_p.number_way_select_signals_mat = 1;
> dyn_p.number_subbanks_decode = 0;
> dyn_p.num_act_mats_hor_dir = 1;
> dyn_p.V_b_sense = Vdd; // FIXME check power calc.
> dyn_p.ram_cell_tech_type = 0;
> dyn_p.num_r_subarray = (int) vc_buffer_size;
> dyn_p.num_c_subarray = (int) flit_size * (int) vc_count;
> dyn_p.num_mats_h_dir = 1;
> dyn_p.num_mats_v_dir = 1;
> dyn_p.num_do_b_subbank = (int)flit_size;
> dyn_p.num_di_b_subbank = (int)flit_size;
> dyn_p.num_do_b_mat = (int) flit_size;
> dyn_p.num_di_b_mat = (int) flit_size;
> dyn_p.num_do_b_mat = (int) flit_size;
> dyn_p.num_di_b_mat = (int) flit_size;
> dyn_p.num_do_b_bank_per_port = (int) flit_size;
> dyn_p.num_di_b_bank_per_port = (int) flit_size;
> dyn_p.out_w = (int) flit_size;
180,185c179,184
< dyn_p.use_inp_params = 1;
< dyn_p.num_wr_ports = (unsigned int) vc_count;
< dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book
< dyn_p.num_rw_ports = 0;
< dyn_p.num_se_rd_ports =0;
< dyn_p.num_search_ports =0;
---
> dyn_p.use_inp_params = 1;
> dyn_p.num_wr_ports = (unsigned int) vc_count;
> dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book
> dyn_p.num_rw_ports = 0;
> dyn_p.num_se_rd_ports = 0;
> dyn_p.num_search_ports = 0;
189,193c188,192
< dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports +
< dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports);
< dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 +
< (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) +
< dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports;
---
> dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports +
> dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports);
> dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 +
> (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) +
> dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports;
195,200c194,199
< Mat buff(dyn_p);
< buff.compute_delays(0);
< buff.compute_power_energy();
< buffer.power.readOp = buff.power.readOp;
< buffer.power.writeOp = buffer.power.readOp; //FIXME
< buffer.area = buff.area;
---
> Mat buff(dyn_p);
> buff.compute_delays(0);
> buff.compute_power_energy();
> buffer.power.readOp = buff.power.readOp;
> buffer.power.writeOp = buffer.power.readOp; //FIXME
> buffer.area = buff.area;
205,215c204,213
< void
< Router::cb_stats ()
< {
< if (1) {
< Crossbar c_b(I, O, flit_size);
< c_b.compute_power();
< crossbar.delay = c_b.delay;
< crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic;
< crossbar.power.readOp.leakage = c_b.power.readOp.leakage;
< crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage;
< crossbar.area = c_b.area;
---
> void
> Router::cb_stats () {
> if (1) {
> Crossbar c_b(I, O, flit_size);
> c_b.compute_power();
> crossbar.delay = c_b.delay;
> crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic;
> crossbar.power.readOp.leakage = c_b.power.readOp.leakage;
> crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage;
> crossbar.area = c_b.area;
217,224c215,221
< }
< else {
< crossbar.power.readOp.dynamic = tr_crossbar_power();
< crossbar.power.readOp.leakage = flit_size * I * O *
< cmos_Isub_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
< crossbar.power.readOp.gate_leakage = flit_size * I * O *
< cmos_Ig_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
< }
---
> } else {
> crossbar.power.readOp.dynamic = tr_crossbar_power();
> crossbar.power.readOp.leakage = flit_size * I * O *
> cmos_Isub_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg);
> crossbar.power.readOp.gate_leakage = flit_size * I * O *
> cmos_Ig_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg);
> }
228,231c225,227
< Router::get_router_power()
< {
< /* calculate buffer stats */
< buffer_stats();
---
> Router::get_router_power() {
> /* calculate buffer stats */
> buffer_stats();
233,234c229,230
< /* calculate cross-bar stats */
< cb_stats();
---
> /* calculate cross-bar stats */
> cb_stats();
236,246c232,242
< /* calculate arbiter stats */
< Arbiter vcarb(vc_count, flit_size, buffer.area.w);
< Arbiter cbarb(I, flit_size, crossbar.area.w);
< vcarb.compute_power();
< cbarb.compute_power();
< arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I +
< cbarb.power.readOp.dynamic * O;
< arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I +
< cbarb.power.readOp.leakage * O;
< arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I +
< cbarb.power.readOp.gate_leakage * O;
---
> /* calculate arbiter stats */
> Arbiter vcarb(vc_count, flit_size, buffer.area.w);
> Arbiter cbarb(I, flit_size, crossbar.area.w);
> vcarb.compute_power();
> cbarb.compute_power();
> arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I +
> cbarb.power.readOp.dynamic * O;
> arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I +
> cbarb.power.readOp.leakage * O;
> arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I +
> cbarb.power.readOp.gate_leakage * O;
249,253c245,251
< power.readOp.dynamic = ((buffer.power.readOp.dynamic+buffer.power.writeOp.dynamic) +
< crossbar.power.readOp.dynamic +
< arbiter.power.readOp.dynamic)*MIN(I, O)*M;
< double pppm_t[4] = {1,I,I,1};
< power = power + (buffer.power*pppm_t + crossbar.power + arbiter.power)*pppm_lkg;
---
> power.readOp.dynamic = ((buffer.power.readOp.dynamic +
> buffer.power.writeOp.dynamic) +
> crossbar.power.readOp.dynamic +
> arbiter.power.readOp.dynamic) * MIN(I, O) * M;
> double pppm_t[4] = {1, I, I, 1};
> power = power + (buffer.power * pppm_t + crossbar.power + arbiter.power) *
> pppm_lkg;
257,267c255,264
< void
< Router::get_router_delay ()
< {
< FREQUENCY=5; // move this to config file --TODO
< cycle_time = (1/(double)FREQUENCY)*1e3; //ps
< delay = 4;
< max_cyc = 17 * g_tp.FO4; //s
< max_cyc *= 1e12; //ps
< if (cycle_time < max_cyc) {
< FREQUENCY = (1/max_cyc)*1e3; //GHz
< }
---
> void
> Router::get_router_delay () {
> FREQUENCY = 5; // move this to config file --TODO
> cycle_time = (1 / (double)FREQUENCY) * 1e3; //ps
> delay = 4;
> max_cyc = 17 * g_tp.FO4; //s
> max_cyc *= 1e12; //ps
> if (cycle_time < max_cyc) {
> FREQUENCY = (1 / max_cyc) * 1e3; //GHz
> }
270,274c267,270
< void
< Router::get_router_area()
< {
< area.h = I*buffer.area.h;
< area.w = buffer.area.w+crossbar.area.w;
---
> void
> Router::get_router_area() {
> area.h = I * buffer.area.h;
> area.w = buffer.area.w + crossbar.area.w;
277,281c273,276
< void
< Router::calc_router_parameters()
< {
< /* calculate router frequency and pipeline cycles */
< get_router_delay();
---
> void
> Router::calc_router_parameters() {
> /* calculate router frequency and pipeline cycles */
> get_router_delay();
283,284c278,279
< /* router power stats */
< get_router_power();
---
> /* router power stats */
> get_router_power();
286,287c281,282
< /* area stats */
< get_router_area();
---
> /* area stats */
> get_router_area();
290,308c285,312
< void
< Router::print_router()
< {
< cout << "\n\nRouter stats:\n";
< cout << "\tRouter Area - "<< area.get_area()*1e-6<<"(mm^2)\n";
< cout << "\tMaximum possible network frequency - " << (1/max_cyc)*1e3 << "GHz\n";
< cout << "\tNetwork frequency - " << FREQUENCY <<" GHz\n";
< cout << "\tNo. of Virtual channels - " << vc_count << "\n";
< cout << "\tNo. of pipeline stages - " << delay << endl;
< cout << "\tLink bandwidth - " << flit_size << " (bits)\n";
< cout << "\tNo. of buffer entries per virtual channel - "<< vc_buffer_size << "\n";
< cout << "\tSimple buffer Area - "<< buffer.area.get_area()*1e-6<<"(mm^2)\n";
< cout << "\tSimple buffer access (Read) - " << buffer.power.readOp.dynamic * 1e9 <<" (nJ)\n";
< cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3 <<" (mW)\n";
< cout << "\tCrossbar Area - "<< crossbar.area.get_area()*1e-6<<"(mm^2)\n";
< cout << "\tCross bar access energy - " << crossbar.power.readOp.dynamic * 1e9<<" (nJ)\n";
< cout << "\tCross bar leakage power - " << crossbar.power.readOp.leakage * 1e3<<" (mW)\n";
< cout << "\tArbiter access energy (VC arb + Crossbar arb) - "<<arbiter.power.readOp.dynamic * 1e9 <<" (nJ)\n";
< cout << "\tArbiter leakage (VC arb + Crossbar arb) - "<<arbiter.power.readOp.leakage * 1e3 <<" (mW)\n";
---
> void
> Router::print_router() {
> cout << "\n\nRouter stats:\n";
> cout << "\tRouter Area - " << area.get_area()*1e-6 << "(mm^2)\n";
> cout << "\tMaximum possible network frequency - " << (1 / max_cyc)*1e3
> << "GHz\n";
> cout << "\tNetwork frequency - " << FREQUENCY << " GHz\n";
> cout << "\tNo. of Virtual channels - " << vc_count << "\n";
> cout << "\tNo. of pipeline stages - " << delay << endl;
> cout << "\tLink bandwidth - " << flit_size << " (bits)\n";
> cout << "\tNo. of buffer entries per virtual channel - "
> << vc_buffer_size << "\n";
> cout << "\tSimple buffer Area - " << buffer.area.get_area()*1e-6
> << "(mm^2)\n";
> cout << "\tSimple buffer access (Read) - "
> << buffer.power.readOp.dynamic * 1e9 << " (nJ)\n";
> cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3
> << " (mW)\n";
> cout << "\tCrossbar Area - " << crossbar.area.get_area()*1e-6
> << "(mm^2)\n";
> cout << "\tCross bar access energy - "
> << crossbar.power.readOp.dynamic * 1e9 << " (nJ)\n";
> cout << "\tCross bar leakage power - "
> << crossbar.power.readOp.leakage * 1e3 << " (mW)\n";
> cout << "\tArbiter access energy (VC arb + Crossbar arb) - "
> << arbiter.power.readOp.dynamic * 1e9 << " (nJ)\n";
> cout << "\tArbiter leakage (VC arb + Crossbar arb) - "
> << arbiter.power.readOp.leakage * 1e3 << " (mW)\n";