4a5
> * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
28c29
< * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
---
> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39c40
< unsigned int MIN_BANKSIZE=65536;
---
> unsigned int MIN_BANKSIZE = 65536;
46,50c47,50
< Nuca::Nuca(
< TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
< ):deviceType(dt)
< {
< init_cont();
---
> Nuca::Nuca(
> TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
> ): deviceType(dt) {
> init_cont();
54,63c54,62
< Nuca::init_cont()
< {
< FILE *cont;
< char line[5000];
< char jk[5000];
< cont = fopen("contention.dat", "r");
< if (!cont) {
< cout << "contention.dat file is missing!\n";
< exit(0);
< }
---
> Nuca::init_cont() {
> FILE *cont;
> char line[5000];
> char jk[5000];
> cont = fopen("contention.dat", "r");
> if (!cont) {
> cout << "contention.dat file is missing!\n";
> exit(0);
> }
65,72c64,74
< for(int i=0; i<2; i++) {
< for(int j=2; j<5; j++) {
< for(int k=0; k<ROUTER_TYPES; k++) {
< for(int l=0;l<7; l++) {
< int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
< assert(fscanf(cont, "%[^\n]\n", line) != EOF);
< sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d",jk, &temp[0], &temp[1], &temp[2], &temp[3],
< &temp[4], &temp[5], &temp[6], &temp[7]);
---
> for (int i = 0; i < 2; i++) {
> for (int j = 2; j < 5; j++) {
> for (int k = 0; k < ROUTER_TYPES; k++) {
> for (int l = 0; l < 7; l++) {
> int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
> assert(fscanf(cont, "%[^\n]\n", line) != EOF);
> sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d", jk,
> &temp[0], &temp[1], &temp[2], &temp[3],
> &temp[4], &temp[5], &temp[6], &temp[7]);
> }
> }
74d75
< }
76,77c77
< }
< fclose(cont);
---
> fclose(cont);
80,90c80,91
< void
< Nuca::print_cont_stats()
< {
< for(int i=0; i<2; i++) {
< for(int j=2; j<5; j++) {
< for(int k=0; k<ROUTER_TYPES; k++) {
< for(int l=0;l<7; l++) {
< for(int m=0;l<7; l++) {
< cout << cont_stats[i][j][k][l][m] << " ";
< }
< cout << endl;
---
> void
> Nuca::print_cont_stats() {
> for (int i = 0; i < 2; i++) {
> for (int j = 2; j < 5; j++) {
> for (int k = 0; k < ROUTER_TYPES; k++) {
> for (int l = 0; l < 7; l++) {
> for (int m = 0; l < 7; l++) {
> cout << cont_stats[i][j][k][l][m] << " ";
> }
> cout << endl;
> }
> }
92d92
< }
94,95c94
< }
< cout << endl;
---
> cout << endl;
98,102c97,101
< Nuca::~Nuca(){
< for (int i = wt_min; i <= wt_max; i++) {
< delete wire_vertical[i];
< delete wire_horizontal[i];
< }
---
> Nuca::~Nuca() {
> for (int i = wt_min; i <= wt_max; i++) {
> delete wire_vertical[i];
> delete wire_horizontal[i];
> }
106,112c105,110
< int
< Nuca::calc_cycles(double lat, double oper_freq)
< {
< //TODO: convert latch delay to FO4 */
< double cycle_time = (1.0/(oper_freq*1e9)); /*s*/
< cycle_time -= LATCH_DELAY;
< cycle_time -= FIXED_OVERHEAD;
---
> int
> Nuca::calc_cycles(double lat, double oper_freq) {
> //TODO: convert latch delay to FO4 */
> double cycle_time = (1.0 / (oper_freq * 1e9)); /*s*/
> cycle_time -= LATCH_DELAY;
> cycle_time -= FIXED_OVERHEAD;
114c112
< return (int)ceil(lat/cycle_time);
---
> return (int)ceil(lat / cycle_time);
119,121c117,119
< // if(h_wire) delete h_wire;
< // if(v_wire) delete v_wire;
< // if(router) delete router;
---
> // if(h_wire) delete h_wire;
> // if(v_wire) delete v_wire;
> // if(router) delete router;
140,160c138,157
< void
< Nuca::sim_nuca()
< {
< /* temp variables */
< int it, ro, wr;
< int num_cyc;
< unsigned int i, j, k;
< unsigned int r, c;
< int l2_c;
< int bank_count = 0;
< uca_org_t ures;
< nuca_org_t *opt_n;
< mem_array tag, data;
< list<nuca_org_t *> nuca_list;
< Router *router_s[ROUTER_TYPES];
< router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global));
< router_s[0]->print_router();
< router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global));
< router_s[1]->print_router();
< router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global));
< router_s[2]->print_router();
---
> void
> Nuca::sim_nuca() {
> /* temp variables */
> int it, ro, wr;
> int num_cyc;
> unsigned int i, j, k;
> unsigned int r, c;
> int l2_c;
> int bank_count = 0;
> uca_org_t ures;
> nuca_org_t *opt_n;
> mem_array tag, data;
> list<nuca_org_t *> nuca_list;
> Router *router_s[ROUTER_TYPES];
> router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global));
> router_s[0]->print_router();
> router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global));
> router_s[1]->print_router();
> router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global));
> router_s[2]->print_router();
162c159
< int core_in; // to store no. of cores
---
> int core_in; // to store no. of cores
164,168c161,165
< /* to search diff grid organizations */
< double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat,
< curr_acclat;
< double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power,
< avg_leakage_power;
---
> /* to search diff grid organizations */
> double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat,
> curr_acclat;
> double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power,
> avg_leakage_power;
170,176c167,173
< double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF;
< int opt_rows = 0;
< int opt_columns = 0;
< double opt_totno_hops = 0;
< double opt_avg_hop = 0;
< double opt_dyn_power = 0, opt_leakage_power = 0;
< min_values_t minval;
---
> double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF;
> int opt_rows = 0;
> int opt_columns = 0;
> double opt_totno_hops = 0;
> double opt_avg_hop = 0;
> double opt_dyn_power = 0, opt_leakage_power = 0;
> min_values_t minval;
178c175
< int bank_start = 0;
---
> int bank_start = 0;
180c177
< int flit_width = 0;
---
> int flit_width = 0;
182,183c179,180
< /* vertical and horizontal hop latency values */
< int ver_hop_lat, hor_hop_lat; /* in cycles */
---
> /* vertical and horizontal hop latency values */
> int ver_hop_lat, hor_hop_lat; /* in cycles */
186,187c183,184
< /* no. of different bank sizes to consider */
< int iterations;
---
> /* no. of different bank sizes to consider */
> int iterations;
190,191c187,188
< g_ip->nuca_cache_sz = g_ip->cache_sz;
< nuca_list.push_back(new nuca_org_t());
---
> g_ip->nuca_cache_sz = g_ip->cache_sz;
> nuca_list.push_back(new nuca_org_t());
193,194c190,191
< if (g_ip->cache_level == 0) l2_c = 1;
< else l2_c = 0;
---
> if (g_ip->cache_level == 0) l2_c = 1;
> else l2_c = 0;
196,199c193,199
< if (g_ip->cores <= 4) core_in = 2;
< else if (g_ip->cores <= 8) core_in = 3;
< else if (g_ip->cores <= 16) core_in = 4;
< else {cout << "Number of cores should be <= 16!\n"; exit(0);}
---
> if (g_ip->cores <= 4) core_in = 2;
> else if (g_ip->cores <= 8) core_in = 3;
> else if (g_ip->cores <= 16) core_in = 4;
> else {
> cout << "Number of cores should be <= 16!\n";
> exit(0);
> }
202,207c202,208
< // set the lower bound to an appropriate value. this depends on cache associativity
< if (g_ip->assoc > 2) {
< i = 2;
< while (i != g_ip->assoc) {
< MIN_BANKSIZE *= 2;
< i *= 2;
---
> // set the lower bound to an appropriate value. this depends on cache associativity
> if (g_ip->assoc > 2) {
> i = 2;
> while (i != g_ip->assoc) {
> MIN_BANKSIZE *= 2;
> i *= 2;
> }
209d209
< }
211c211
< iterations = (int)logtwo((int)g_ip->cache_sz/MIN_BANKSIZE);
---
> iterations = (int)logtwo((int)g_ip->cache_sz / MIN_BANKSIZE);
213,217c213,223
< if (g_ip->force_wiretype)
< {
< if (g_ip->wt == Low_swing) {
< wt_min = Low_swing;
< wt_max = Low_swing;
---
> if (g_ip->force_wiretype) {
> if (g_ip->wt == Low_swing) {
> wt_min = Low_swing;
> wt_max = Low_swing;
> } else {
> wt_min = Global;
> wt_max = Low_swing - 1;
> }
> } else {
> wt_min = Global;
> wt_max = Low_swing;
219,221c225,234
< else {
< wt_min = Global;
< wt_max = Low_swing-1;
---
> if (g_ip->nuca_bank_count != 0) { // simulate just one bank
> if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
> g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
> g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
> fprintf(stderr, "Incorrect bank count value! Please fix the ",
> "value in cache.cfg\n");
> }
> bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
> iterations = bank_start + 1;
> g_ip->cache_sz = g_ip->cache_sz / g_ip->nuca_bank_count;
223,245c236,244
< }
< else {
< wt_min = Global;
< wt_max = Low_swing;
< }
< if (g_ip->nuca_bank_count != 0) { // simulate just one bank
< if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
< g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
< g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
< fprintf(stderr,"Incorrect bank count value! Please fix the value in cache.cfg\n");
< }
< bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
< iterations = bank_start+1;
< g_ip->cache_sz = g_ip->cache_sz/g_ip->nuca_bank_count;
< }
< cout << "Simulating various NUCA configurations\n";
< for (it=bank_start; it<iterations; it++) { /* different bank count values */
< ures.tag_array2 = &tag;
< ures.data_array2 = &data;
< /*
< * find the optimal bank organization
< */
< solve(&ures);
---
> cout << "Simulating various NUCA configurations\n";
> for (it = bank_start; it < iterations; it++) {
> /* different bank count values */
> ures.tag_array2 = &tag;
> ures.data_array2 = &data;
> /*
> * find the optimal bank organization
> */
> solve(&ures);
247,248c246,247
< bank_count = g_ip->nuca_cache_sz/g_ip->cache_sz;
< cout << "====" << g_ip->cache_sz << "\n";
---
> bank_count = g_ip->nuca_cache_sz / g_ip->cache_sz;
> cout << "====" << g_ip->cache_sz << "\n";
250c249
< for (wr=wt_min; wr<=wt_max; wr++) {
---
> for (wr = wt_min; wr <= wt_max; wr++) {
252,255c251,253
< for (ro=0; ro<ROUTER_TYPES; ro++)
< {
< flit_width = (int) router_s[ro]->flit_size; //initialize router
< nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
---
> for (ro = 0; ro < ROUTER_TYPES; ro++) {
> flit_width = (int) router_s[ro]->flit_size; //initialize router
> nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
257c255
< /* calculate router and wire parameters */
---
> /* calculate router and wire parameters */
259,260c257,258
< double vlength = ures.cache_ht; /* length of the wire (u)*/
< double hlength = ures.cache_len; // u
---
> double vlength = ures.cache_ht; /* length of the wire (u)*/
> double hlength = ures.cache_len; // u
262,264c260,262
< /* find delay, area, and power for wires */
< wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength);
< wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
---
> /* find delay, area, and power for wires */
> wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength);
> wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
267,270c265,272
< hor_hop_lat = calc_cycles(wire_horizontal[wr]->delay,
< 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
< ver_hop_lat = calc_cycles(wire_vertical[wr]->delay,
< 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
---
> hor_hop_lat =
> calc_cycles(wire_horizontal[wr]->delay,
> 1 /(nuca_list.back()->nuca_pda.cycle_time *
> .001));
> ver_hop_lat =
> calc_cycles(wire_vertical[wr]->delay,
> 1 / (nuca_list.back()->nuca_pda.cycle_time *
> .001));
272,278c274,280
< /*
< * assume a grid like topology and explore for optimal network
< * configuration using different row and column count values.
< */
< for (c=1; c<=(unsigned int)bank_count; c++) {
< while (bank_count%c != 0) c++;
< r = bank_count/c;
---
> /*
> * assume a grid like topology and explore for optimal network
> * configuration using different row and column count values.
> */
> for (c = 1; c <= (unsigned int)bank_count; c++) {
> while (bank_count % c != 0) c++;
> r = bank_count / c;
280,301c282,303
< /*
< * to find the avg access latency of a NUCA cache, uncontended
< * access time to each bank from the
< * cache controller is calculated.
< * avg latency =
< * sum of the access latencies to individual banks)/bank
< * count value.
< */
< totno_hops = totno_hhops = totno_vhops = tot_lat = 0;
< k = 1;
< for (i=0; i<r; i++) {
< for (j=0; j<c; j++) {
< /*
< * vertical hops including the
< * first hop from the cache controller
< */
< curr_hop = i + 1;
< curr_hop += j; /* horizontal hops */
< totno_hhops += j;
< totno_vhops += (i+1);
< curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT +
< j * hor_hop_lat);
---
> /*
> * to find the avg access latency of a NUCA cache, uncontended
> * access time to each bank from the
> * cache controller is calculated.
> * avg latency =
> * sum of the access latencies to individual banks)/bank
> * count value.
> */
> totno_hops = totno_hhops = totno_vhops = tot_lat = 0;
> k = 1;
> for (i = 0; i < r; i++) {
> for (j = 0; j < c; j++) {
> /*
> * vertical hops including the
> * first hop from the cache controller
> */
> curr_hop = i + 1;
> curr_hop += j; /* horizontal hops */
> totno_hhops += j;
> totno_vhops += (i + 1);
> curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT +
> j * hor_hop_lat);
303,310c305,312
< tot_lat += curr_acclat;
< totno_hops += curr_hop;
< }
< }
< avg_lat = tot_lat/bank_count;
< avg_hop = totno_hops/bank_count;
< avg_hhop = totno_hhops/bank_count;
< avg_vhop = totno_vhops/bank_count;
---
> tot_lat += curr_acclat;
> totno_hops += curr_hop;
> }
> }
> avg_lat = tot_lat / bank_count;
> avg_hop = totno_hops / bank_count;
> avg_hhop = totno_hhops / bank_count;
> avg_vhop = totno_vhops / bank_count;
312,315c314,320
< /* net access latency */
< curr_acclat = 2*avg_lat + 2*(router_s[ro]->delay*avg_hop) +
< calc_cycles(ures.access_time,
< 1/(nuca_list.back()->nuca_pda.cycle_time*.001));
---
> /* net access latency */
> curr_acclat = 2 * avg_lat + 2 * (router_s[ro]->delay *
> avg_hop) +
> calc_cycles(ures.access_time,
> 1 /
> (nuca_list.back()->nuca_pda.cycle_time *
> .001));
317,324c322,329
< /* avg access lat of nuca */
< avg_dyn_power =
< avg_hop *
< (router_s[ro]->power.readOp.dynamic) + avg_hhop *
< (wire_horizontal[wr]->power.readOp.dynamic) *
< (g_ip->block_sz*8 + 64) + avg_vhop *
< (wire_vertical[wr]->power.readOp.dynamic) *
< (g_ip->block_sz*8 + 64) + ures.power.readOp.dynamic;
---
> /* avg access lat of nuca */
> avg_dyn_power =
> avg_hop *
> (router_s[ro]->power.readOp.dynamic) + avg_hhop *
> (wire_horizontal[wr]->power.readOp.dynamic) *
> (g_ip->block_sz * 8 + 64) + avg_vhop *
> (wire_vertical[wr]->power.readOp.dynamic) *
> (g_ip->block_sz * 8 + 64) + ures.power.readOp.dynamic;
326,331c331,336
< avg_leakage_power =
< bank_count * router_s[ro]->power.readOp.leakage +
< avg_hhop * (wire_horizontal[wr]->power.readOp.leakage*
< wire_horizontal[wr]->delay) * flit_width +
< avg_vhop * (wire_vertical[wr]->power.readOp.leakage *
< wire_horizontal[wr]->delay);
---
> avg_leakage_power =
> bank_count * router_s[ro]->power.readOp.leakage +
> avg_hhop * (wire_horizontal[wr]->power.readOp.leakage *
> wire_horizontal[wr]->delay) * flit_width +
> avg_vhop * (wire_vertical[wr]->power.readOp.leakage *
> wire_horizontal[wr]->delay);
333,358c338,363
< if (curr_acclat < opt_acclat) {
< opt_acclat = curr_acclat;
< opt_tot_lat = tot_lat;
< opt_avg_lat = avg_lat;
< opt_totno_hops = totno_hops;
< opt_avg_hop = avg_hop;
< opt_rows = r;
< opt_columns = c;
< opt_dyn_power = avg_dyn_power;
< opt_leakage_power = avg_leakage_power;
< }
< totno_hops = 0;
< tot_lat = 0;
< totno_hhops = 0;
< totno_vhops = 0;
< }
< nuca_list.back()->wire_pda.power.readOp.dynamic =
< opt_avg_hop * flit_width *
< (wire_horizontal[wr]->power.readOp.dynamic +
< wire_vertical[wr]->power.readOp.dynamic);
< nuca_list.back()->avg_hops = opt_avg_hop;
< /* network delay/power */
< nuca_list.back()->h_wire = wire_horizontal[wr];
< nuca_list.back()->v_wire = wire_vertical[wr];
< nuca_list.back()->router = router_s[ro];
< /* bank delay/power */
---
> if (curr_acclat < opt_acclat) {
> opt_acclat = curr_acclat;
> opt_tot_lat = tot_lat;
> opt_avg_lat = avg_lat;
> opt_totno_hops = totno_hops;
> opt_avg_hop = avg_hop;
> opt_rows = r;
> opt_columns = c;
> opt_dyn_power = avg_dyn_power;
> opt_leakage_power = avg_leakage_power;
> }
> totno_hops = 0;
> tot_lat = 0;
> totno_hhops = 0;
> totno_vhops = 0;
> }
> nuca_list.back()->wire_pda.power.readOp.dynamic =
> opt_avg_hop * flit_width *
> (wire_horizontal[wr]->power.readOp.dynamic +
> wire_vertical[wr]->power.readOp.dynamic);
> nuca_list.back()->avg_hops = opt_avg_hop;
> /* network delay/power */
> nuca_list.back()->h_wire = wire_horizontal[wr];
> nuca_list.back()->v_wire = wire_vertical[wr];
> nuca_list.back()->router = router_s[ro];
> /* bank delay/power */
360,364c365,369
< nuca_list.back()->bank_pda.delay = ures.access_time;
< nuca_list.back()->bank_pda.power = ures.power;
< nuca_list.back()->bank_pda.area.h = ures.cache_ht;
< nuca_list.back()->bank_pda.area.w = ures.cache_len;
< nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
---
> nuca_list.back()->bank_pda.delay = ures.access_time;
> nuca_list.back()->bank_pda.power = ures.power;
> nuca_list.back()->bank_pda.area.h = ures.cache_ht;
> nuca_list.back()->bank_pda.area.w = ures.cache_len;
> nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
366,369c371,376
< num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
< 1/(nuca_list.back()->nuca_pda.cycle_time*.001/*GHz*/));
< if(num_cyc%2 != 0) num_cyc++;
< if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
---
> num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
> 1 /
> (nuca_list.back()->nuca_pda.cycle_time *
> .001/*GHz*/));
> if (num_cyc % 2 != 0) num_cyc++;
> if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
371,384c378,390
< if (it < 7) {
< nuca_list.back()->nuca_pda.delay = opt_acclat +
< cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
< nuca_list.back()->contention =
< cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
< }
< else {
< nuca_list.back()->nuca_pda.delay = opt_acclat +
< cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
< nuca_list.back()->contention =
< cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
< }
< nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power;
< nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power;
---
> if (it < 7) {
> nuca_list.back()->nuca_pda.delay = opt_acclat +
> cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
> nuca_list.back()->contention =
> cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
> } else {
> nuca_list.back()->nuca_pda.delay = opt_acclat +
> cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
> nuca_list.back()->contention =
> cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
> }
> nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power;
> nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power;
386,390c392,396
< /* array organization */
< nuca_list.back()->bank_count = bank_count;
< nuca_list.back()->rows = opt_rows;
< nuca_list.back()->columns = opt_columns;
< calculate_nuca_area (nuca_list.back());
---
> /* array organization */
> nuca_list.back()->bank_count = bank_count;
> nuca_list.back()->rows = opt_rows;
> nuca_list.back()->columns = opt_columns;
> calculate_nuca_area (nuca_list.back());
392,394c398,400
< minval.update_min_values(nuca_list.back());
< nuca_list.push_back(new nuca_org_t());
< opt_acclat = BIGNUM;
---
> minval.update_min_values(nuca_list.back());
> nuca_list.push_back(new nuca_org_t());
> opt_acclat = BIGNUM;
396c402,404
< }
---
> }
> }
> g_ip->cache_sz /= 2;
398,399d405
< g_ip->cache_sz /= 2;
< }
401,405c407,411
< delete(nuca_list.back());
< nuca_list.pop_back();
< opt_n = find_optimal_nuca(&nuca_list, &minval);
< print_nuca(opt_n);
< g_ip->cache_sz = g_ip->nuca_cache_sz/opt_n->bank_count;
---
> delete(nuca_list.back());
> nuca_list.pop_back();
> opt_n = find_optimal_nuca(&nuca_list, &minval);
> print_nuca(opt_n);
> g_ip->cache_sz = g_ip->nuca_cache_sz / opt_n->bank_count;
407,412c413,417
< list<nuca_org_t *>::iterator niter;
< for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter)
< {
< delete *niter;
< }
< nuca_list.clear();
---
> list<nuca_org_t *>::iterator niter;
> for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) {
> delete *niter;
> }
> nuca_list.clear();
414,425c419,429
< for(int i=0; i < ROUTER_TYPES; i++)
< {
< delete router_s[i];
< }
< g_ip->display_ip();
< // g_ip->force_cache_config = true;
< // g_ip->ndwl = 8;
< // g_ip->ndbl = 16;
< // g_ip->nspd = 4;
< // g_ip->ndcm = 1;
< // g_ip->ndsam1 = 8;
< // g_ip->ndsam2 = 32;
---
> for (int i = 0; i < ROUTER_TYPES; i++) {
> delete router_s[i];
> }
> g_ip->display_ip();
> // g_ip->force_cache_config = true;
> // g_ip->ndwl = 8;
> // g_ip->ndbl = 16;
> // g_ip->nspd = 4;
> // g_ip->ndcm = 1;
> // g_ip->ndsam1 = 8;
> // g_ip->ndsam2 = 32;
430,442c434,445
< void
< Nuca::print_nuca (nuca_org_t *fr)
< {
< printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
< "----------\n\n");
< printf("Optimal number of banks - %d\n", fr->bank_count);
< printf("Grid organization rows x columns - %d x %d\n",
< fr->rows, fr->columns);
< printf("Network frequency - %g GHz\n",
< (1/fr->nuca_pda.cycle_time)*1e3);
< printf("Cache dimension (mm x mm) - %g x %g\n",
< fr->nuca_pda.area.h,
< fr->nuca_pda.area.w);
---
> void
> Nuca::print_nuca (nuca_org_t *fr) {
> printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
> "----------\n\n");
> printf("Optimal number of banks - %d\n", fr->bank_count);
> printf("Grid organization rows x columns - %d x %d\n",
> fr->rows, fr->columns);
> printf("Network frequency - %g GHz\n",
> (1 / fr->nuca_pda.cycle_time)*1e3);
> printf("Cache dimension (mm x mm) - %g x %g\n",
> fr->nuca_pda.area.h,
> fr->nuca_pda.area.w);
444c447
< fr->router->print_router();
---
> fr->router->print_router();
446,469c449,467
< printf("\n\nWire stats:\n");
< if (fr->h_wire->wt == Global) {
< printf("\tWire type - Full swing global wires with least "
< "possible delay\n");
< }
< else if (fr->h_wire->wt == Global_5) {
< printf("\tWire type - Full swing global wires with "
< "5%% delay penalty\n");
< }
< else if (fr->h_wire->wt == Global_10) {
< printf("\tWire type - Full swing global wires with "
< "10%% delay penalty\n");
< }
< else if (fr->h_wire->wt == Global_20) {
< printf("\tWire type - Full swing global wires with "
< "20%% delay penalty\n");
< }
< else if (fr->h_wire->wt == Global_30) {
< printf("\tWire type - Full swing global wires with "
< "30%% delay penalty\n");
< }
< else if(fr->h_wire->wt == Low_swing) {
< printf("\tWire type - Low swing wires\n");
< }
---
> printf("\n\nWire stats:\n");
> if (fr->h_wire->wt == Global) {
> printf("\tWire type - Full swing global wires with least "
> "possible delay\n");
> } else if (fr->h_wire->wt == Global_5) {
> printf("\tWire type - Full swing global wires with "
> "5%% delay penalty\n");
> } else if (fr->h_wire->wt == Global_10) {
> printf("\tWire type - Full swing global wires with "
> "10%% delay penalty\n");
> } else if (fr->h_wire->wt == Global_20) {
> printf("\tWire type - Full swing global wires with "
> "20%% delay penalty\n");
> } else if (fr->h_wire->wt == Global_30) {
> printf("\tWire type - Full swing global wires with "
> "30%% delay penalty\n");
> } else if (fr->h_wire->wt == Low_swing) {
> printf("\tWire type - Low swing wires\n");
> }
471,487c469,485
< printf("\tHorizontal link delay - %g (ns)\n",
< fr->h_wire->delay*1e9);
< printf("\tVertical link delay - %g (ns)\n",
< fr->v_wire->delay*1e9);
< printf("\tDelay/length - %g (ns/mm)\n",
< fr->h_wire->delay*1e9/fr->bank_pda.area.w);
< printf("\tHorizontal link energy -dynamic/access %g (nJ)\n"
< "\t -leakage %g (nW)\n\n",
< fr->h_wire->power.readOp.dynamic*1e9,
< fr->h_wire->power.readOp.leakage*1e9);
< printf("\tVertical link energy -dynamic/access %g (nJ)\n"
< "\t -leakage %g (nW)\n\n",
< fr->v_wire->power.readOp.dynamic*1e9,
< fr->v_wire->power.readOp.leakage*1e9);
< printf("\n\n");
< fr->v_wire->print_wire();
< printf("\n\nBank stats:\n");
---
> printf("\tHorizontal link delay - %g (ns)\n",
> fr->h_wire->delay*1e9);
> printf("\tVertical link delay - %g (ns)\n",
> fr->v_wire->delay*1e9);
> printf("\tDelay/length - %g (ns/mm)\n",
> fr->h_wire->delay*1e9 / fr->bank_pda.area.w);
> printf("\tHorizontal link energy -dynamic/access %g (nJ)\n"
> "\t -leakage %g (nW)\n\n",
> fr->h_wire->power.readOp.dynamic*1e9,
> fr->h_wire->power.readOp.leakage*1e9);
> printf("\tVertical link energy -dynamic/access %g (nJ)\n"
> "\t -leakage %g (nW)\n\n",
> fr->v_wire->power.readOp.dynamic*1e9,
> fr->v_wire->power.readOp.leakage*1e9);
> printf("\n\n");
> fr->v_wire->print_wire();
> printf("\n\nBank stats:\n");
491,503c489,500
< nuca_org_t *
< Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval)
< {
< double cost = 0;
< double min_cost = BIGNUM;
< nuca_org_t *res = NULL;
< float d, a, dp, lp, c;
< int v;
< dp = g_ip->dynamic_power_wt_nuca;
< lp = g_ip->leakage_power_wt_nuca;
< a = g_ip->area_wt_nuca;
< d = g_ip->delay_wt_nuca;
< c = g_ip->cycle_time_wt_nuca;
---
> nuca_org_t *
> Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval) {
> double cost = 0;
> double min_cost = BIGNUM;
> nuca_org_t *res = NULL;
> float d, a, dp, lp, c;
> int v;
> dp = g_ip->dynamic_power_wt_nuca;
> lp = g_ip->leakage_power_wt_nuca;
> a = g_ip->area_wt_nuca;
> d = g_ip->delay_wt_nuca;
> c = g_ip->cycle_time_wt_nuca;
505c502
< list<nuca_org_t *>::iterator niter;
---
> list<nuca_org_t *>::iterator niter;
508,510c505,507
< for (niter = n->begin(); niter != n->end(); niter++) {
< fprintf(stderr, "\n-----------------------------"
< "---------------\n");
---
> for (niter = n->begin(); niter != n->end(); niter++) {
> fprintf(stderr, "\n-----------------------------"
> "---------------\n");
513,521c510,518
< printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t "
< "bank_dpower = %g \tleak = %g \tcycle = %g\n",
< (*niter)->bank_count,
< (*niter)->nuca_pda.delay,
< (*niter)->nuca_pda.power.readOp.dynamic,
< (*niter)->h_wire->wt,
< (*niter)->bank_pda.power.readOp.dynamic,
< (*niter)->nuca_pda.power.readOp.leakage,
< (*niter)->nuca_pda.cycle_time);
---
> printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t "
> "bank_dpower = %g \tleak = %g \tcycle = %g\n",
> (*niter)->bank_count,
> (*niter)->nuca_pda.delay,
> (*niter)->nuca_pda.power.readOp.dynamic,
> (*niter)->h_wire->wt,
> (*niter)->bank_pda.power.readOp.dynamic,
> (*niter)->nuca_pda.power.readOp.leakage,
> (*niter)->nuca_pda.cycle_time);
524,547c521,542
< if (g_ip->ed == 1) {
< cost = ((*niter)->nuca_pda.delay/minval->min_delay)*
< ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn);
< if (min_cost > cost) {
< min_cost = cost;
< res = ((*niter));
< }
< }
< else if (g_ip->ed == 2) {
< cost = ((*niter)->nuca_pda.delay/minval->min_delay)*
< ((*niter)->nuca_pda.delay/minval->min_delay)*
< ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn);
< if (min_cost > cost) {
< min_cost = cost;
< res = ((*niter));
< }
< }
< else {
< /*
< * check whether the current organization
< * meets the input deviation constraints
< */
< v = check_nuca_org((*niter), minval);
< if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
---
> if (g_ip->ed == 1) {
> cost = ((*niter)->nuca_pda.delay / minval->min_delay) *
> ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn);
> if (min_cost > cost) {
> min_cost = cost;
> res = ((*niter));
> }
> } else if (g_ip->ed == 2) {
> cost = ((*niter)->nuca_pda.delay / minval->min_delay) *
> ((*niter)->nuca_pda.delay / minval->min_delay) *
> ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn);
> if (min_cost > cost) {
> min_cost = cost;
> res = ((*niter));
> }
> } else {
> /*
> * check whether the current organization
> * meets the input deviation constraints
> */
> v = check_nuca_org((*niter), minval);
> if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
549,555c544,553
< if (v) {
< cost = (d * ((*niter)->nuca_pda.delay/minval->min_delay) +
< c * ((*niter)->nuca_pda.cycle_time/minval->min_cyc) +
< dp * ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn) +
< lp * ((*niter)->nuca_pda.power.readOp.leakage/minval->min_leakage) +
< a * ((*niter)->nuca_pda.area.get_area()/minval->min_area));
< fprintf(stderr, "cost = %g\n", cost);
---
> if (v) {
> cost = (d * ((*niter)->nuca_pda.delay / minval->min_delay) +
> c * ((*niter)->nuca_pda.cycle_time / minval->min_cyc) +
> dp * ((*niter)->nuca_pda.power.readOp.dynamic /
> minval->min_dyn) +
> lp * ((*niter)->nuca_pda.power.readOp.leakage /
> minval->min_leakage) +
> a * ((*niter)->nuca_pda.area.get_area() /
> minval->min_area));
> fprintf(stderr, "cost = %g\n", cost);
557,559c555,563
< if (min_cost > cost) {
< min_cost = cost;
< res = ((*niter));
---
> if (min_cost > cost) {
> min_cost = cost;
> res = ((*niter));
> }
> } else {
> niter = n->erase(niter);
> if (niter != n->begin())
> niter --;
> }
561,566d564
< }
< else {
< niter = n->erase(niter);
< if (niter !=n->begin())
< niter --;
< }
568,569c566
< }
< return res;
---
> return res;
572,594c569,594
< int
< Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval)
< {
< if (((n->nuca_pda.delay - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev_nuca) {
< return 0;
< }
< if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
< g_ip->dynamic_power_dev_nuca) {
< return 0;
< }
< if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
< g_ip->leakage_power_dev_nuca) {
< return 0;
< }
< if (((n->nuca_pda.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
< g_ip->cycle_time_dev_nuca) {
< return 0;
< }
< if (((n->nuca_pda.area.get_area() - minval->min_area)/minval->min_area)*100 >
< g_ip->area_dev_nuca) {
< return 0;
< }
< return 1;
---
> int
> Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) {
> if (((n->nuca_pda.delay - minval->min_delay)*100 / minval->min_delay) >
> g_ip->delay_dev_nuca) {
> return 0;
> }
> if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn) /
> minval->min_dyn)*100 >
> g_ip->dynamic_power_dev_nuca) {
> return 0;
> }
> if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage) /
> minval->min_leakage)*100 >
> g_ip->leakage_power_dev_nuca) {
> return 0;
> }
> if (((n->nuca_pda.cycle_time - minval->min_cyc) / minval->min_cyc)*100 >
> g_ip->cycle_time_dev_nuca) {
> return 0;
> }
> if (((n->nuca_pda.area.get_area() - minval->min_area) / minval->min_area) *
> 100 >
> g_ip->area_dev_nuca) {
> return 0;
> }
> return 1;
597,604c597,603
< void
< Nuca::calculate_nuca_area (nuca_org_t *nuca)
< {
< nuca->nuca_pda.area.h=
< nuca->rows * ((nuca->h_wire->wire_width +
< nuca->h_wire->wire_spacing)
< * nuca->router->flit_size +
< nuca->bank_pda.area.h);
---
> void
> Nuca::calculate_nuca_area (nuca_org_t *nuca) {
> nuca->nuca_pda.area.h =
> nuca->rows * ((nuca->h_wire->wire_width +
> nuca->h_wire->wire_spacing)
> * nuca->router->flit_size +
> nuca->bank_pda.area.h);
606,610c605,609
< nuca->nuca_pda.area.w =
< nuca->columns * ((nuca->v_wire->wire_width +
< nuca->v_wire->wire_spacing)
< * nuca->router->flit_size +
< nuca->bank_pda.area.w);
---
> nuca->nuca_pda.area.w =
> nuca->columns * ((nuca->v_wire->wire_width +
> nuca->v_wire->wire_spacing)
> * nuca->router->flit_size +
> nuca->bank_pda.area.w);