1/*****************************************************************************
2 *                                McPAT/CACTI
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
6 *                          All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 ***************************************************************************/
32
33
34
35#include <cassert>
36
37#include "Ucache.h"
38#include "nuca.h"
39
/*
 * Lower bound on the bank size explored by Nuca::sim_nuca(). It is not a
 * constant: sim_nuca() doubles it once per power-of-two step of the cache
 * associativity above 2 and then divides the cache size by it to obtain the
 * number of bank sizes to try.
 * NOTE(review): presumably in bytes, like g_ip->cache_sz -- confirm.
 */
unsigned int MIN_BANKSIZE = 65536;
#define FIXED_OVERHEAD 55e-12 /* clock skew and jitter in s. Ref: Hrishikesh et al ISCA 01 */
#define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */
/* Constant term added to every per-bank access latency in Nuca::sim_nuca(). */
#define CONTR_2_BANK_LAT 0

/*
 * Contention values loaded from contention.dat by Nuca::init_cont() and read
 * by Nuca::sim_nuca(). Only entries 2..4 of the "cores" dimension are filled
 * by init_cont(); sim_nuca() selects 2, 3 or 4 for <= 4, <= 8 and <= 16 cores.
 * The last dimension is indexed with (number of cycles / 2 - 1).
 */
int cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cycle time */];
46
47Nuca::Nuca(
48    TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
49): deviceType(dt) {
50    init_cont();
51}
52
53void
54Nuca::init_cont() {
55    FILE *cont;
56    char line[5000];
57    char jk[5000];
58    cont = fopen("contention.dat", "r");
59    if (!cont) {
60        cout << "contention.dat file is missing!\n";
61        exit(0);
62    }
63
64    for (int i = 0; i < 2; i++) {
65        for (int j = 2; j < 5; j++) {
66            for (int k = 0; k < ROUTER_TYPES; k++) {
67                for (int l = 0; l < 7; l++) {
68                    int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
69                    assert(fscanf(cont, "%[^\n]\n", line) != EOF);
70                    sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d", jk,
71                           &temp[0], &temp[1], &temp[2], &temp[3],
72                           &temp[4], &temp[5], &temp[6], &temp[7]);
73                }
74            }
75        }
76    }
77    fclose(cont);
78}
79
80void
81Nuca::print_cont_stats() {
82    for (int i = 0; i < 2; i++) {
83        for (int j = 2; j < 5; j++) {
84            for (int k = 0; k < ROUTER_TYPES; k++) {
85                for (int l = 0; l < 7; l++) {
86                    for (int m = 0; l < 7; l++) {
87                        cout << cont_stats[i][j][k][l][m] << " ";
88                    }
89                    cout << endl;
90                }
91            }
92        }
93    }
94    cout << endl;
95}
96
97Nuca::~Nuca() {
98    for (int i = wt_min; i <= wt_max; i++) {
99        delete wire_vertical[i];
100        delete wire_horizontal[i];
101    }
102}
103
104/* converts latency (in s) to cycles depending upon the FREQUENCY (in GHz) */
105int
106Nuca::calc_cycles(double lat, double oper_freq) {
107    //TODO: convert latch delay to FO4 */
108    double cycle_time = (1.0 / (oper_freq * 1e9)); /*s*/
109    cycle_time -= LATCH_DELAY;
110    cycle_time -= FIXED_OVERHEAD;
111
112    return (int)ceil(lat / cycle_time);
113}
114
115
/*
 * Intentionally releases nothing.
 *
 * In this file h_wire, v_wire and router are filled in by Nuca::sim_nuca()
 * with pointers that are also held in Nuca's wire arrays (deleted in
 * Nuca::~Nuca()) and in sim_nuca()'s router array (deleted at the end of
 * sim_nuca()), and the same router is handed to several nuca_org_t objects.
 * Re-enabling the deletes below would therefore free those objects more
 * than once.
 */
nuca_org_t::~nuca_org_t() {
    // if(h_wire) delete h_wire;
    // if(v_wire) delete v_wire;
    // if(router) delete router;
}
121
122/*
123 * Version - 6.0
124 *
125 * Perform exhaustive search across different bank organizatons,
126 * router configurations, grid organizations, and wire models and
127 * find an optimal NUCA organization
128 * For different bank count values
129 * 1. Optimal bank organization is calculated
130 * 2. For each bank organization, find different NUCA organizations
131 *    using various router configurations, grid organizations,
132 *    and wire models.
133 * 3. NUCA model with the least cost is picked for
134 *    this particular bank count
135 * Finally include contention statistics and find the optimal
136 *    NUCA configuration
137 */
138void
139Nuca::sim_nuca() {
140    /* temp variables */
141    int it, ro, wr;
142    int num_cyc;
143    unsigned int i, j, k;
144    unsigned int r, c;
145    int l2_c;
146    int bank_count = 0;
147    uca_org_t ures;
148    nuca_org_t *opt_n;
149    mem_array tag, data;
150    list<nuca_org_t *> nuca_list;
151    Router *router_s[ROUTER_TYPES];
152    router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global));
153    router_s[0]->print_router();
154    router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global));
155    router_s[1]->print_router();
156    router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global));
157    router_s[2]->print_router();
158
159    int core_in; // to store no. of cores
160
161    /* to search diff grid organizations */
162    double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat,
163    curr_acclat;
164    double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power,
165    avg_leakage_power;
166
167    double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF;
168    int opt_rows = 0;
169    int opt_columns = 0;
170    double opt_totno_hops = 0;
171    double opt_avg_hop = 0;
172    double opt_dyn_power = 0, opt_leakage_power = 0;
173    min_values_t minval;
174
175    int bank_start = 0;
176
177    int flit_width = 0;
178
179    /* vertical and horizontal hop latency values */
180    int ver_hop_lat, hor_hop_lat; /* in cycles */
181
182
183    /* no. of different bank sizes to consider */
184    int iterations;
185
186
187    g_ip->nuca_cache_sz = g_ip->cache_sz;
188    nuca_list.push_back(new nuca_org_t());
189
190    if (g_ip->cache_level == 0) l2_c = 1;
191    else l2_c = 0;
192
193    if (g_ip->cores <= 4) core_in = 2;
194    else if (g_ip->cores <= 8) core_in = 3;
195    else if (g_ip->cores <= 16) core_in = 4;
196    else {
197        cout << "Number of cores should be <= 16!\n";
198        exit(0);
199    }
200
201
202    // set the lower bound to an appropriate value. this depends on cache associativity
203    if (g_ip->assoc > 2) {
204        i = 2;
205        while (i != g_ip->assoc) {
206            MIN_BANKSIZE *= 2;
207            i *= 2;
208        }
209    }
210
211    iterations = (int)logtwo((int)g_ip->cache_sz / MIN_BANKSIZE);
212
213    if (g_ip->force_wiretype) {
214        if (g_ip->wt == Low_swing) {
215            wt_min = Low_swing;
216            wt_max = Low_swing;
217        } else {
218            wt_min = Global;
219            wt_max = Low_swing - 1;
220        }
221    } else {
222        wt_min = Global;
223        wt_max = Low_swing;
224    }
225    if (g_ip->nuca_bank_count != 0) { // simulate just one bank
226        if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
227                g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
228                g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
229            fprintf(stderr, "Incorrect bank count value! Please fix the ",
230                    "value in cache.cfg\n");
231        }
232        bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
233        iterations = bank_start + 1;
234        g_ip->cache_sz = g_ip->cache_sz / g_ip->nuca_bank_count;
235    }
236    cout << "Simulating various NUCA configurations\n";
237    for (it = bank_start; it < iterations; it++) {
238        /* different bank count values */
239        ures.tag_array2 = &tag;
240        ures.data_array2 = &data;
241        /*
242         * find the optimal bank organization
243         */
244        solve(&ures);
245//    output_UCA(&ures);
246        bank_count = g_ip->nuca_cache_sz / g_ip->cache_sz;
247        cout << "====" <<  g_ip->cache_sz << "\n";
248
249        for (wr = wt_min; wr <= wt_max; wr++) {
250
251            for (ro = 0; ro < ROUTER_TYPES; ro++) {
252                flit_width = (int) router_s[ro]->flit_size; //initialize router
253                nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
254
255                /* calculate router and wire parameters */
256
257                double vlength = ures.cache_ht; /* length of the wire (u)*/
258                double hlength = ures.cache_len; // u
259
260                /* find delay, area, and power for wires */
261                wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength);
262                wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
263
264
265                hor_hop_lat =
266                    calc_cycles(wire_horizontal[wr]->delay,
267                                1 /(nuca_list.back()->nuca_pda.cycle_time *
268                                    .001));
269                ver_hop_lat =
270                    calc_cycles(wire_vertical[wr]->delay,
271                                1 / (nuca_list.back()->nuca_pda.cycle_time *
272                                     .001));
273
274                /*
275                 * assume a grid like topology and explore for optimal network
276                 * configuration using different row and column count values.
277                 */
278                for (c = 1; c <= (unsigned int)bank_count; c++) {
279                    while (bank_count % c != 0) c++;
280                    r = bank_count / c;
281
282                    /*
283                     * to find the avg access latency of a NUCA cache, uncontended
284                     * access time to each bank from the
285                     * cache controller is calculated.
286                     * avg latency =
287                     * sum of the access latencies to individual banks)/bank
288                     * count value.
289                     */
290                    totno_hops = totno_hhops = totno_vhops = tot_lat = 0;
291                    k = 1;
292                    for (i = 0; i < r; i++) {
293                        for (j = 0; j < c; j++) {
294                            /*
295                             * vertical hops including the
296                             * first hop from the cache controller
297                             */
298                            curr_hop = i + 1;
299                            curr_hop += j; /* horizontal hops */
300                            totno_hhops += j;
301                            totno_vhops += (i + 1);
302                            curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT +
303                                           j * hor_hop_lat);
304
305                            tot_lat += curr_acclat;
306                            totno_hops += curr_hop;
307                        }
308                    }
309                    avg_lat = tot_lat / bank_count;
310                    avg_hop = totno_hops / bank_count;
311                    avg_hhop = totno_hhops / bank_count;
312                    avg_vhop = totno_vhops / bank_count;
313
314                    /* net access latency */
315                    curr_acclat = 2 * avg_lat + 2 * (router_s[ro]->delay *
316                                                     avg_hop) +
317                        calc_cycles(ures.access_time,
318                                    1 /
319                                    (nuca_list.back()->nuca_pda.cycle_time *
320                                     .001));
321
322                    /* avg access lat of nuca */
323                    avg_dyn_power =
324                        avg_hop *
325                        (router_s[ro]->power.readOp.dynamic) + avg_hhop *
326                        (wire_horizontal[wr]->power.readOp.dynamic) *
327                        (g_ip->block_sz * 8 + 64) + avg_vhop *
328                        (wire_vertical[wr]->power.readOp.dynamic) *
329                        (g_ip->block_sz * 8 + 64) + ures.power.readOp.dynamic;
330
331                    avg_leakage_power =
332                        bank_count * router_s[ro]->power.readOp.leakage +
333                        avg_hhop * (wire_horizontal[wr]->power.readOp.leakage *
334                                    wire_horizontal[wr]->delay) * flit_width +
335                        avg_vhop * (wire_vertical[wr]->power.readOp.leakage *
336                                    wire_horizontal[wr]->delay);
337
338                    if (curr_acclat < opt_acclat) {
339                        opt_acclat = curr_acclat;
340                        opt_tot_lat = tot_lat;
341                        opt_avg_lat = avg_lat;
342                        opt_totno_hops = totno_hops;
343                        opt_avg_hop = avg_hop;
344                        opt_rows = r;
345                        opt_columns = c;
346                        opt_dyn_power = avg_dyn_power;
347                        opt_leakage_power = avg_leakage_power;
348                    }
349                    totno_hops = 0;
350                    tot_lat = 0;
351                    totno_hhops = 0;
352                    totno_vhops = 0;
353                }
354                nuca_list.back()->wire_pda.power.readOp.dynamic =
355                    opt_avg_hop * flit_width *
356                    (wire_horizontal[wr]->power.readOp.dynamic +
357                     wire_vertical[wr]->power.readOp.dynamic);
358                nuca_list.back()->avg_hops = opt_avg_hop;
359                /* network delay/power */
360                nuca_list.back()->h_wire = wire_horizontal[wr];
361                nuca_list.back()->v_wire = wire_vertical[wr];
362                nuca_list.back()->router = router_s[ro];
363                /* bank delay/power */
364
365                nuca_list.back()->bank_pda.delay = ures.access_time;
366                nuca_list.back()->bank_pda.power = ures.power;
367                nuca_list.back()->bank_pda.area.h = ures.cache_ht;
368                nuca_list.back()->bank_pda.area.w = ures.cache_len;
369                nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
370
371                num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
372                                      1 /
373                                      (nuca_list.back()->nuca_pda.cycle_time *
374                                       .001/*GHz*/));
375                if (num_cyc % 2 != 0) num_cyc++;
376                if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
377
378                if (it < 7) {
379                    nuca_list.back()->nuca_pda.delay = opt_acclat +
380                                                       cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
381                    nuca_list.back()->contention =
382                        cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
383                } else {
384                    nuca_list.back()->nuca_pda.delay = opt_acclat +
385                                                       cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
386                    nuca_list.back()->contention =
387                        cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
388                }
389                nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power;
390                nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power;
391
392                /* array organization */
393                nuca_list.back()->bank_count = bank_count;
394                nuca_list.back()->rows = opt_rows;
395                nuca_list.back()->columns = opt_columns;
396                calculate_nuca_area (nuca_list.back());
397
398                minval.update_min_values(nuca_list.back());
399                nuca_list.push_back(new nuca_org_t());
400                opt_acclat = BIGNUM;
401
402            }
403        }
404        g_ip->cache_sz /= 2;
405    }
406
407    delete(nuca_list.back());
408    nuca_list.pop_back();
409    opt_n = find_optimal_nuca(&nuca_list, &minval);
410    print_nuca(opt_n);
411    g_ip->cache_sz = g_ip->nuca_cache_sz / opt_n->bank_count;
412
413    list<nuca_org_t *>::iterator niter;
414    for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) {
415        delete *niter;
416    }
417    nuca_list.clear();
418
419    for (int i = 0; i < ROUTER_TYPES; i++) {
420        delete router_s[i];
421    }
422    g_ip->display_ip();
423    //  g_ip->force_cache_config = true;
424    //  g_ip->ndwl = 8;
425    //  g_ip->ndbl = 16;
426    //  g_ip->nspd = 4;
427    //  g_ip->ndcm = 1;
428    //  g_ip->ndsam1 = 8;
429    //  g_ip->ndsam2 = 32;
430
431}
432
433
434void
435Nuca::print_nuca (nuca_org_t *fr) {
436    printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
437           "----------\n\n");
438    printf("Optimal number of banks - %d\n", fr->bank_count);
439    printf("Grid organization rows x columns - %d x %d\n",
440           fr->rows, fr->columns);
441    printf("Network frequency - %g GHz\n",
442           (1 / fr->nuca_pda.cycle_time)*1e3);
443    printf("Cache dimension (mm x mm) - %g x %g\n",
444           fr->nuca_pda.area.h,
445           fr->nuca_pda.area.w);
446
447    fr->router->print_router();
448
449    printf("\n\nWire stats:\n");
450    if (fr->h_wire->wt == Global) {
451        printf("\tWire type - Full swing global wires with least "
452               "possible delay\n");
453    } else if (fr->h_wire->wt == Global_5) {
454        printf("\tWire type - Full swing global wires with "
455               "5%% delay penalty\n");
456    } else if (fr->h_wire->wt == Global_10) {
457        printf("\tWire type - Full swing global wires with "
458               "10%% delay penalty\n");
459    } else if (fr->h_wire->wt == Global_20) {
460        printf("\tWire type - Full swing global wires with "
461               "20%% delay penalty\n");
462    } else if (fr->h_wire->wt == Global_30) {
463        printf("\tWire type - Full swing global wires with "
464               "30%% delay penalty\n");
465    } else if (fr->h_wire->wt == Low_swing) {
466        printf("\tWire type - Low swing wires\n");
467    }
468
469    printf("\tHorizontal link delay - %g (ns)\n",
470           fr->h_wire->delay*1e9);
471    printf("\tVertical link delay - %g (ns)\n",
472           fr->v_wire->delay*1e9);
473    printf("\tDelay/length - %g (ns/mm)\n",
474           fr->h_wire->delay*1e9 / fr->bank_pda.area.w);
475    printf("\tHorizontal link energy -dynamic/access %g (nJ)\n"
476           "\t                       -leakage %g (nW)\n\n",
477           fr->h_wire->power.readOp.dynamic*1e9,
478           fr->h_wire->power.readOp.leakage*1e9);
479    printf("\tVertical link energy -dynamic/access %g (nJ)\n"
480           "\t                     -leakage %g (nW)\n\n",
481           fr->v_wire->power.readOp.dynamic*1e9,
482           fr->v_wire->power.readOp.leakage*1e9);
483    printf("\n\n");
484    fr->v_wire->print_wire();
485    printf("\n\nBank stats:\n");
486}
487
488
489nuca_org_t *
490Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval) {
491    double cost = 0;
492    double min_cost = BIGNUM;
493    nuca_org_t *res = NULL;
494    float d, a, dp, lp, c;
495    int v;
496    dp = g_ip->dynamic_power_wt_nuca;
497    lp = g_ip->leakage_power_wt_nuca;
498    a = g_ip->area_wt_nuca;
499    d = g_ip->delay_wt_nuca;
500    c = g_ip->cycle_time_wt_nuca;
501
502    list<nuca_org_t *>::iterator niter;
503
504
505    for (niter = n->begin(); niter != n->end(); niter++) {
506        fprintf(stderr, "\n-----------------------------"
507                "---------------\n");
508
509
510        printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t "
511               "bank_dpower = %g \tleak = %g \tcycle = %g\n",
512               (*niter)->bank_count,
513               (*niter)->nuca_pda.delay,
514               (*niter)->nuca_pda.power.readOp.dynamic,
515               (*niter)->h_wire->wt,
516               (*niter)->bank_pda.power.readOp.dynamic,
517               (*niter)->nuca_pda.power.readOp.leakage,
518               (*niter)->nuca_pda.cycle_time);
519
520
521        if (g_ip->ed == 1) {
522            cost = ((*niter)->nuca_pda.delay / minval->min_delay) *
523                   ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn);
524            if (min_cost > cost) {
525                min_cost = cost;
526                res = ((*niter));
527            }
528        } else if (g_ip->ed == 2) {
529            cost = ((*niter)->nuca_pda.delay / minval->min_delay) *
530                   ((*niter)->nuca_pda.delay / minval->min_delay) *
531                   ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn);
532            if (min_cost > cost) {
533                min_cost = cost;
534                res = ((*niter));
535            }
536        } else {
537            /*
538             * check whether the current organization
539             * meets the input deviation constraints
540             */
541            v = check_nuca_org((*niter), minval);
542            if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
543
544            if (v) {
545                cost = (d  * ((*niter)->nuca_pda.delay / minval->min_delay) +
546                        c  * ((*niter)->nuca_pda.cycle_time / minval->min_cyc) +
547                        dp * ((*niter)->nuca_pda.power.readOp.dynamic /
548                              minval->min_dyn) +
549                        lp * ((*niter)->nuca_pda.power.readOp.leakage /
550                              minval->min_leakage) +
551                        a  * ((*niter)->nuca_pda.area.get_area() /
552                              minval->min_area));
553                fprintf(stderr, "cost = %g\n", cost);
554
555                if (min_cost > cost) {
556                    min_cost = cost;
557                    res = ((*niter));
558                }
559            } else {
560                niter = n->erase(niter);
561                if (niter != n->begin())
562                    niter --;
563            }
564        }
565    }
566    return res;
567}
568
569int
570Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) {
571    if (((n->nuca_pda.delay - minval->min_delay)*100 / minval->min_delay) >
572        g_ip->delay_dev_nuca) {
573        return 0;
574    }
575    if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn) /
576         minval->min_dyn)*100 >
577        g_ip->dynamic_power_dev_nuca) {
578        return 0;
579    }
580    if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage) /
581         minval->min_leakage)*100 >
582        g_ip->leakage_power_dev_nuca) {
583        return 0;
584    }
585    if (((n->nuca_pda.cycle_time - minval->min_cyc) / minval->min_cyc)*100 >
586        g_ip->cycle_time_dev_nuca) {
587        return 0;
588    }
589    if (((n->nuca_pda.area.get_area() - minval->min_area) / minval->min_area) *
590        100 >
591        g_ip->area_dev_nuca) {
592        return 0;
593    }
594    return 1;
595}
596
597void
598Nuca::calculate_nuca_area (nuca_org_t *nuca) {
599    nuca->nuca_pda.area.h =
600        nuca->rows * ((nuca->h_wire->wire_width +
601                       nuca->h_wire->wire_spacing)
602                      * nuca->router->flit_size +
603                      nuca->bank_pda.area.h);
604
605    nuca->nuca_pda.area.w =
606        nuca->columns * ((nuca->v_wire->wire_width +
607                          nuca->v_wire->wire_spacing)
608                         * nuca->router->flit_size +
609                         nuca->bank_pda.area.w);
610}
611
612