110152Satgutier@umich.edu/*****************************************************************************
210152Satgutier@umich.edu *                                McPAT/CACTI
310152Satgutier@umich.edu *                      SOFTWARE LICENSE AGREEMENT
410152Satgutier@umich.edu *            Copyright 2012 Hewlett-Packard Development Company, L.P.
510234Syasuko.eckert@amd.com *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
610152Satgutier@umich.edu *                          All Rights Reserved
710152Satgutier@umich.edu *
810152Satgutier@umich.edu * Redistribution and use in source and binary forms, with or without
910152Satgutier@umich.edu * modification, are permitted provided that the following conditions are
1010152Satgutier@umich.edu * met: redistributions of source code must retain the above copyright
1110152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer;
1210152Satgutier@umich.edu * redistributions in binary form must reproduce the above copyright
1310152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer in the
1410152Satgutier@umich.edu * documentation and/or other materials provided with the distribution;
1510152Satgutier@umich.edu * neither the name of the copyright holders nor the names of its
1610152Satgutier@umich.edu * contributors may be used to endorse or promote products derived from
1710152Satgutier@umich.edu * this software without specific prior written permission.
1810152Satgutier@umich.edu
1910152Satgutier@umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2010152Satgutier@umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2110152Satgutier@umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2210152Satgutier@umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2310152Satgutier@umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
2410152Satgutier@umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
2510152Satgutier@umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2610152Satgutier@umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2710152Satgutier@umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2810152Satgutier@umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2910234Syasuko.eckert@amd.com * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3010152Satgutier@umich.edu *
3110152Satgutier@umich.edu ***************************************************************************/
3210152Satgutier@umich.edu
3310152Satgutier@umich.edu
3410152Satgutier@umich.edu
3510152Satgutier@umich.edu#include <cassert>
3610152Satgutier@umich.edu
3710152Satgutier@umich.edu#include "Ucache.h"
3810152Satgutier@umich.edu#include "nuca.h"
3910152Satgutier@umich.edu
4010234Syasuko.eckert@amd.comunsigned int MIN_BANKSIZE = 65536;
4110152Satgutier@umich.edu#define FIXED_OVERHEAD 55e-12 /* clock skew and jitter in s. Ref: Hrishikesh et al ISCA 01 */
4210152Satgutier@umich.edu#define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */
4310152Satgutier@umich.edu#define CONTR_2_BANK_LAT 0
4410152Satgutier@umich.edu
4510152Satgutier@umich.eduint cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cycle time */];
4610152Satgutier@umich.edu
4710234Syasuko.eckert@amd.comNuca::Nuca(
4810234Syasuko.eckert@amd.com    TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
4910234Syasuko.eckert@amd.com): deviceType(dt) {
5010234Syasuko.eckert@amd.com    init_cont();
5110152Satgutier@umich.edu}
5210152Satgutier@umich.edu
5310152Satgutier@umich.eduvoid
5410234Syasuko.eckert@amd.comNuca::init_cont() {
5510234Syasuko.eckert@amd.com    FILE *cont;
5610234Syasuko.eckert@amd.com    char line[5000];
5710234Syasuko.eckert@amd.com    char jk[5000];
5810234Syasuko.eckert@amd.com    cont = fopen("contention.dat", "r");
5910234Syasuko.eckert@amd.com    if (!cont) {
6010234Syasuko.eckert@amd.com        cout << "contention.dat file is missing!\n";
6110234Syasuko.eckert@amd.com        exit(0);
6210234Syasuko.eckert@amd.com    }
6310152Satgutier@umich.edu
6410234Syasuko.eckert@amd.com    for (int i = 0; i < 2; i++) {
6510234Syasuko.eckert@amd.com        for (int j = 2; j < 5; j++) {
6610234Syasuko.eckert@amd.com            for (int k = 0; k < ROUTER_TYPES; k++) {
6710234Syasuko.eckert@amd.com                for (int l = 0; l < 7; l++) {
6810234Syasuko.eckert@amd.com                    int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
6910234Syasuko.eckert@amd.com                    assert(fscanf(cont, "%[^\n]\n", line) != EOF);
7010234Syasuko.eckert@amd.com                    sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d", jk,
7110234Syasuko.eckert@amd.com                           &temp[0], &temp[1], &temp[2], &temp[3],
7210234Syasuko.eckert@amd.com                           &temp[4], &temp[5], &temp[6], &temp[7]);
7310234Syasuko.eckert@amd.com                }
7410234Syasuko.eckert@amd.com            }
7510152Satgutier@umich.edu        }
7610152Satgutier@umich.edu    }
7710234Syasuko.eckert@amd.com    fclose(cont);
7810152Satgutier@umich.edu}
7910152Satgutier@umich.edu
8010234Syasuko.eckert@amd.comvoid
8110234Syasuko.eckert@amd.comNuca::print_cont_stats() {
8210234Syasuko.eckert@amd.com    for (int i = 0; i < 2; i++) {
8310234Syasuko.eckert@amd.com        for (int j = 2; j < 5; j++) {
8410234Syasuko.eckert@amd.com            for (int k = 0; k < ROUTER_TYPES; k++) {
8510234Syasuko.eckert@amd.com                for (int l = 0; l < 7; l++) {
8610234Syasuko.eckert@amd.com                    for (int m = 0; l < 7; l++) {
8710234Syasuko.eckert@amd.com                        cout << cont_stats[i][j][k][l][m] << " ";
8810234Syasuko.eckert@amd.com                    }
8910234Syasuko.eckert@amd.com                    cout << endl;
9010234Syasuko.eckert@amd.com                }
9110234Syasuko.eckert@amd.com            }
9210152Satgutier@umich.edu        }
9310152Satgutier@umich.edu    }
9410234Syasuko.eckert@amd.com    cout << endl;
9510152Satgutier@umich.edu}
9610152Satgutier@umich.edu
9710234Syasuko.eckert@amd.comNuca::~Nuca() {
9810234Syasuko.eckert@amd.com    for (int i = wt_min; i <= wt_max; i++) {
9910234Syasuko.eckert@amd.com        delete wire_vertical[i];
10010234Syasuko.eckert@amd.com        delete wire_horizontal[i];
10110234Syasuko.eckert@amd.com    }
10210152Satgutier@umich.edu}
10310152Satgutier@umich.edu
10410152Satgutier@umich.edu/* converts latency (in s) to cycles depending upon the FREQUENCY (in GHz) */
10510234Syasuko.eckert@amd.comint
10610234Syasuko.eckert@amd.comNuca::calc_cycles(double lat, double oper_freq) {
10710234Syasuko.eckert@amd.com    //TODO: convert latch delay to FO4 */
10810234Syasuko.eckert@amd.com    double cycle_time = (1.0 / (oper_freq * 1e9)); /*s*/
10910234Syasuko.eckert@amd.com    cycle_time -= LATCH_DELAY;
11010234Syasuko.eckert@amd.com    cycle_time -= FIXED_OVERHEAD;
11110152Satgutier@umich.edu
11210234Syasuko.eckert@amd.com    return (int)ceil(lat / cycle_time);
11310152Satgutier@umich.edu}
11410152Satgutier@umich.edu
11510152Satgutier@umich.edu
11610152Satgutier@umich.edunuca_org_t::~nuca_org_t() {
11710234Syasuko.eckert@amd.com    // if(h_wire) delete h_wire;
11810234Syasuko.eckert@amd.com    // if(v_wire) delete v_wire;
11910234Syasuko.eckert@amd.com    // if(router) delete router;
12010152Satgutier@umich.edu}
12110152Satgutier@umich.edu
12210152Satgutier@umich.edu/*
12310152Satgutier@umich.edu * Version - 6.0
12410152Satgutier@umich.edu *
12510152Satgutier@umich.edu * Perform exhaustive search across different bank organizatons,
12610152Satgutier@umich.edu * router configurations, grid organizations, and wire models and
12710152Satgutier@umich.edu * find an optimal NUCA organization
12810152Satgutier@umich.edu * For different bank count values
12910152Satgutier@umich.edu * 1. Optimal bank organization is calculated
13010152Satgutier@umich.edu * 2. For each bank organization, find different NUCA organizations
13110152Satgutier@umich.edu *    using various router configurations, grid organizations,
13210152Satgutier@umich.edu *    and wire models.
13310152Satgutier@umich.edu * 3. NUCA model with the least cost is picked for
13410152Satgutier@umich.edu *    this particular bank count
13510152Satgutier@umich.edu * Finally include contention statistics and find the optimal
13610152Satgutier@umich.edu *    NUCA configuration
13710152Satgutier@umich.edu */
13810234Syasuko.eckert@amd.comvoid
13910234Syasuko.eckert@amd.comNuca::sim_nuca() {
14010234Syasuko.eckert@amd.com    /* temp variables */
14110234Syasuko.eckert@amd.com    int it, ro, wr;
14210234Syasuko.eckert@amd.com    int num_cyc;
14310234Syasuko.eckert@amd.com    unsigned int i, j, k;
14410234Syasuko.eckert@amd.com    unsigned int r, c;
14510234Syasuko.eckert@amd.com    int l2_c;
14610234Syasuko.eckert@amd.com    int bank_count = 0;
14710234Syasuko.eckert@amd.com    uca_org_t ures;
14810234Syasuko.eckert@amd.com    nuca_org_t *opt_n;
14910234Syasuko.eckert@amd.com    mem_array tag, data;
15010234Syasuko.eckert@amd.com    list<nuca_org_t *> nuca_list;
15110234Syasuko.eckert@amd.com    Router *router_s[ROUTER_TYPES];
15210234Syasuko.eckert@amd.com    router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global));
15310234Syasuko.eckert@amd.com    router_s[0]->print_router();
15410234Syasuko.eckert@amd.com    router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global));
15510234Syasuko.eckert@amd.com    router_s[1]->print_router();
15610234Syasuko.eckert@amd.com    router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global));
15710234Syasuko.eckert@amd.com    router_s[2]->print_router();
15810152Satgutier@umich.edu
15910234Syasuko.eckert@amd.com    int core_in; // to store no. of cores
16010152Satgutier@umich.edu
16110234Syasuko.eckert@amd.com    /* to search diff grid organizations */
16210234Syasuko.eckert@amd.com    double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat,
16310234Syasuko.eckert@amd.com    curr_acclat;
16410234Syasuko.eckert@amd.com    double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power,
16510234Syasuko.eckert@amd.com    avg_leakage_power;
16610152Satgutier@umich.edu
16710234Syasuko.eckert@amd.com    double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF;
16810234Syasuko.eckert@amd.com    int opt_rows = 0;
16910234Syasuko.eckert@amd.com    int opt_columns = 0;
17010234Syasuko.eckert@amd.com    double opt_totno_hops = 0;
17110234Syasuko.eckert@amd.com    double opt_avg_hop = 0;
17210234Syasuko.eckert@amd.com    double opt_dyn_power = 0, opt_leakage_power = 0;
17310234Syasuko.eckert@amd.com    min_values_t minval;
17410152Satgutier@umich.edu
17510234Syasuko.eckert@amd.com    int bank_start = 0;
17610152Satgutier@umich.edu
17710234Syasuko.eckert@amd.com    int flit_width = 0;
17810152Satgutier@umich.edu
17910234Syasuko.eckert@amd.com    /* vertical and horizontal hop latency values */
18010234Syasuko.eckert@amd.com    int ver_hop_lat, hor_hop_lat; /* in cycles */
18110152Satgutier@umich.edu
18210152Satgutier@umich.edu
18310234Syasuko.eckert@amd.com    /* no. of different bank sizes to consider */
18410234Syasuko.eckert@amd.com    int iterations;
18510152Satgutier@umich.edu
18610152Satgutier@umich.edu
18710234Syasuko.eckert@amd.com    g_ip->nuca_cache_sz = g_ip->cache_sz;
18810234Syasuko.eckert@amd.com    nuca_list.push_back(new nuca_org_t());
18910152Satgutier@umich.edu
19010234Syasuko.eckert@amd.com    if (g_ip->cache_level == 0) l2_c = 1;
19110234Syasuko.eckert@amd.com    else l2_c = 0;
19210152Satgutier@umich.edu
19310234Syasuko.eckert@amd.com    if (g_ip->cores <= 4) core_in = 2;
19410234Syasuko.eckert@amd.com    else if (g_ip->cores <= 8) core_in = 3;
19510234Syasuko.eckert@amd.com    else if (g_ip->cores <= 16) core_in = 4;
19610234Syasuko.eckert@amd.com    else {
19710234Syasuko.eckert@amd.com        cout << "Number of cores should be <= 16!\n";
19810234Syasuko.eckert@amd.com        exit(0);
19910234Syasuko.eckert@amd.com    }
20010152Satgutier@umich.edu
20110152Satgutier@umich.edu
20210234Syasuko.eckert@amd.com    // set the lower bound to an appropriate value. this depends on cache associativity
20310234Syasuko.eckert@amd.com    if (g_ip->assoc > 2) {
20410234Syasuko.eckert@amd.com        i = 2;
20510234Syasuko.eckert@amd.com        while (i != g_ip->assoc) {
20610234Syasuko.eckert@amd.com            MIN_BANKSIZE *= 2;
20710234Syasuko.eckert@amd.com            i *= 2;
20810234Syasuko.eckert@amd.com        }
20910152Satgutier@umich.edu    }
21010152Satgutier@umich.edu
21110234Syasuko.eckert@amd.com    iterations = (int)logtwo((int)g_ip->cache_sz / MIN_BANKSIZE);
21210152Satgutier@umich.edu
21310234Syasuko.eckert@amd.com    if (g_ip->force_wiretype) {
21410234Syasuko.eckert@amd.com        if (g_ip->wt == Low_swing) {
21510234Syasuko.eckert@amd.com            wt_min = Low_swing;
21610234Syasuko.eckert@amd.com            wt_max = Low_swing;
21710234Syasuko.eckert@amd.com        } else {
21810234Syasuko.eckert@amd.com            wt_min = Global;
21910234Syasuko.eckert@amd.com            wt_max = Low_swing - 1;
22010234Syasuko.eckert@amd.com        }
22110234Syasuko.eckert@amd.com    } else {
22210234Syasuko.eckert@amd.com        wt_min = Global;
22310234Syasuko.eckert@amd.com        wt_max = Low_swing;
22410152Satgutier@umich.edu    }
22510234Syasuko.eckert@amd.com    if (g_ip->nuca_bank_count != 0) { // simulate just one bank
22610234Syasuko.eckert@amd.com        if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
22710234Syasuko.eckert@amd.com                g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
22810234Syasuko.eckert@amd.com                g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
22910234Syasuko.eckert@amd.com            fprintf(stderr, "Incorrect bank count value! Please fix the ",
23010234Syasuko.eckert@amd.com                    "value in cache.cfg\n");
23110234Syasuko.eckert@amd.com        }
23210234Syasuko.eckert@amd.com        bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
23310234Syasuko.eckert@amd.com        iterations = bank_start + 1;
23410234Syasuko.eckert@amd.com        g_ip->cache_sz = g_ip->cache_sz / g_ip->nuca_bank_count;
23510152Satgutier@umich.edu    }
23610234Syasuko.eckert@amd.com    cout << "Simulating various NUCA configurations\n";
23710234Syasuko.eckert@amd.com    for (it = bank_start; it < iterations; it++) {
23810234Syasuko.eckert@amd.com        /* different bank count values */
23910234Syasuko.eckert@amd.com        ures.tag_array2 = &tag;
24010234Syasuko.eckert@amd.com        ures.data_array2 = &data;
24110234Syasuko.eckert@amd.com        /*
24210234Syasuko.eckert@amd.com         * find the optimal bank organization
24310234Syasuko.eckert@amd.com         */
24410234Syasuko.eckert@amd.com        solve(&ures);
24510234Syasuko.eckert@amd.com//    output_UCA(&ures);
24610234Syasuko.eckert@amd.com        bank_count = g_ip->nuca_cache_sz / g_ip->cache_sz;
24710234Syasuko.eckert@amd.com        cout << "====" <<  g_ip->cache_sz << "\n";
24810234Syasuko.eckert@amd.com
24910234Syasuko.eckert@amd.com        for (wr = wt_min; wr <= wt_max; wr++) {
25010234Syasuko.eckert@amd.com
25110234Syasuko.eckert@amd.com            for (ro = 0; ro < ROUTER_TYPES; ro++) {
25210234Syasuko.eckert@amd.com                flit_width = (int) router_s[ro]->flit_size; //initialize router
25310234Syasuko.eckert@amd.com                nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
25410234Syasuko.eckert@amd.com
25510234Syasuko.eckert@amd.com                /* calculate router and wire parameters */
25610234Syasuko.eckert@amd.com
25710234Syasuko.eckert@amd.com                double vlength = ures.cache_ht; /* length of the wire (u)*/
25810234Syasuko.eckert@amd.com                double hlength = ures.cache_len; // u
25910234Syasuko.eckert@amd.com
26010234Syasuko.eckert@amd.com                /* find delay, area, and power for wires */
26110234Syasuko.eckert@amd.com                wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength);
26210234Syasuko.eckert@amd.com                wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
26310234Syasuko.eckert@amd.com
26410234Syasuko.eckert@amd.com
26510234Syasuko.eckert@amd.com                hor_hop_lat =
26610234Syasuko.eckert@amd.com                    calc_cycles(wire_horizontal[wr]->delay,
26710234Syasuko.eckert@amd.com                                1 /(nuca_list.back()->nuca_pda.cycle_time *
26810234Syasuko.eckert@amd.com                                    .001));
26910234Syasuko.eckert@amd.com                ver_hop_lat =
27010234Syasuko.eckert@amd.com                    calc_cycles(wire_vertical[wr]->delay,
27110234Syasuko.eckert@amd.com                                1 / (nuca_list.back()->nuca_pda.cycle_time *
27210234Syasuko.eckert@amd.com                                     .001));
27310234Syasuko.eckert@amd.com
27410234Syasuko.eckert@amd.com                /*
27510234Syasuko.eckert@amd.com                 * assume a grid like topology and explore for optimal network
27610234Syasuko.eckert@amd.com                 * configuration using different row and column count values.
27710234Syasuko.eckert@amd.com                 */
27810234Syasuko.eckert@amd.com                for (c = 1; c <= (unsigned int)bank_count; c++) {
27910234Syasuko.eckert@amd.com                    while (bank_count % c != 0) c++;
28010234Syasuko.eckert@amd.com                    r = bank_count / c;
28110234Syasuko.eckert@amd.com
28210234Syasuko.eckert@amd.com                    /*
28310234Syasuko.eckert@amd.com                     * to find the avg access latency of a NUCA cache, uncontended
28410234Syasuko.eckert@amd.com                     * access time to each bank from the
28510234Syasuko.eckert@amd.com                     * cache controller is calculated.
28610234Syasuko.eckert@amd.com                     * avg latency =
28710234Syasuko.eckert@amd.com                     * sum of the access latencies to individual banks)/bank
28810234Syasuko.eckert@amd.com                     * count value.
28910234Syasuko.eckert@amd.com                     */
29010234Syasuko.eckert@amd.com                    totno_hops = totno_hhops = totno_vhops = tot_lat = 0;
29110234Syasuko.eckert@amd.com                    k = 1;
29210234Syasuko.eckert@amd.com                    for (i = 0; i < r; i++) {
29310234Syasuko.eckert@amd.com                        for (j = 0; j < c; j++) {
29410234Syasuko.eckert@amd.com                            /*
29510234Syasuko.eckert@amd.com                             * vertical hops including the
29610234Syasuko.eckert@amd.com                             * first hop from the cache controller
29710234Syasuko.eckert@amd.com                             */
29810234Syasuko.eckert@amd.com                            curr_hop = i + 1;
29910234Syasuko.eckert@amd.com                            curr_hop += j; /* horizontal hops */
30010234Syasuko.eckert@amd.com                            totno_hhops += j;
30110234Syasuko.eckert@amd.com                            totno_vhops += (i + 1);
30210234Syasuko.eckert@amd.com                            curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT +
30310234Syasuko.eckert@amd.com                                           j * hor_hop_lat);
30410234Syasuko.eckert@amd.com
30510234Syasuko.eckert@amd.com                            tot_lat += curr_acclat;
30610234Syasuko.eckert@amd.com                            totno_hops += curr_hop;
30710234Syasuko.eckert@amd.com                        }
30810234Syasuko.eckert@amd.com                    }
30910234Syasuko.eckert@amd.com                    avg_lat = tot_lat / bank_count;
31010234Syasuko.eckert@amd.com                    avg_hop = totno_hops / bank_count;
31110234Syasuko.eckert@amd.com                    avg_hhop = totno_hhops / bank_count;
31210234Syasuko.eckert@amd.com                    avg_vhop = totno_vhops / bank_count;
31310234Syasuko.eckert@amd.com
31410234Syasuko.eckert@amd.com                    /* net access latency */
31510234Syasuko.eckert@amd.com                    curr_acclat = 2 * avg_lat + 2 * (router_s[ro]->delay *
31610234Syasuko.eckert@amd.com                                                     avg_hop) +
31710234Syasuko.eckert@amd.com                        calc_cycles(ures.access_time,
31810234Syasuko.eckert@amd.com                                    1 /
31910234Syasuko.eckert@amd.com                                    (nuca_list.back()->nuca_pda.cycle_time *
32010234Syasuko.eckert@amd.com                                     .001));
32110234Syasuko.eckert@amd.com
32210234Syasuko.eckert@amd.com                    /* avg access lat of nuca */
32310234Syasuko.eckert@amd.com                    avg_dyn_power =
32410234Syasuko.eckert@amd.com                        avg_hop *
32510234Syasuko.eckert@amd.com                        (router_s[ro]->power.readOp.dynamic) + avg_hhop *
32610234Syasuko.eckert@amd.com                        (wire_horizontal[wr]->power.readOp.dynamic) *
32710234Syasuko.eckert@amd.com                        (g_ip->block_sz * 8 + 64) + avg_vhop *
32810234Syasuko.eckert@amd.com                        (wire_vertical[wr]->power.readOp.dynamic) *
32910234Syasuko.eckert@amd.com                        (g_ip->block_sz * 8 + 64) + ures.power.readOp.dynamic;
33010234Syasuko.eckert@amd.com
33110234Syasuko.eckert@amd.com                    avg_leakage_power =
33210234Syasuko.eckert@amd.com                        bank_count * router_s[ro]->power.readOp.leakage +
33310234Syasuko.eckert@amd.com                        avg_hhop * (wire_horizontal[wr]->power.readOp.leakage *
33410234Syasuko.eckert@amd.com                                    wire_horizontal[wr]->delay) * flit_width +
33510234Syasuko.eckert@amd.com                        avg_vhop * (wire_vertical[wr]->power.readOp.leakage *
33610234Syasuko.eckert@amd.com                                    wire_horizontal[wr]->delay);
33710234Syasuko.eckert@amd.com
33810234Syasuko.eckert@amd.com                    if (curr_acclat < opt_acclat) {
33910234Syasuko.eckert@amd.com                        opt_acclat = curr_acclat;
34010234Syasuko.eckert@amd.com                        opt_tot_lat = tot_lat;
34110234Syasuko.eckert@amd.com                        opt_avg_lat = avg_lat;
34210234Syasuko.eckert@amd.com                        opt_totno_hops = totno_hops;
34310234Syasuko.eckert@amd.com                        opt_avg_hop = avg_hop;
34410234Syasuko.eckert@amd.com                        opt_rows = r;
34510234Syasuko.eckert@amd.com                        opt_columns = c;
34610234Syasuko.eckert@amd.com                        opt_dyn_power = avg_dyn_power;
34710234Syasuko.eckert@amd.com                        opt_leakage_power = avg_leakage_power;
34810234Syasuko.eckert@amd.com                    }
34910234Syasuko.eckert@amd.com                    totno_hops = 0;
35010234Syasuko.eckert@amd.com                    tot_lat = 0;
35110234Syasuko.eckert@amd.com                    totno_hhops = 0;
35210234Syasuko.eckert@amd.com                    totno_vhops = 0;
35310234Syasuko.eckert@amd.com                }
35410234Syasuko.eckert@amd.com                nuca_list.back()->wire_pda.power.readOp.dynamic =
35510234Syasuko.eckert@amd.com                    opt_avg_hop * flit_width *
35610234Syasuko.eckert@amd.com                    (wire_horizontal[wr]->power.readOp.dynamic +
35710234Syasuko.eckert@amd.com                     wire_vertical[wr]->power.readOp.dynamic);
35810234Syasuko.eckert@amd.com                nuca_list.back()->avg_hops = opt_avg_hop;
35910234Syasuko.eckert@amd.com                /* network delay/power */
36010234Syasuko.eckert@amd.com                nuca_list.back()->h_wire = wire_horizontal[wr];
36110234Syasuko.eckert@amd.com                nuca_list.back()->v_wire = wire_vertical[wr];
36210234Syasuko.eckert@amd.com                nuca_list.back()->router = router_s[ro];
36310234Syasuko.eckert@amd.com                /* bank delay/power */
36410234Syasuko.eckert@amd.com
36510234Syasuko.eckert@amd.com                nuca_list.back()->bank_pda.delay = ures.access_time;
36610234Syasuko.eckert@amd.com                nuca_list.back()->bank_pda.power = ures.power;
36710234Syasuko.eckert@amd.com                nuca_list.back()->bank_pda.area.h = ures.cache_ht;
36810234Syasuko.eckert@amd.com                nuca_list.back()->bank_pda.area.w = ures.cache_len;
36910234Syasuko.eckert@amd.com                nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
37010234Syasuko.eckert@amd.com
37110234Syasuko.eckert@amd.com                num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
37210234Syasuko.eckert@amd.com                                      1 /
37310234Syasuko.eckert@amd.com                                      (nuca_list.back()->nuca_pda.cycle_time *
37410234Syasuko.eckert@amd.com                                       .001/*GHz*/));
37510234Syasuko.eckert@amd.com                if (num_cyc % 2 != 0) num_cyc++;
37610234Syasuko.eckert@amd.com                if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
37710234Syasuko.eckert@amd.com
37810234Syasuko.eckert@amd.com                if (it < 7) {
37910234Syasuko.eckert@amd.com                    nuca_list.back()->nuca_pda.delay = opt_acclat +
38010234Syasuko.eckert@amd.com                                                       cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
38110234Syasuko.eckert@amd.com                    nuca_list.back()->contention =
38210234Syasuko.eckert@amd.com                        cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
38310234Syasuko.eckert@amd.com                } else {
38410234Syasuko.eckert@amd.com                    nuca_list.back()->nuca_pda.delay = opt_acclat +
38510234Syasuko.eckert@amd.com                                                       cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
38610234Syasuko.eckert@amd.com                    nuca_list.back()->contention =
38710234Syasuko.eckert@amd.com                        cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
38810234Syasuko.eckert@amd.com                }
38910234Syasuko.eckert@amd.com                nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power;
39010234Syasuko.eckert@amd.com                nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power;
39110234Syasuko.eckert@amd.com
39210234Syasuko.eckert@amd.com                /* array organization */
39310234Syasuko.eckert@amd.com                nuca_list.back()->bank_count = bank_count;
39410234Syasuko.eckert@amd.com                nuca_list.back()->rows = opt_rows;
39510234Syasuko.eckert@amd.com                nuca_list.back()->columns = opt_columns;
39610234Syasuko.eckert@amd.com                calculate_nuca_area (nuca_list.back());
39710234Syasuko.eckert@amd.com
39810234Syasuko.eckert@amd.com                minval.update_min_values(nuca_list.back());
39910234Syasuko.eckert@amd.com                nuca_list.push_back(new nuca_org_t());
40010234Syasuko.eckert@amd.com                opt_acclat = BIGNUM;
40110234Syasuko.eckert@amd.com
40210234Syasuko.eckert@amd.com            }
40310234Syasuko.eckert@amd.com        }
40410234Syasuko.eckert@amd.com        g_ip->cache_sz /= 2;
40510152Satgutier@umich.edu    }
40610152Satgutier@umich.edu
40710234Syasuko.eckert@amd.com    delete(nuca_list.back());
40810234Syasuko.eckert@amd.com    nuca_list.pop_back();
40910234Syasuko.eckert@amd.com    opt_n = find_optimal_nuca(&nuca_list, &minval);
41010234Syasuko.eckert@amd.com    print_nuca(opt_n);
41110234Syasuko.eckert@amd.com    g_ip->cache_sz = g_ip->nuca_cache_sz / opt_n->bank_count;
41210152Satgutier@umich.edu
41310234Syasuko.eckert@amd.com    list<nuca_org_t *>::iterator niter;
41410234Syasuko.eckert@amd.com    for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) {
41510234Syasuko.eckert@amd.com        delete *niter;
41610234Syasuko.eckert@amd.com    }
41710234Syasuko.eckert@amd.com    nuca_list.clear();
41810152Satgutier@umich.edu
41910234Syasuko.eckert@amd.com    for (int i = 0; i < ROUTER_TYPES; i++) {
42010234Syasuko.eckert@amd.com        delete router_s[i];
42110152Satgutier@umich.edu    }
42210234Syasuko.eckert@amd.com    g_ip->display_ip();
42310234Syasuko.eckert@amd.com    //  g_ip->force_cache_config = true;
42410234Syasuko.eckert@amd.com    //  g_ip->ndwl = 8;
42510234Syasuko.eckert@amd.com    //  g_ip->ndbl = 16;
42610234Syasuko.eckert@amd.com    //  g_ip->nspd = 4;
42710234Syasuko.eckert@amd.com    //  g_ip->ndcm = 1;
42810234Syasuko.eckert@amd.com    //  g_ip->ndsam1 = 8;
42910234Syasuko.eckert@amd.com    //  g_ip->ndsam2 = 32;
43010152Satgutier@umich.edu
43110152Satgutier@umich.edu}
43210152Satgutier@umich.edu
43310152Satgutier@umich.edu
43410234Syasuko.eckert@amd.comvoid
43510234Syasuko.eckert@amd.comNuca::print_nuca (nuca_org_t *fr) {
43610234Syasuko.eckert@amd.com    printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
43710234Syasuko.eckert@amd.com           "----------\n\n");
43810234Syasuko.eckert@amd.com    printf("Optimal number of banks - %d\n", fr->bank_count);
43910234Syasuko.eckert@amd.com    printf("Grid organization rows x columns - %d x %d\n",
44010234Syasuko.eckert@amd.com           fr->rows, fr->columns);
44110234Syasuko.eckert@amd.com    printf("Network frequency - %g GHz\n",
44210234Syasuko.eckert@amd.com           (1 / fr->nuca_pda.cycle_time)*1e3);
44310234Syasuko.eckert@amd.com    printf("Cache dimension (mm x mm) - %g x %g\n",
44410234Syasuko.eckert@amd.com           fr->nuca_pda.area.h,
44510234Syasuko.eckert@amd.com           fr->nuca_pda.area.w);
44610152Satgutier@umich.edu
44710234Syasuko.eckert@amd.com    fr->router->print_router();
44810152Satgutier@umich.edu
44910234Syasuko.eckert@amd.com    printf("\n\nWire stats:\n");
45010234Syasuko.eckert@amd.com    if (fr->h_wire->wt == Global) {
45110234Syasuko.eckert@amd.com        printf("\tWire type - Full swing global wires with least "
45210234Syasuko.eckert@amd.com               "possible delay\n");
45310234Syasuko.eckert@amd.com    } else if (fr->h_wire->wt == Global_5) {
45410234Syasuko.eckert@amd.com        printf("\tWire type - Full swing global wires with "
45510234Syasuko.eckert@amd.com               "5%% delay penalty\n");
45610234Syasuko.eckert@amd.com    } else if (fr->h_wire->wt == Global_10) {
45710234Syasuko.eckert@amd.com        printf("\tWire type - Full swing global wires with "
45810234Syasuko.eckert@amd.com               "10%% delay penalty\n");
45910234Syasuko.eckert@amd.com    } else if (fr->h_wire->wt == Global_20) {
46010234Syasuko.eckert@amd.com        printf("\tWire type - Full swing global wires with "
46110234Syasuko.eckert@amd.com               "20%% delay penalty\n");
46210234Syasuko.eckert@amd.com    } else if (fr->h_wire->wt == Global_30) {
46310234Syasuko.eckert@amd.com        printf("\tWire type - Full swing global wires with "
46410234Syasuko.eckert@amd.com               "30%% delay penalty\n");
46510234Syasuko.eckert@amd.com    } else if (fr->h_wire->wt == Low_swing) {
46610234Syasuko.eckert@amd.com        printf("\tWire type - Low swing wires\n");
46710234Syasuko.eckert@amd.com    }
46810152Satgutier@umich.edu
46910234Syasuko.eckert@amd.com    printf("\tHorizontal link delay - %g (ns)\n",
47010234Syasuko.eckert@amd.com           fr->h_wire->delay*1e9);
47110234Syasuko.eckert@amd.com    printf("\tVertical link delay - %g (ns)\n",
47210234Syasuko.eckert@amd.com           fr->v_wire->delay*1e9);
47310234Syasuko.eckert@amd.com    printf("\tDelay/length - %g (ns/mm)\n",
47410234Syasuko.eckert@amd.com           fr->h_wire->delay*1e9 / fr->bank_pda.area.w);
47510234Syasuko.eckert@amd.com    printf("\tHorizontal link energy -dynamic/access %g (nJ)\n"
47610234Syasuko.eckert@amd.com           "\t                       -leakage %g (nW)\n\n",
47710234Syasuko.eckert@amd.com           fr->h_wire->power.readOp.dynamic*1e9,
47810234Syasuko.eckert@amd.com           fr->h_wire->power.readOp.leakage*1e9);
47910234Syasuko.eckert@amd.com    printf("\tVertical link energy -dynamic/access %g (nJ)\n"
48010234Syasuko.eckert@amd.com           "\t                     -leakage %g (nW)\n\n",
48110234Syasuko.eckert@amd.com           fr->v_wire->power.readOp.dynamic*1e9,
48210234Syasuko.eckert@amd.com           fr->v_wire->power.readOp.leakage*1e9);
48310234Syasuko.eckert@amd.com    printf("\n\n");
48410234Syasuko.eckert@amd.com    fr->v_wire->print_wire();
48510234Syasuko.eckert@amd.com    printf("\n\nBank stats:\n");
48610152Satgutier@umich.edu}
48710152Satgutier@umich.edu
48810152Satgutier@umich.edu
48910234Syasuko.eckert@amd.comnuca_org_t *
49010234Syasuko.eckert@amd.comNuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval) {
49110234Syasuko.eckert@amd.com    double cost = 0;
49210234Syasuko.eckert@amd.com    double min_cost = BIGNUM;
49310234Syasuko.eckert@amd.com    nuca_org_t *res = NULL;
49410234Syasuko.eckert@amd.com    float d, a, dp, lp, c;
49510234Syasuko.eckert@amd.com    int v;
49610234Syasuko.eckert@amd.com    dp = g_ip->dynamic_power_wt_nuca;
49710234Syasuko.eckert@amd.com    lp = g_ip->leakage_power_wt_nuca;
49810234Syasuko.eckert@amd.com    a = g_ip->area_wt_nuca;
49910234Syasuko.eckert@amd.com    d = g_ip->delay_wt_nuca;
50010234Syasuko.eckert@amd.com    c = g_ip->cycle_time_wt_nuca;
50110152Satgutier@umich.edu
50210234Syasuko.eckert@amd.com    list<nuca_org_t *>::iterator niter;
50310152Satgutier@umich.edu
50410152Satgutier@umich.edu
50510234Syasuko.eckert@amd.com    for (niter = n->begin(); niter != n->end(); niter++) {
50610234Syasuko.eckert@amd.com        fprintf(stderr, "\n-----------------------------"
50710234Syasuko.eckert@amd.com                "---------------\n");
50810152Satgutier@umich.edu
50910152Satgutier@umich.edu
51010234Syasuko.eckert@amd.com        printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t "
51110234Syasuko.eckert@amd.com               "bank_dpower = %g \tleak = %g \tcycle = %g\n",
51210234Syasuko.eckert@amd.com               (*niter)->bank_count,
51310234Syasuko.eckert@amd.com               (*niter)->nuca_pda.delay,
51410234Syasuko.eckert@amd.com               (*niter)->nuca_pda.power.readOp.dynamic,
51510234Syasuko.eckert@amd.com               (*niter)->h_wire->wt,
51610234Syasuko.eckert@amd.com               (*niter)->bank_pda.power.readOp.dynamic,
51710234Syasuko.eckert@amd.com               (*niter)->nuca_pda.power.readOp.leakage,
51810234Syasuko.eckert@amd.com               (*niter)->nuca_pda.cycle_time);
51910152Satgutier@umich.edu
52010152Satgutier@umich.edu
52110234Syasuko.eckert@amd.com        if (g_ip->ed == 1) {
52210234Syasuko.eckert@amd.com            cost = ((*niter)->nuca_pda.delay / minval->min_delay) *
52310234Syasuko.eckert@amd.com                   ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn);
52410234Syasuko.eckert@amd.com            if (min_cost > cost) {
52510234Syasuko.eckert@amd.com                min_cost = cost;
52610234Syasuko.eckert@amd.com                res = ((*niter));
52710234Syasuko.eckert@amd.com            }
52810234Syasuko.eckert@amd.com        } else if (g_ip->ed == 2) {
52910234Syasuko.eckert@amd.com            cost = ((*niter)->nuca_pda.delay / minval->min_delay) *
53010234Syasuko.eckert@amd.com                   ((*niter)->nuca_pda.delay / minval->min_delay) *
53110234Syasuko.eckert@amd.com                   ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn);
53210234Syasuko.eckert@amd.com            if (min_cost > cost) {
53310234Syasuko.eckert@amd.com                min_cost = cost;
53410234Syasuko.eckert@amd.com                res = ((*niter));
53510234Syasuko.eckert@amd.com            }
53610234Syasuko.eckert@amd.com        } else {
53710234Syasuko.eckert@amd.com            /*
53810234Syasuko.eckert@amd.com             * check whether the current organization
53910234Syasuko.eckert@amd.com             * meets the input deviation constraints
54010234Syasuko.eckert@amd.com             */
54110234Syasuko.eckert@amd.com            v = check_nuca_org((*niter), minval);
54210234Syasuko.eckert@amd.com            if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
54310234Syasuko.eckert@amd.com
54410234Syasuko.eckert@amd.com            if (v) {
54510234Syasuko.eckert@amd.com                cost = (d  * ((*niter)->nuca_pda.delay / minval->min_delay) +
54610234Syasuko.eckert@amd.com                        c  * ((*niter)->nuca_pda.cycle_time / minval->min_cyc) +
54710234Syasuko.eckert@amd.com                        dp * ((*niter)->nuca_pda.power.readOp.dynamic /
54810234Syasuko.eckert@amd.com                              minval->min_dyn) +
54910234Syasuko.eckert@amd.com                        lp * ((*niter)->nuca_pda.power.readOp.leakage /
55010234Syasuko.eckert@amd.com                              minval->min_leakage) +
55110234Syasuko.eckert@amd.com                        a  * ((*niter)->nuca_pda.area.get_area() /
55210234Syasuko.eckert@amd.com                              minval->min_area));
55310234Syasuko.eckert@amd.com                fprintf(stderr, "cost = %g\n", cost);
55410234Syasuko.eckert@amd.com
55510234Syasuko.eckert@amd.com                if (min_cost > cost) {
55610234Syasuko.eckert@amd.com                    min_cost = cost;
55710234Syasuko.eckert@amd.com                    res = ((*niter));
55810234Syasuko.eckert@amd.com                }
55910234Syasuko.eckert@amd.com            } else {
56010234Syasuko.eckert@amd.com                niter = n->erase(niter);
56110234Syasuko.eckert@amd.com                if (niter != n->begin())
56210234Syasuko.eckert@amd.com                    niter --;
56310234Syasuko.eckert@amd.com            }
56410234Syasuko.eckert@amd.com        }
56510152Satgutier@umich.edu    }
56610234Syasuko.eckert@amd.com    return res;
56710152Satgutier@umich.edu}
56810152Satgutier@umich.edu
56910234Syasuko.eckert@amd.comint
57010234Syasuko.eckert@amd.comNuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) {
57110234Syasuko.eckert@amd.com    if (((n->nuca_pda.delay - minval->min_delay)*100 / minval->min_delay) >
57210234Syasuko.eckert@amd.com        g_ip->delay_dev_nuca) {
57310234Syasuko.eckert@amd.com        return 0;
57410234Syasuko.eckert@amd.com    }
57510234Syasuko.eckert@amd.com    if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn) /
57610234Syasuko.eckert@amd.com         minval->min_dyn)*100 >
57710234Syasuko.eckert@amd.com        g_ip->dynamic_power_dev_nuca) {
57810234Syasuko.eckert@amd.com        return 0;
57910234Syasuko.eckert@amd.com    }
58010234Syasuko.eckert@amd.com    if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage) /
58110234Syasuko.eckert@amd.com         minval->min_leakage)*100 >
58210234Syasuko.eckert@amd.com        g_ip->leakage_power_dev_nuca) {
58310234Syasuko.eckert@amd.com        return 0;
58410234Syasuko.eckert@amd.com    }
58510234Syasuko.eckert@amd.com    if (((n->nuca_pda.cycle_time - minval->min_cyc) / minval->min_cyc)*100 >
58610234Syasuko.eckert@amd.com        g_ip->cycle_time_dev_nuca) {
58710234Syasuko.eckert@amd.com        return 0;
58810234Syasuko.eckert@amd.com    }
58910234Syasuko.eckert@amd.com    if (((n->nuca_pda.area.get_area() - minval->min_area) / minval->min_area) *
59010234Syasuko.eckert@amd.com        100 >
59110234Syasuko.eckert@amd.com        g_ip->area_dev_nuca) {
59210234Syasuko.eckert@amd.com        return 0;
59310234Syasuko.eckert@amd.com    }
59410234Syasuko.eckert@amd.com    return 1;
59510152Satgutier@umich.edu}
59610152Satgutier@umich.edu
59710234Syasuko.eckert@amd.comvoid
59810234Syasuko.eckert@amd.comNuca::calculate_nuca_area (nuca_org_t *nuca) {
59910234Syasuko.eckert@amd.com    nuca->nuca_pda.area.h =
60010234Syasuko.eckert@amd.com        nuca->rows * ((nuca->h_wire->wire_width +
60110234Syasuko.eckert@amd.com                       nuca->h_wire->wire_spacing)
60210234Syasuko.eckert@amd.com                      * nuca->router->flit_size +
60310234Syasuko.eckert@amd.com                      nuca->bank_pda.area.h);
60410152Satgutier@umich.edu
60510234Syasuko.eckert@amd.com    nuca->nuca_pda.area.w =
60610234Syasuko.eckert@amd.com        nuca->columns * ((nuca->v_wire->wire_width +
60710234Syasuko.eckert@amd.com                          nuca->v_wire->wire_spacing)
60810234Syasuko.eckert@amd.com                         * nuca->router->flit_size +
60910234Syasuko.eckert@amd.com                         nuca->bank_pda.area.w);
61010152Satgutier@umich.edu}
61110152Satgutier@umich.edu
612