110152Satgutier@umich.edu/***************************************************************************** 210152Satgutier@umich.edu * McPAT/CACTI 310152Satgutier@umich.edu * SOFTWARE LICENSE AGREEMENT 410152Satgutier@umich.edu * Copyright 2012 Hewlett-Packard Development Company, L.P. 510234Syasuko.eckert@amd.com * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 610152Satgutier@umich.edu * All Rights Reserved 710152Satgutier@umich.edu * 810152Satgutier@umich.edu * Redistribution and use in source and binary forms, with or without 910152Satgutier@umich.edu * modification, are permitted provided that the following conditions are 1010152Satgutier@umich.edu * met: redistributions of source code must retain the above copyright 1110152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer; 1210152Satgutier@umich.edu * redistributions in binary form must reproduce the above copyright 1310152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer in the 1410152Satgutier@umich.edu * documentation and/or other materials provided with the distribution; 1510152Satgutier@umich.edu * neither the name of the copyright holders nor the names of its 1610152Satgutier@umich.edu * contributors may be used to endorse or promote products derived from 1710152Satgutier@umich.edu * this software without specific prior written permission. 1810152Satgutier@umich.edu 1910152Satgutier@umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2010152Satgutier@umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2110152Satgutier@umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 2210152Satgutier@umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 2310152Satgutier@umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 2410152Satgutier@umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 2510152Satgutier@umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2610152Satgutier@umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2710152Satgutier@umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2810152Satgutier@umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 2910234Syasuko.eckert@amd.com * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3010152Satgutier@umich.edu * 3110152Satgutier@umich.edu ***************************************************************************/ 3210152Satgutier@umich.edu 3310152Satgutier@umich.edu 3410152Satgutier@umich.edu 3510152Satgutier@umich.edu#include <cassert> 3610152Satgutier@umich.edu 3710152Satgutier@umich.edu#include "Ucache.h" 3810152Satgutier@umich.edu#include "nuca.h" 3910152Satgutier@umich.edu 4010234Syasuko.eckert@amd.comunsigned int MIN_BANKSIZE = 65536; 4110152Satgutier@umich.edu#define FIXED_OVERHEAD 55e-12 /* clock skew and jitter in s. Ref: Hrishikesh et al ISCA 01 */ 4210152Satgutier@umich.edu#define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */ 4310152Satgutier@umich.edu#define CONTR_2_BANK_LAT 0 4410152Satgutier@umich.edu 4510152Satgutier@umich.eduint cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cycle time */]; 4610152Satgutier@umich.edu 4710234Syasuko.eckert@amd.comNuca::Nuca( 4810234Syasuko.eckert@amd.com TechnologyParameter::DeviceType *dt = &(g_tp.peri_global) 4910234Syasuko.eckert@amd.com): deviceType(dt) { 5010234Syasuko.eckert@amd.com init_cont(); 5110152Satgutier@umich.edu} 5210152Satgutier@umich.edu 5310152Satgutier@umich.eduvoid 5410234Syasuko.eckert@amd.comNuca::init_cont() { 5510234Syasuko.eckert@amd.com FILE *cont; 5610234Syasuko.eckert@amd.com char line[5000]; 5710234Syasuko.eckert@amd.com char jk[5000]; 5810234Syasuko.eckert@amd.com cont = fopen("contention.dat", "r"); 5910234Syasuko.eckert@amd.com if (!cont) { 6010234Syasuko.eckert@amd.com cout << "contention.dat file is missing!\n"; 6110234Syasuko.eckert@amd.com exit(0); 6210234Syasuko.eckert@amd.com } 6310152Satgutier@umich.edu 6410234Syasuko.eckert@amd.com for (int i = 0; i < 2; i++) { 6510234Syasuko.eckert@amd.com for (int j = 2; j < 5; j++) { 6610234Syasuko.eckert@amd.com for (int k = 0; k < ROUTER_TYPES; k++) { 6710234Syasuko.eckert@amd.com for (int l = 0; l < 7; l++) { 6810234Syasuko.eckert@amd.com int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/]; 6910234Syasuko.eckert@amd.com assert(fscanf(cont, "%[^\n]\n", line) != EOF); 7010234Syasuko.eckert@amd.com sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d", jk, 7110234Syasuko.eckert@amd.com &temp[0], &temp[1], &temp[2], &temp[3], 7210234Syasuko.eckert@amd.com &temp[4], &temp[5], &temp[6], &temp[7]); 7310234Syasuko.eckert@amd.com } 7410234Syasuko.eckert@amd.com } 7510152Satgutier@umich.edu } 7610152Satgutier@umich.edu } 7710234Syasuko.eckert@amd.com fclose(cont); 7810152Satgutier@umich.edu} 7910152Satgutier@umich.edu 8010234Syasuko.eckert@amd.comvoid 8110234Syasuko.eckert@amd.comNuca::print_cont_stats() { 8210234Syasuko.eckert@amd.com for (int i = 0; i < 2; i++) { 8310234Syasuko.eckert@amd.com for (int j = 2; j < 5; j++) { 8410234Syasuko.eckert@amd.com for (int k = 0; k < ROUTER_TYPES; k++) { 8510234Syasuko.eckert@amd.com for (int l = 0; l < 7; l++) { 8610234Syasuko.eckert@amd.com for (int m = 0; l < 7; l++) { 8710234Syasuko.eckert@amd.com cout << cont_stats[i][j][k][l][m] << " "; 8810234Syasuko.eckert@amd.com } 8910234Syasuko.eckert@amd.com cout << endl; 9010234Syasuko.eckert@amd.com } 9110234Syasuko.eckert@amd.com } 9210152Satgutier@umich.edu } 9310152Satgutier@umich.edu } 9410234Syasuko.eckert@amd.com cout << endl; 9510152Satgutier@umich.edu} 9610152Satgutier@umich.edu 9710234Syasuko.eckert@amd.comNuca::~Nuca() { 9810234Syasuko.eckert@amd.com for (int i = wt_min; i <= wt_max; i++) { 9910234Syasuko.eckert@amd.com delete wire_vertical[i]; 10010234Syasuko.eckert@amd.com delete wire_horizontal[i]; 10110234Syasuko.eckert@amd.com } 10210152Satgutier@umich.edu} 10310152Satgutier@umich.edu 10410152Satgutier@umich.edu/* converts latency (in s) to cycles depending upon the FREQUENCY (in GHz) */ 10510234Syasuko.eckert@amd.comint 10610234Syasuko.eckert@amd.comNuca::calc_cycles(double lat, double oper_freq) { 10710234Syasuko.eckert@amd.com //TODO: convert latch delay to FO4 */ 10810234Syasuko.eckert@amd.com double cycle_time = (1.0 / (oper_freq * 1e9)); /*s*/ 10910234Syasuko.eckert@amd.com cycle_time -= LATCH_DELAY; 11010234Syasuko.eckert@amd.com cycle_time -= FIXED_OVERHEAD; 11110152Satgutier@umich.edu 11210234Syasuko.eckert@amd.com return (int)ceil(lat / cycle_time); 11310152Satgutier@umich.edu} 11410152Satgutier@umich.edu 11510152Satgutier@umich.edu 11610152Satgutier@umich.edunuca_org_t::~nuca_org_t() { 11710234Syasuko.eckert@amd.com // if(h_wire) delete h_wire; 11810234Syasuko.eckert@amd.com // if(v_wire) delete v_wire; 11910234Syasuko.eckert@amd.com // if(router) delete router; 12010152Satgutier@umich.edu} 12110152Satgutier@umich.edu 12210152Satgutier@umich.edu/* 12310152Satgutier@umich.edu * Version - 6.0 12410152Satgutier@umich.edu * 12510152Satgutier@umich.edu * Perform exhaustive search across different bank organizatons, 12610152Satgutier@umich.edu * router configurations, grid organizations, and wire models and 12710152Satgutier@umich.edu * find an optimal NUCA organization 12810152Satgutier@umich.edu * For different bank count values 12910152Satgutier@umich.edu * 1. Optimal bank organization is calculated 13010152Satgutier@umich.edu * 2. For each bank organization, find different NUCA organizations 13110152Satgutier@umich.edu * using various router configurations, grid organizations, 13210152Satgutier@umich.edu * and wire models. 13310152Satgutier@umich.edu * 3. NUCA model with the least cost is picked for 13410152Satgutier@umich.edu * this particular bank count 13510152Satgutier@umich.edu * Finally include contention statistics and find the optimal 13610152Satgutier@umich.edu * NUCA configuration 13710152Satgutier@umich.edu */ 13810234Syasuko.eckert@amd.comvoid 13910234Syasuko.eckert@amd.comNuca::sim_nuca() { 14010234Syasuko.eckert@amd.com /* temp variables */ 14110234Syasuko.eckert@amd.com int it, ro, wr; 14210234Syasuko.eckert@amd.com int num_cyc; 14310234Syasuko.eckert@amd.com unsigned int i, j, k; 14410234Syasuko.eckert@amd.com unsigned int r, c; 14510234Syasuko.eckert@amd.com int l2_c; 14610234Syasuko.eckert@amd.com int bank_count = 0; 14710234Syasuko.eckert@amd.com uca_org_t ures; 14810234Syasuko.eckert@amd.com nuca_org_t *opt_n; 14910234Syasuko.eckert@amd.com mem_array tag, data; 15010234Syasuko.eckert@amd.com list<nuca_org_t *> nuca_list; 15110234Syasuko.eckert@amd.com Router *router_s[ROUTER_TYPES]; 15210234Syasuko.eckert@amd.com router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global)); 15310234Syasuko.eckert@amd.com router_s[0]->print_router(); 15410234Syasuko.eckert@amd.com router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global)); 15510234Syasuko.eckert@amd.com router_s[1]->print_router(); 15610234Syasuko.eckert@amd.com router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global)); 15710234Syasuko.eckert@amd.com router_s[2]->print_router(); 15810152Satgutier@umich.edu 15910234Syasuko.eckert@amd.com int core_in; // to store no. of cores 16010152Satgutier@umich.edu 16110234Syasuko.eckert@amd.com /* to search diff grid organizations */ 16210234Syasuko.eckert@amd.com double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat, 16310234Syasuko.eckert@amd.com curr_acclat; 16410234Syasuko.eckert@amd.com double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power, 16510234Syasuko.eckert@amd.com avg_leakage_power; 16610152Satgutier@umich.edu 16710234Syasuko.eckert@amd.com double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF; 16810234Syasuko.eckert@amd.com int opt_rows = 0; 16910234Syasuko.eckert@amd.com int opt_columns = 0; 17010234Syasuko.eckert@amd.com double opt_totno_hops = 0; 17110234Syasuko.eckert@amd.com double opt_avg_hop = 0; 17210234Syasuko.eckert@amd.com double opt_dyn_power = 0, opt_leakage_power = 0; 17310234Syasuko.eckert@amd.com min_values_t minval; 17410152Satgutier@umich.edu 17510234Syasuko.eckert@amd.com int bank_start = 0; 17610152Satgutier@umich.edu 17710234Syasuko.eckert@amd.com int flit_width = 0; 17810152Satgutier@umich.edu 17910234Syasuko.eckert@amd.com /* vertical and horizontal hop latency values */ 18010234Syasuko.eckert@amd.com int ver_hop_lat, hor_hop_lat; /* in cycles */ 18110152Satgutier@umich.edu 18210152Satgutier@umich.edu 18310234Syasuko.eckert@amd.com /* no. of different bank sizes to consider */ 18410234Syasuko.eckert@amd.com int iterations; 18510152Satgutier@umich.edu 18610152Satgutier@umich.edu 18710234Syasuko.eckert@amd.com g_ip->nuca_cache_sz = g_ip->cache_sz; 18810234Syasuko.eckert@amd.com nuca_list.push_back(new nuca_org_t()); 18910152Satgutier@umich.edu 19010234Syasuko.eckert@amd.com if (g_ip->cache_level == 0) l2_c = 1; 19110234Syasuko.eckert@amd.com else l2_c = 0; 19210152Satgutier@umich.edu 19310234Syasuko.eckert@amd.com if (g_ip->cores <= 4) core_in = 2; 19410234Syasuko.eckert@amd.com else if (g_ip->cores <= 8) core_in = 3; 19510234Syasuko.eckert@amd.com else if (g_ip->cores <= 16) core_in = 4; 19610234Syasuko.eckert@amd.com else { 19710234Syasuko.eckert@amd.com cout << "Number of cores should be <= 16!\n"; 19810234Syasuko.eckert@amd.com exit(0); 19910234Syasuko.eckert@amd.com } 20010152Satgutier@umich.edu 20110152Satgutier@umich.edu 20210234Syasuko.eckert@amd.com // set the lower bound to an appropriate value. this depends on cache associativity 20310234Syasuko.eckert@amd.com if (g_ip->assoc > 2) { 20410234Syasuko.eckert@amd.com i = 2; 20510234Syasuko.eckert@amd.com while (i != g_ip->assoc) { 20610234Syasuko.eckert@amd.com MIN_BANKSIZE *= 2; 20710234Syasuko.eckert@amd.com i *= 2; 20810234Syasuko.eckert@amd.com } 20910152Satgutier@umich.edu } 21010152Satgutier@umich.edu 21110234Syasuko.eckert@amd.com iterations = (int)logtwo((int)g_ip->cache_sz / MIN_BANKSIZE); 21210152Satgutier@umich.edu 21310234Syasuko.eckert@amd.com if (g_ip->force_wiretype) { 21410234Syasuko.eckert@amd.com if (g_ip->wt == Low_swing) { 21510234Syasuko.eckert@amd.com wt_min = Low_swing; 21610234Syasuko.eckert@amd.com wt_max = Low_swing; 21710234Syasuko.eckert@amd.com } else { 21810234Syasuko.eckert@amd.com wt_min = Global; 21910234Syasuko.eckert@amd.com wt_max = Low_swing - 1; 22010234Syasuko.eckert@amd.com } 22110234Syasuko.eckert@amd.com } else { 22210234Syasuko.eckert@amd.com wt_min = Global; 22310234Syasuko.eckert@amd.com wt_max = Low_swing; 22410152Satgutier@umich.edu } 22510234Syasuko.eckert@amd.com if (g_ip->nuca_bank_count != 0) { // simulate just one bank 22610234Syasuko.eckert@amd.com if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 && 22710234Syasuko.eckert@amd.com g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 && 22810234Syasuko.eckert@amd.com g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) { 22910234Syasuko.eckert@amd.com fprintf(stderr, "Incorrect bank count value! Please fix the ", 23010234Syasuko.eckert@amd.com "value in cache.cfg\n"); 23110234Syasuko.eckert@amd.com } 23210234Syasuko.eckert@amd.com bank_start = (int)logtwo((double)g_ip->nuca_bank_count); 23310234Syasuko.eckert@amd.com iterations = bank_start + 1; 23410234Syasuko.eckert@amd.com g_ip->cache_sz = g_ip->cache_sz / g_ip->nuca_bank_count; 23510152Satgutier@umich.edu } 23610234Syasuko.eckert@amd.com cout << "Simulating various NUCA configurations\n"; 23710234Syasuko.eckert@amd.com for (it = bank_start; it < iterations; it++) { 23810234Syasuko.eckert@amd.com /* different bank count values */ 23910234Syasuko.eckert@amd.com ures.tag_array2 = &tag; 24010234Syasuko.eckert@amd.com ures.data_array2 = &data; 24110234Syasuko.eckert@amd.com /* 24210234Syasuko.eckert@amd.com * find the optimal bank organization 24310234Syasuko.eckert@amd.com */ 24410234Syasuko.eckert@amd.com solve(&ures); 24510234Syasuko.eckert@amd.com// output_UCA(&ures); 24610234Syasuko.eckert@amd.com bank_count = g_ip->nuca_cache_sz / g_ip->cache_sz; 24710234Syasuko.eckert@amd.com cout << "====" << g_ip->cache_sz << "\n"; 24810234Syasuko.eckert@amd.com 24910234Syasuko.eckert@amd.com for (wr = wt_min; wr <= wt_max; wr++) { 25010234Syasuko.eckert@amd.com 25110234Syasuko.eckert@amd.com for (ro = 0; ro < ROUTER_TYPES; ro++) { 25210234Syasuko.eckert@amd.com flit_width = (int) router_s[ro]->flit_size; //initialize router 25310234Syasuko.eckert@amd.com nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time; 25410234Syasuko.eckert@amd.com 25510234Syasuko.eckert@amd.com /* calculate router and wire parameters */ 25610234Syasuko.eckert@amd.com 25710234Syasuko.eckert@amd.com double vlength = ures.cache_ht; /* length of the wire (u)*/ 25810234Syasuko.eckert@amd.com double hlength = ures.cache_len; // u 25910234Syasuko.eckert@amd.com 26010234Syasuko.eckert@amd.com /* find delay, area, and power for wires */ 26110234Syasuko.eckert@amd.com wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength); 26210234Syasuko.eckert@amd.com wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength); 26310234Syasuko.eckert@amd.com 26410234Syasuko.eckert@amd.com 26510234Syasuko.eckert@amd.com hor_hop_lat = 26610234Syasuko.eckert@amd.com calc_cycles(wire_horizontal[wr]->delay, 26710234Syasuko.eckert@amd.com 1 /(nuca_list.back()->nuca_pda.cycle_time * 26810234Syasuko.eckert@amd.com .001)); 26910234Syasuko.eckert@amd.com ver_hop_lat = 27010234Syasuko.eckert@amd.com calc_cycles(wire_vertical[wr]->delay, 27110234Syasuko.eckert@amd.com 1 / (nuca_list.back()->nuca_pda.cycle_time * 27210234Syasuko.eckert@amd.com .001)); 27310234Syasuko.eckert@amd.com 27410234Syasuko.eckert@amd.com /* 27510234Syasuko.eckert@amd.com * assume a grid like topology and explore for optimal network 27610234Syasuko.eckert@amd.com * configuration using different row and column count values. 27710234Syasuko.eckert@amd.com */ 27810234Syasuko.eckert@amd.com for (c = 1; c <= (unsigned int)bank_count; c++) { 27910234Syasuko.eckert@amd.com while (bank_count % c != 0) c++; 28010234Syasuko.eckert@amd.com r = bank_count / c; 28110234Syasuko.eckert@amd.com 28210234Syasuko.eckert@amd.com /* 28310234Syasuko.eckert@amd.com * to find the avg access latency of a NUCA cache, uncontended 28410234Syasuko.eckert@amd.com * access time to each bank from the 28510234Syasuko.eckert@amd.com * cache controller is calculated. 28610234Syasuko.eckert@amd.com * avg latency = 28710234Syasuko.eckert@amd.com * sum of the access latencies to individual banks)/bank 28810234Syasuko.eckert@amd.com * count value. 28910234Syasuko.eckert@amd.com */ 29010234Syasuko.eckert@amd.com totno_hops = totno_hhops = totno_vhops = tot_lat = 0; 29110234Syasuko.eckert@amd.com k = 1; 29210234Syasuko.eckert@amd.com for (i = 0; i < r; i++) { 29310234Syasuko.eckert@amd.com for (j = 0; j < c; j++) { 29410234Syasuko.eckert@amd.com /* 29510234Syasuko.eckert@amd.com * vertical hops including the 29610234Syasuko.eckert@amd.com * first hop from the cache controller 29710234Syasuko.eckert@amd.com */ 29810234Syasuko.eckert@amd.com curr_hop = i + 1; 29910234Syasuko.eckert@amd.com curr_hop += j; /* horizontal hops */ 30010234Syasuko.eckert@amd.com totno_hhops += j; 30110234Syasuko.eckert@amd.com totno_vhops += (i + 1); 30210234Syasuko.eckert@amd.com curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT + 30310234Syasuko.eckert@amd.com j * hor_hop_lat); 30410234Syasuko.eckert@amd.com 30510234Syasuko.eckert@amd.com tot_lat += curr_acclat; 30610234Syasuko.eckert@amd.com totno_hops += curr_hop; 30710234Syasuko.eckert@amd.com } 30810234Syasuko.eckert@amd.com } 30910234Syasuko.eckert@amd.com avg_lat = tot_lat / bank_count; 31010234Syasuko.eckert@amd.com avg_hop = totno_hops / bank_count; 31110234Syasuko.eckert@amd.com avg_hhop = totno_hhops / bank_count; 31210234Syasuko.eckert@amd.com avg_vhop = totno_vhops / bank_count; 31310234Syasuko.eckert@amd.com 31410234Syasuko.eckert@amd.com /* net access latency */ 31510234Syasuko.eckert@amd.com curr_acclat = 2 * avg_lat + 2 * (router_s[ro]->delay * 31610234Syasuko.eckert@amd.com avg_hop) + 31710234Syasuko.eckert@amd.com calc_cycles(ures.access_time, 31810234Syasuko.eckert@amd.com 1 / 31910234Syasuko.eckert@amd.com (nuca_list.back()->nuca_pda.cycle_time * 32010234Syasuko.eckert@amd.com .001)); 32110234Syasuko.eckert@amd.com 32210234Syasuko.eckert@amd.com /* avg access lat of nuca */ 32310234Syasuko.eckert@amd.com avg_dyn_power = 32410234Syasuko.eckert@amd.com avg_hop * 32510234Syasuko.eckert@amd.com (router_s[ro]->power.readOp.dynamic) + avg_hhop * 32610234Syasuko.eckert@amd.com (wire_horizontal[wr]->power.readOp.dynamic) * 32710234Syasuko.eckert@amd.com (g_ip->block_sz * 8 + 64) + avg_vhop * 32810234Syasuko.eckert@amd.com (wire_vertical[wr]->power.readOp.dynamic) * 32910234Syasuko.eckert@amd.com (g_ip->block_sz * 8 + 64) + ures.power.readOp.dynamic; 33010234Syasuko.eckert@amd.com 33110234Syasuko.eckert@amd.com avg_leakage_power = 33210234Syasuko.eckert@amd.com bank_count * router_s[ro]->power.readOp.leakage + 33310234Syasuko.eckert@amd.com avg_hhop * (wire_horizontal[wr]->power.readOp.leakage * 33410234Syasuko.eckert@amd.com wire_horizontal[wr]->delay) * flit_width + 33510234Syasuko.eckert@amd.com avg_vhop * (wire_vertical[wr]->power.readOp.leakage * 33610234Syasuko.eckert@amd.com wire_horizontal[wr]->delay); 33710234Syasuko.eckert@amd.com 33810234Syasuko.eckert@amd.com if (curr_acclat < opt_acclat) { 33910234Syasuko.eckert@amd.com opt_acclat = curr_acclat; 34010234Syasuko.eckert@amd.com opt_tot_lat = tot_lat; 34110234Syasuko.eckert@amd.com opt_avg_lat = avg_lat; 34210234Syasuko.eckert@amd.com opt_totno_hops = totno_hops; 34310234Syasuko.eckert@amd.com opt_avg_hop = avg_hop; 34410234Syasuko.eckert@amd.com opt_rows = r; 34510234Syasuko.eckert@amd.com opt_columns = c; 34610234Syasuko.eckert@amd.com opt_dyn_power = avg_dyn_power; 34710234Syasuko.eckert@amd.com opt_leakage_power = avg_leakage_power; 34810234Syasuko.eckert@amd.com } 34910234Syasuko.eckert@amd.com totno_hops = 0; 35010234Syasuko.eckert@amd.com tot_lat = 0; 35110234Syasuko.eckert@amd.com totno_hhops = 0; 35210234Syasuko.eckert@amd.com totno_vhops = 0; 35310234Syasuko.eckert@amd.com } 35410234Syasuko.eckert@amd.com nuca_list.back()->wire_pda.power.readOp.dynamic = 35510234Syasuko.eckert@amd.com opt_avg_hop * flit_width * 35610234Syasuko.eckert@amd.com (wire_horizontal[wr]->power.readOp.dynamic + 35710234Syasuko.eckert@amd.com wire_vertical[wr]->power.readOp.dynamic); 35810234Syasuko.eckert@amd.com nuca_list.back()->avg_hops = opt_avg_hop; 35910234Syasuko.eckert@amd.com /* network delay/power */ 36010234Syasuko.eckert@amd.com nuca_list.back()->h_wire = wire_horizontal[wr]; 36110234Syasuko.eckert@amd.com nuca_list.back()->v_wire = wire_vertical[wr]; 36210234Syasuko.eckert@amd.com nuca_list.back()->router = router_s[ro]; 36310234Syasuko.eckert@amd.com /* bank delay/power */ 36410234Syasuko.eckert@amd.com 36510234Syasuko.eckert@amd.com nuca_list.back()->bank_pda.delay = ures.access_time; 36610234Syasuko.eckert@amd.com nuca_list.back()->bank_pda.power = ures.power; 36710234Syasuko.eckert@amd.com nuca_list.back()->bank_pda.area.h = ures.cache_ht; 36810234Syasuko.eckert@amd.com nuca_list.back()->bank_pda.area.w = ures.cache_len; 36910234Syasuko.eckert@amd.com nuca_list.back()->bank_pda.cycle_time = ures.cycle_time; 37010234Syasuko.eckert@amd.com 37110234Syasuko.eckert@amd.com num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/, 37210234Syasuko.eckert@amd.com 1 / 37310234Syasuko.eckert@amd.com (nuca_list.back()->nuca_pda.cycle_time * 37410234Syasuko.eckert@amd.com .001/*GHz*/)); 37510234Syasuko.eckert@amd.com if (num_cyc % 2 != 0) num_cyc++; 37610234Syasuko.eckert@amd.com if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles 37710234Syasuko.eckert@amd.com 37810234Syasuko.eckert@amd.com if (it < 7) { 37910234Syasuko.eckert@amd.com nuca_list.back()->nuca_pda.delay = opt_acclat + 38010234Syasuko.eckert@amd.com cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; 38110234Syasuko.eckert@amd.com nuca_list.back()->contention = 38210234Syasuko.eckert@amd.com cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; 38310234Syasuko.eckert@amd.com } else { 38410234Syasuko.eckert@amd.com nuca_list.back()->nuca_pda.delay = opt_acclat + 38510234Syasuko.eckert@amd.com cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; 38610234Syasuko.eckert@amd.com nuca_list.back()->contention = 38710234Syasuko.eckert@amd.com cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; 38810234Syasuko.eckert@amd.com } 38910234Syasuko.eckert@amd.com nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power; 39010234Syasuko.eckert@amd.com nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power; 39110234Syasuko.eckert@amd.com 39210234Syasuko.eckert@amd.com /* array organization */ 39310234Syasuko.eckert@amd.com nuca_list.back()->bank_count = bank_count; 39410234Syasuko.eckert@amd.com nuca_list.back()->rows = opt_rows; 39510234Syasuko.eckert@amd.com nuca_list.back()->columns = opt_columns; 39610234Syasuko.eckert@amd.com calculate_nuca_area (nuca_list.back()); 39710234Syasuko.eckert@amd.com 39810234Syasuko.eckert@amd.com minval.update_min_values(nuca_list.back()); 39910234Syasuko.eckert@amd.com nuca_list.push_back(new nuca_org_t()); 40010234Syasuko.eckert@amd.com opt_acclat = BIGNUM; 40110234Syasuko.eckert@amd.com 40210234Syasuko.eckert@amd.com } 40310234Syasuko.eckert@amd.com } 40410234Syasuko.eckert@amd.com g_ip->cache_sz /= 2; 40510152Satgutier@umich.edu } 40610152Satgutier@umich.edu 40710234Syasuko.eckert@amd.com delete(nuca_list.back()); 40810234Syasuko.eckert@amd.com nuca_list.pop_back(); 40910234Syasuko.eckert@amd.com opt_n = find_optimal_nuca(&nuca_list, &minval); 41010234Syasuko.eckert@amd.com print_nuca(opt_n); 41110234Syasuko.eckert@amd.com g_ip->cache_sz = g_ip->nuca_cache_sz / opt_n->bank_count; 41210152Satgutier@umich.edu 41310234Syasuko.eckert@amd.com list<nuca_org_t *>::iterator niter; 41410234Syasuko.eckert@amd.com for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) { 41510234Syasuko.eckert@amd.com delete *niter; 41610234Syasuko.eckert@amd.com } 41710234Syasuko.eckert@amd.com nuca_list.clear(); 41810152Satgutier@umich.edu 41910234Syasuko.eckert@amd.com for (int i = 0; i < ROUTER_TYPES; i++) { 42010234Syasuko.eckert@amd.com delete router_s[i]; 42110152Satgutier@umich.edu } 42210234Syasuko.eckert@amd.com g_ip->display_ip(); 42310234Syasuko.eckert@amd.com // g_ip->force_cache_config = true; 42410234Syasuko.eckert@amd.com // g_ip->ndwl = 8; 42510234Syasuko.eckert@amd.com // g_ip->ndbl = 16; 42610234Syasuko.eckert@amd.com // g_ip->nspd = 4; 42710234Syasuko.eckert@amd.com // g_ip->ndcm = 1; 42810234Syasuko.eckert@amd.com // g_ip->ndsam1 = 8; 42910234Syasuko.eckert@amd.com // g_ip->ndsam2 = 32; 43010152Satgutier@umich.edu 43110152Satgutier@umich.edu} 43210152Satgutier@umich.edu 43310152Satgutier@umich.edu 43410234Syasuko.eckert@amd.comvoid 43510234Syasuko.eckert@amd.comNuca::print_nuca (nuca_org_t *fr) { 43610234Syasuko.eckert@amd.com printf("\n---------- CACTI version 6.5, Non-uniform Cache Access " 43710234Syasuko.eckert@amd.com "----------\n\n"); 43810234Syasuko.eckert@amd.com printf("Optimal number of banks - %d\n", fr->bank_count); 43910234Syasuko.eckert@amd.com printf("Grid organization rows x columns - %d x %d\n", 44010234Syasuko.eckert@amd.com fr->rows, fr->columns); 44110234Syasuko.eckert@amd.com printf("Network frequency - %g GHz\n", 44210234Syasuko.eckert@amd.com (1 / fr->nuca_pda.cycle_time)*1e3); 44310234Syasuko.eckert@amd.com printf("Cache dimension (mm x mm) - %g x %g\n", 44410234Syasuko.eckert@amd.com fr->nuca_pda.area.h, 44510234Syasuko.eckert@amd.com fr->nuca_pda.area.w); 44610152Satgutier@umich.edu 44710234Syasuko.eckert@amd.com fr->router->print_router(); 44810152Satgutier@umich.edu 44910234Syasuko.eckert@amd.com printf("\n\nWire stats:\n"); 45010234Syasuko.eckert@amd.com if (fr->h_wire->wt == Global) { 45110234Syasuko.eckert@amd.com printf("\tWire type - Full swing global wires with least " 45210234Syasuko.eckert@amd.com "possible delay\n"); 45310234Syasuko.eckert@amd.com } else if (fr->h_wire->wt == Global_5) { 45410234Syasuko.eckert@amd.com printf("\tWire type - Full swing global wires with " 45510234Syasuko.eckert@amd.com "5%% delay penalty\n"); 45610234Syasuko.eckert@amd.com } else if (fr->h_wire->wt == Global_10) { 45710234Syasuko.eckert@amd.com printf("\tWire type - Full swing global wires with " 45810234Syasuko.eckert@amd.com "10%% delay penalty\n"); 45910234Syasuko.eckert@amd.com } else if (fr->h_wire->wt == Global_20) { 46010234Syasuko.eckert@amd.com printf("\tWire type - Full swing global wires with " 46110234Syasuko.eckert@amd.com "20%% delay penalty\n"); 46210234Syasuko.eckert@amd.com } else if (fr->h_wire->wt == Global_30) { 46310234Syasuko.eckert@amd.com printf("\tWire type - Full swing global wires with " 46410234Syasuko.eckert@amd.com "30%% delay penalty\n"); 46510234Syasuko.eckert@amd.com } else if (fr->h_wire->wt == Low_swing) { 46610234Syasuko.eckert@amd.com printf("\tWire type - Low swing wires\n"); 46710234Syasuko.eckert@amd.com } 46810152Satgutier@umich.edu 46910234Syasuko.eckert@amd.com printf("\tHorizontal link delay - %g (ns)\n", 47010234Syasuko.eckert@amd.com fr->h_wire->delay*1e9); 47110234Syasuko.eckert@amd.com printf("\tVertical link delay - %g (ns)\n", 47210234Syasuko.eckert@amd.com fr->v_wire->delay*1e9); 47310234Syasuko.eckert@amd.com printf("\tDelay/length - %g (ns/mm)\n", 47410234Syasuko.eckert@amd.com fr->h_wire->delay*1e9 / fr->bank_pda.area.w); 47510234Syasuko.eckert@amd.com printf("\tHorizontal link energy -dynamic/access %g (nJ)\n" 47610234Syasuko.eckert@amd.com "\t -leakage %g (nW)\n\n", 47710234Syasuko.eckert@amd.com fr->h_wire->power.readOp.dynamic*1e9, 47810234Syasuko.eckert@amd.com fr->h_wire->power.readOp.leakage*1e9); 47910234Syasuko.eckert@amd.com printf("\tVertical link energy -dynamic/access %g (nJ)\n" 48010234Syasuko.eckert@amd.com "\t -leakage %g (nW)\n\n", 48110234Syasuko.eckert@amd.com fr->v_wire->power.readOp.dynamic*1e9, 48210234Syasuko.eckert@amd.com fr->v_wire->power.readOp.leakage*1e9); 48310234Syasuko.eckert@amd.com printf("\n\n"); 48410234Syasuko.eckert@amd.com fr->v_wire->print_wire(); 48510234Syasuko.eckert@amd.com printf("\n\nBank stats:\n"); 48610152Satgutier@umich.edu} 48710152Satgutier@umich.edu 48810152Satgutier@umich.edu 48910234Syasuko.eckert@amd.comnuca_org_t * 49010234Syasuko.eckert@amd.comNuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval) { 49110234Syasuko.eckert@amd.com double cost = 0; 49210234Syasuko.eckert@amd.com double min_cost = BIGNUM; 49310234Syasuko.eckert@amd.com nuca_org_t *res = NULL; 49410234Syasuko.eckert@amd.com float d, a, dp, lp, c; 49510234Syasuko.eckert@amd.com int v; 49610234Syasuko.eckert@amd.com dp = g_ip->dynamic_power_wt_nuca; 49710234Syasuko.eckert@amd.com lp = g_ip->leakage_power_wt_nuca; 49810234Syasuko.eckert@amd.com a = g_ip->area_wt_nuca; 49910234Syasuko.eckert@amd.com d = g_ip->delay_wt_nuca; 50010234Syasuko.eckert@amd.com c = g_ip->cycle_time_wt_nuca; 50110152Satgutier@umich.edu 50210234Syasuko.eckert@amd.com list<nuca_org_t *>::iterator niter; 50310152Satgutier@umich.edu 50410152Satgutier@umich.edu 50510234Syasuko.eckert@amd.com for (niter = n->begin(); niter != n->end(); niter++) { 50610234Syasuko.eckert@amd.com fprintf(stderr, "\n-----------------------------" 50710234Syasuko.eckert@amd.com "---------------\n"); 50810152Satgutier@umich.edu 50910152Satgutier@umich.edu 51010234Syasuko.eckert@amd.com printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t " 51110234Syasuko.eckert@amd.com "bank_dpower = %g \tleak = %g \tcycle = %g\n", 51210234Syasuko.eckert@amd.com (*niter)->bank_count, 51310234Syasuko.eckert@amd.com (*niter)->nuca_pda.delay, 51410234Syasuko.eckert@amd.com (*niter)->nuca_pda.power.readOp.dynamic, 51510234Syasuko.eckert@amd.com (*niter)->h_wire->wt, 51610234Syasuko.eckert@amd.com (*niter)->bank_pda.power.readOp.dynamic, 51710234Syasuko.eckert@amd.com (*niter)->nuca_pda.power.readOp.leakage, 51810234Syasuko.eckert@amd.com (*niter)->nuca_pda.cycle_time); 51910152Satgutier@umich.edu 52010152Satgutier@umich.edu 52110234Syasuko.eckert@amd.com if (g_ip->ed == 1) { 52210234Syasuko.eckert@amd.com cost = ((*niter)->nuca_pda.delay / minval->min_delay) * 52310234Syasuko.eckert@amd.com ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn); 52410234Syasuko.eckert@amd.com if (min_cost > cost) { 52510234Syasuko.eckert@amd.com min_cost = cost; 52610234Syasuko.eckert@amd.com res = ((*niter)); 52710234Syasuko.eckert@amd.com } 52810234Syasuko.eckert@amd.com } else if (g_ip->ed == 2) { 52910234Syasuko.eckert@amd.com cost = ((*niter)->nuca_pda.delay / minval->min_delay) * 53010234Syasuko.eckert@amd.com ((*niter)->nuca_pda.delay / minval->min_delay) * 53110234Syasuko.eckert@amd.com ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn); 53210234Syasuko.eckert@amd.com if (min_cost > cost) { 53310234Syasuko.eckert@amd.com min_cost = cost; 53410234Syasuko.eckert@amd.com res = ((*niter)); 53510234Syasuko.eckert@amd.com } 53610234Syasuko.eckert@amd.com } else { 53710234Syasuko.eckert@amd.com /* 53810234Syasuko.eckert@amd.com * check whether the current organization 53910234Syasuko.eckert@amd.com * meets the input deviation constraints 54010234Syasuko.eckert@amd.com */ 54110234Syasuko.eckert@amd.com v = check_nuca_org((*niter), minval); 54210234Syasuko.eckert@amd.com if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling 54310234Syasuko.eckert@amd.com 54410234Syasuko.eckert@amd.com if (v) { 54510234Syasuko.eckert@amd.com cost = (d * ((*niter)->nuca_pda.delay / minval->min_delay) + 54610234Syasuko.eckert@amd.com c * ((*niter)->nuca_pda.cycle_time / minval->min_cyc) + 54710234Syasuko.eckert@amd.com dp * ((*niter)->nuca_pda.power.readOp.dynamic / 54810234Syasuko.eckert@amd.com minval->min_dyn) + 54910234Syasuko.eckert@amd.com lp * ((*niter)->nuca_pda.power.readOp.leakage / 55010234Syasuko.eckert@amd.com minval->min_leakage) + 55110234Syasuko.eckert@amd.com a * ((*niter)->nuca_pda.area.get_area() / 55210234Syasuko.eckert@amd.com minval->min_area)); 55310234Syasuko.eckert@amd.com fprintf(stderr, "cost = %g\n", cost); 55410234Syasuko.eckert@amd.com 55510234Syasuko.eckert@amd.com if (min_cost > cost) { 55610234Syasuko.eckert@amd.com min_cost = cost; 55710234Syasuko.eckert@amd.com res = ((*niter)); 55810234Syasuko.eckert@amd.com } 55910234Syasuko.eckert@amd.com } else { 56010234Syasuko.eckert@amd.com niter = n->erase(niter); 56110234Syasuko.eckert@amd.com if (niter != n->begin()) 56210234Syasuko.eckert@amd.com niter --; 56310234Syasuko.eckert@amd.com } 56410234Syasuko.eckert@amd.com } 56510152Satgutier@umich.edu } 56610234Syasuko.eckert@amd.com return res; 56710152Satgutier@umich.edu} 56810152Satgutier@umich.edu 56910234Syasuko.eckert@amd.comint 57010234Syasuko.eckert@amd.comNuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) { 57110234Syasuko.eckert@amd.com if (((n->nuca_pda.delay - minval->min_delay)*100 / minval->min_delay) > 57210234Syasuko.eckert@amd.com g_ip->delay_dev_nuca) { 57310234Syasuko.eckert@amd.com return 0; 57410234Syasuko.eckert@amd.com } 57510234Syasuko.eckert@amd.com if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn) / 57610234Syasuko.eckert@amd.com minval->min_dyn)*100 > 57710234Syasuko.eckert@amd.com g_ip->dynamic_power_dev_nuca) { 57810234Syasuko.eckert@amd.com return 0; 57910234Syasuko.eckert@amd.com } 58010234Syasuko.eckert@amd.com if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage) / 58110234Syasuko.eckert@amd.com minval->min_leakage)*100 > 58210234Syasuko.eckert@amd.com g_ip->leakage_power_dev_nuca) { 58310234Syasuko.eckert@amd.com return 0; 58410234Syasuko.eckert@amd.com } 58510234Syasuko.eckert@amd.com if (((n->nuca_pda.cycle_time - minval->min_cyc) / minval->min_cyc)*100 > 58610234Syasuko.eckert@amd.com g_ip->cycle_time_dev_nuca) { 58710234Syasuko.eckert@amd.com return 0; 58810234Syasuko.eckert@amd.com } 58910234Syasuko.eckert@amd.com if (((n->nuca_pda.area.get_area() - minval->min_area) / minval->min_area) * 59010234Syasuko.eckert@amd.com 100 > 59110234Syasuko.eckert@amd.com g_ip->area_dev_nuca) { 59210234Syasuko.eckert@amd.com return 0; 59310234Syasuko.eckert@amd.com } 59410234Syasuko.eckert@amd.com return 1; 59510152Satgutier@umich.edu} 59610152Satgutier@umich.edu 59710234Syasuko.eckert@amd.comvoid 59810234Syasuko.eckert@amd.comNuca::calculate_nuca_area (nuca_org_t *nuca) { 59910234Syasuko.eckert@amd.com nuca->nuca_pda.area.h = 60010234Syasuko.eckert@amd.com nuca->rows * ((nuca->h_wire->wire_width + 60110234Syasuko.eckert@amd.com nuca->h_wire->wire_spacing) 60210234Syasuko.eckert@amd.com * nuca->router->flit_size + 60310234Syasuko.eckert@amd.com nuca->bank_pda.area.h); 60410152Satgutier@umich.edu 60510234Syasuko.eckert@amd.com nuca->nuca_pda.area.w = 60610234Syasuko.eckert@amd.com nuca->columns * ((nuca->v_wire->wire_width + 60710234Syasuko.eckert@amd.com nuca->v_wire->wire_spacing) 60810234Syasuko.eckert@amd.com * nuca->router->flit_size + 60910234Syasuko.eckert@amd.com nuca->bank_pda.area.w); 61010152Satgutier@umich.edu} 61110152Satgutier@umich.edu 612