110152Satgutier@umich.edu/***************************************************************************** 210152Satgutier@umich.edu * McPAT 310152Satgutier@umich.edu * SOFTWARE LICENSE AGREEMENT 410152Satgutier@umich.edu * Copyright 2012 Hewlett-Packard Development Company, L.P. 510234Syasuko.eckert@amd.com * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 610152Satgutier@umich.edu * All Rights Reserved 710152Satgutier@umich.edu * 810152Satgutier@umich.edu * Redistribution and use in source and binary forms, with or without 910152Satgutier@umich.edu * modification, are permitted provided that the following conditions are 1010152Satgutier@umich.edu * met: redistributions of source code must retain the above copyright 1110152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer; 1210152Satgutier@umich.edu * redistributions in binary form must reproduce the above copyright 1310152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer in the 1410152Satgutier@umich.edu * documentation and/or other materials provided with the distribution; 1510152Satgutier@umich.edu * neither the name of the copyright holders nor the names of its 1610152Satgutier@umich.edu * contributors may be used to endorse or promote products derived from 1710152Satgutier@umich.edu * this software without specific prior written permission. 1810152Satgutier@umich.edu 1910152Satgutier@umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2010152Satgutier@umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2110152Satgutier@umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 2210152Satgutier@umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 2310152Satgutier@umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 2410152Satgutier@umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 2510152Satgutier@umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2610152Satgutier@umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2710152Satgutier@umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2810152Satgutier@umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 2910234Syasuko.eckert@amd.com * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3010152Satgutier@umich.edu * 3110152Satgutier@umich.edu ***************************************************************************/ 3210152Satgutier@umich.edu 3310152Satgutier@umich.edu#include <iostream> 3410234Syasuko.eckert@amd.com#include <math.h> 3510152Satgutier@umich.edu 3610152Satgutier@umich.edu#include "area.h" 3710152Satgutier@umich.edu#include "array.h" 3810234Syasuko.eckert@amd.com#include "common.h" 3910152Satgutier@umich.edu#include "decoder.h" 4010152Satgutier@umich.edu#include "parameter.h" 4110152Satgutier@umich.edu 4210152Satgutier@umich.eduusing namespace std; 4310152Satgutier@umich.edu 4410234Syasuko.eckert@amd.comdouble ArrayST::area_efficiency_threshold = 20.0; 4510234Syasuko.eckert@amd.comint ArrayST::ed = 0; 4610234Syasuko.eckert@amd.com//Fixed number, make sure timing can be satisfied. 4710234Syasuko.eckert@amd.comint ArrayST::delay_wt = 100; 4810234Syasuko.eckert@amd.comint ArrayST::cycle_time_wt = 1000; 4910234Syasuko.eckert@amd.com//Fixed number, This is used to exhaustive search for individual components. 5010234Syasuko.eckert@amd.comint ArrayST::area_wt = 10; 5110234Syasuko.eckert@amd.com//Fixed number, This is used to exhaustive search for individual components. 5210234Syasuko.eckert@amd.comint ArrayST::dynamic_power_wt = 10; 5310234Syasuko.eckert@amd.comint ArrayST::leakage_power_wt = 10; 5410234Syasuko.eckert@amd.com//Fixed number, make sure timing can be satisfied. 5510234Syasuko.eckert@amd.comint ArrayST::delay_dev = 1000000; 5610234Syasuko.eckert@amd.comint ArrayST::cycle_time_dev = 100; 5710234Syasuko.eckert@amd.com//Fixed number, This is used to exhaustive search for individual components. 5810234Syasuko.eckert@amd.comint ArrayST::area_dev = 1000000; 5910234Syasuko.eckert@amd.com//Fixed number, This is used to exhaustive search for individual components. 6010234Syasuko.eckert@amd.comint ArrayST::dynamic_power_dev = 1000000; 6110234Syasuko.eckert@amd.comint ArrayST::leakage_power_dev = 1000000; 6210234Syasuko.eckert@amd.comint ArrayST::cycle_time_dev_threshold = 10; 6310152Satgutier@umich.edu 6410152Satgutier@umich.edu 6510234Syasuko.eckert@amd.comArrayST::ArrayST(XMLNode* _xml_data, 6610234Syasuko.eckert@amd.com const InputParameter *configure_interface, string _name, 6710234Syasuko.eckert@amd.com enum Device_ty device_ty_, double _clockRate, 6810234Syasuko.eckert@amd.com bool opt_local_, enum Core_type core_ty_, bool _is_default) 6910234Syasuko.eckert@amd.com : McPATComponent(_xml_data), l_ip(*configure_interface), 7010234Syasuko.eckert@amd.com device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_), 7110234Syasuko.eckert@amd.com is_default(_is_default) { 7210234Syasuko.eckert@amd.com name = _name; 7310234Syasuko.eckert@amd.com clockRate = _clockRate; 7410234Syasuko.eckert@amd.com if (l_ip.cache_sz < MIN_BUFFER_SIZE) 7510234Syasuko.eckert@amd.com l_ip.cache_sz = MIN_BUFFER_SIZE; 7610234Syasuko.eckert@amd.com 7710234Syasuko.eckert@amd.com if (!l_ip.error_checking(name)) { 7810234Syasuko.eckert@amd.com exit(1); 7910234Syasuko.eckert@amd.com } 8010234Syasuko.eckert@amd.com 8110234Syasuko.eckert@amd.com output_data.reset(); 8210234Syasuko.eckert@amd.com 8310234Syasuko.eckert@amd.com computeEnergy(); 8410234Syasuko.eckert@amd.com computeArea(); 8510152Satgutier@umich.edu} 8610152Satgutier@umich.edu 8710234Syasuko.eckert@amd.comvoid ArrayST::compute_base_power() { 8810234Syasuko.eckert@amd.com local_result = cacti_interface(&l_ip); 8910234Syasuko.eckert@amd.com} 9010152Satgutier@umich.edu 9110234Syasuko.eckert@amd.comvoid ArrayST::computeArea() { 9210234Syasuko.eckert@amd.com area.set_area(local_result.area); 9310234Syasuko.eckert@amd.com output_data.area = local_result.area / 1e6; 9410234Syasuko.eckert@amd.com} 9510152Satgutier@umich.edu 9610234Syasuko.eckert@amd.comvoid ArrayST::computeEnergy() { 9710234Syasuko.eckert@amd.com list<uca_org_t > candidate_solutions(0); 9810234Syasuko.eckert@amd.com list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter; 9910234Syasuko.eckert@amd.com 10010234Syasuko.eckert@amd.com uca_org_t* temp_res = NULL; 10110234Syasuko.eckert@amd.com local_result.valid = false; 10210234Syasuko.eckert@amd.com 10310234Syasuko.eckert@amd.com double throughput = l_ip.throughput; 10410234Syasuko.eckert@amd.com double latency = l_ip.latency; 10510234Syasuko.eckert@amd.com bool throughput_overflow = true; 10610234Syasuko.eckert@amd.com bool latency_overflow = true; 10710234Syasuko.eckert@amd.com compute_base_power(); 10810234Syasuko.eckert@amd.com 10910234Syasuko.eckert@amd.com if ((local_result.cycle_time - throughput) <= 1e-10 ) 11010234Syasuko.eckert@amd.com throughput_overflow = false; 11110234Syasuko.eckert@amd.com if ((local_result.access_time - latency) <= 1e-10) 11210234Syasuko.eckert@amd.com latency_overflow = false; 11310234Syasuko.eckert@amd.com 11410234Syasuko.eckert@amd.com if (opt_for_clk && opt_local) { 11510234Syasuko.eckert@amd.com if (throughput_overflow || latency_overflow) { 11610234Syasuko.eckert@amd.com l_ip.ed = ed; 11710234Syasuko.eckert@amd.com 11810234Syasuko.eckert@amd.com l_ip.delay_wt = delay_wt; 11910234Syasuko.eckert@amd.com l_ip.cycle_time_wt = cycle_time_wt; 12010234Syasuko.eckert@amd.com 12110234Syasuko.eckert@amd.com l_ip.area_wt = area_wt; 12210234Syasuko.eckert@amd.com l_ip.dynamic_power_wt = dynamic_power_wt; 12310234Syasuko.eckert@amd.com l_ip.leakage_power_wt = leakage_power_wt; 12410234Syasuko.eckert@amd.com 12510234Syasuko.eckert@amd.com l_ip.delay_dev = delay_dev; 12610234Syasuko.eckert@amd.com l_ip.cycle_time_dev = cycle_time_dev; 12710234Syasuko.eckert@amd.com 12810234Syasuko.eckert@amd.com l_ip.area_dev = area_dev; 12910234Syasuko.eckert@amd.com l_ip.dynamic_power_dev = dynamic_power_dev; 13010234Syasuko.eckert@amd.com l_ip.leakage_power_dev = leakage_power_dev; 13110234Syasuko.eckert@amd.com 13210234Syasuko.eckert@amd.com //Reset overflow flag before start optimization iterations 13310234Syasuko.eckert@amd.com throughput_overflow = true; 13410234Syasuko.eckert@amd.com latency_overflow = true; 13510234Syasuko.eckert@amd.com 13610234Syasuko.eckert@amd.com //Clean up the result for optimized for ED^2P 13710234Syasuko.eckert@amd.com temp_res = &local_result; 13810234Syasuko.eckert@amd.com temp_res->cleanup(); 13910234Syasuko.eckert@amd.com } 14010234Syasuko.eckert@amd.com 14110234Syasuko.eckert@amd.com 14210234Syasuko.eckert@amd.com while ((throughput_overflow || latency_overflow) && 14310234Syasuko.eckert@amd.com l_ip.cycle_time_dev > cycle_time_dev_threshold) { 14410234Syasuko.eckert@amd.com compute_base_power(); 14510234Syasuko.eckert@amd.com 14610234Syasuko.eckert@amd.com //This is the time_dev to be used for next iteration 14710234Syasuko.eckert@amd.com l_ip.cycle_time_dev -= cycle_time_dev_threshold; 14810234Syasuko.eckert@amd.com 14910234Syasuko.eckert@amd.com // from best area to worst area -->worst timing to best timing 15010234Syasuko.eckert@amd.com if ((((local_result.cycle_time - throughput) <= 1e-10 ) && 15110234Syasuko.eckert@amd.com (local_result.access_time - latency) <= 1e-10) || 15210234Syasuko.eckert@amd.com (local_result.data_array2->area_efficiency < 15310234Syasuko.eckert@amd.com area_efficiency_threshold && l_ip.assoc == 0)) { 15410234Syasuko.eckert@amd.com //if no satisfiable solution is found,the most aggressive one 15510234Syasuko.eckert@amd.com //is left 15610234Syasuko.eckert@amd.com candidate_solutions.push_back(local_result); 15710234Syasuko.eckert@amd.com if (((local_result.cycle_time - throughput) <= 1e-10) && 15810234Syasuko.eckert@amd.com ((local_result.access_time - latency) <= 1e-10)) { 15910234Syasuko.eckert@amd.com //ensure stop opt not because of cam 16010234Syasuko.eckert@amd.com throughput_overflow = false; 16110234Syasuko.eckert@amd.com latency_overflow = false; 16210234Syasuko.eckert@amd.com } 16310234Syasuko.eckert@amd.com 16410234Syasuko.eckert@amd.com } else { 16510234Syasuko.eckert@amd.com if ((local_result.cycle_time - throughput) <= 1e-10) 16610234Syasuko.eckert@amd.com throughput_overflow = false; 16710234Syasuko.eckert@amd.com if ((local_result.access_time - latency) <= 1e-10) 16810234Syasuko.eckert@amd.com latency_overflow = false; 16910234Syasuko.eckert@amd.com 17010234Syasuko.eckert@amd.com //if not >10 local_result is the last result, it cannot be 17110234Syasuko.eckert@amd.com //cleaned up 17210234Syasuko.eckert@amd.com if (l_ip.cycle_time_dev > cycle_time_dev_threshold) { 17310234Syasuko.eckert@amd.com //Only solutions not saved in the list need to be 17410234Syasuko.eckert@amd.com //cleaned up 17510234Syasuko.eckert@amd.com temp_res = &local_result; 17610234Syasuko.eckert@amd.com temp_res->cleanup(); 17710234Syasuko.eckert@amd.com } 17810234Syasuko.eckert@amd.com } 17910234Syasuko.eckert@amd.com } 18010234Syasuko.eckert@amd.com 18110234Syasuko.eckert@amd.com 18210234Syasuko.eckert@amd.com if (l_ip.assoc > 0) { 18310234Syasuko.eckert@amd.com //For array structures except CAM and FA, Give warning but still 18410234Syasuko.eckert@amd.com //provide a result with best timing found 18510234Syasuko.eckert@amd.com if (throughput_overflow == true) 18610234Syasuko.eckert@amd.com cout << "Warning: " << name 18710234Syasuko.eckert@amd.com << " array structure cannot satisfy throughput constraint." 18810234Syasuko.eckert@amd.com << endl; 18910234Syasuko.eckert@amd.com if (latency_overflow == true) 19010234Syasuko.eckert@amd.com cout << "Warning: " << name 19110234Syasuko.eckert@amd.com << " array structure cannot satisfy latency constraint." 19210234Syasuko.eckert@amd.com << endl; 19310234Syasuko.eckert@amd.com } 19410234Syasuko.eckert@amd.com 19510234Syasuko.eckert@amd.com double min_dynamic_energy = BIGNUM; 19610234Syasuko.eckert@amd.com if (candidate_solutions.empty() == false) { 19710234Syasuko.eckert@amd.com local_result.valid = true; 19810234Syasuko.eckert@amd.com for (candidate_iter = candidate_solutions.begin(); 19910234Syasuko.eckert@amd.com candidate_iter != candidate_solutions.end(); 20010234Syasuko.eckert@amd.com ++candidate_iter) { 20110234Syasuko.eckert@amd.com if (min_dynamic_energy > 20210234Syasuko.eckert@amd.com (candidate_iter)->power.readOp.dynamic) { 20310234Syasuko.eckert@amd.com min_dynamic_energy = 20410234Syasuko.eckert@amd.com (candidate_iter)->power.readOp.dynamic; 20510234Syasuko.eckert@amd.com min_dynamic_energy_iter = candidate_iter; 20610234Syasuko.eckert@amd.com local_result = *(min_dynamic_energy_iter); 20710234Syasuko.eckert@amd.com } else { 20810234Syasuko.eckert@amd.com candidate_iter->cleanup() ; 20910234Syasuko.eckert@amd.com } 21010234Syasuko.eckert@amd.com 21110234Syasuko.eckert@amd.com } 21210234Syasuko.eckert@amd.com 21310234Syasuko.eckert@amd.com 21410234Syasuko.eckert@amd.com } 21510234Syasuko.eckert@amd.com candidate_solutions.clear(); 21610152Satgutier@umich.edu } 21710152Satgutier@umich.edu 21810234Syasuko.eckert@amd.com double long_channel_device_reduction = 21910234Syasuko.eckert@amd.com longer_channel_device_reduction(device_ty, core_ty); 22010152Satgutier@umich.edu 22110234Syasuko.eckert@amd.com double macro_layout_overhead = g_tp.macro_layout_overhead; 22210234Syasuko.eckert@amd.com double chip_PR_overhead = g_tp.chip_layout_overhead; 22310234Syasuko.eckert@amd.com double total_overhead = macro_layout_overhead * chip_PR_overhead; 22410234Syasuko.eckert@amd.com local_result.area *= total_overhead; 22510152Satgutier@umich.edu 22610234Syasuko.eckert@amd.com //maintain constant power density 22710234Syasuko.eckert@amd.com double pppm_t[4] = {total_overhead, 1, 1, total_overhead}; 22810152Satgutier@umich.edu 22910234Syasuko.eckert@amd.com double sckRation = g_tp.sckt_co_eff; 23010234Syasuko.eckert@amd.com local_result.power.readOp.dynamic *= sckRation; 23110234Syasuko.eckert@amd.com local_result.power.writeOp.dynamic *= sckRation; 23210234Syasuko.eckert@amd.com local_result.power.searchOp.dynamic *= sckRation; 23310234Syasuko.eckert@amd.com local_result.power.readOp.leakage *= l_ip.nbanks; 23410234Syasuko.eckert@amd.com local_result.power.readOp.longer_channel_leakage = 23510234Syasuko.eckert@amd.com local_result.power.readOp.leakage * long_channel_device_reduction; 23610234Syasuko.eckert@amd.com local_result.power = local_result.power * pppm_t; 23710152Satgutier@umich.edu 23810234Syasuko.eckert@amd.com local_result.data_array2->power.readOp.dynamic *= sckRation; 23910234Syasuko.eckert@amd.com local_result.data_array2->power.writeOp.dynamic *= sckRation; 24010234Syasuko.eckert@amd.com local_result.data_array2->power.searchOp.dynamic *= sckRation; 24110234Syasuko.eckert@amd.com local_result.data_array2->power.readOp.leakage *= l_ip.nbanks; 24210234Syasuko.eckert@amd.com local_result.data_array2->power.readOp.longer_channel_leakage = 24310234Syasuko.eckert@amd.com local_result.data_array2->power.readOp.leakage * 24410234Syasuko.eckert@amd.com long_channel_device_reduction; 24510234Syasuko.eckert@amd.com local_result.data_array2->power = local_result.data_array2->power * pppm_t; 24610152Satgutier@umich.edu 24710152Satgutier@umich.edu 24810234Syasuko.eckert@amd.com if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) { 24910234Syasuko.eckert@amd.com local_result.tag_array2->power.readOp.dynamic *= sckRation; 25010234Syasuko.eckert@amd.com local_result.tag_array2->power.writeOp.dynamic *= sckRation; 25110234Syasuko.eckert@amd.com local_result.tag_array2->power.searchOp.dynamic *= sckRation; 25210234Syasuko.eckert@amd.com local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks; 25310234Syasuko.eckert@amd.com local_result.tag_array2->power.readOp.longer_channel_leakage = 25410234Syasuko.eckert@amd.com local_result.tag_array2->power.readOp.leakage * 25510234Syasuko.eckert@amd.com long_channel_device_reduction; 25610234Syasuko.eckert@amd.com local_result.tag_array2->power = 25710234Syasuko.eckert@amd.com local_result.tag_array2->power * pppm_t; 25810234Syasuko.eckert@amd.com } 25910152Satgutier@umich.edu 26010234Syasuko.eckert@amd.com power = local_result.power; 26110152Satgutier@umich.edu 26210234Syasuko.eckert@amd.com output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; 26310234Syasuko.eckert@amd.com output_data.subthreshold_leakage_power = power.readOp.leakage; 26410234Syasuko.eckert@amd.com output_data.gate_leakage_power = power.readOp.gate_leakage; 26510152Satgutier@umich.edu} 26610152Satgutier@umich.edu 26710152Satgutier@umich.eduvoid ArrayST::leakage_feedback(double temperature) 26810152Satgutier@umich.edu{ 26910152Satgutier@umich.edu // Update the temperature. l_ip is already set and error-checked in the creator function. 27010152Satgutier@umich.edu l_ip.temp = (unsigned int)round(temperature/10.0)*10; 27110152Satgutier@umich.edu 27210152Satgutier@umich.edu // This corresponds to cacti_interface() in the initialization process. Leakage power is updated here. 27310152Satgutier@umich.edu reconfigure(&l_ip,&local_result); 27410152Satgutier@umich.edu 27510152Satgutier@umich.edu // Scale the power values. This is part of ArrayST::optimize_array(). 27610152Satgutier@umich.edu double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); 27710152Satgutier@umich.edu 27810152Satgutier@umich.edu double macro_layout_overhead = g_tp.macro_layout_overhead; 27910152Satgutier@umich.edu double chip_PR_overhead = g_tp.chip_layout_overhead; 28010152Satgutier@umich.edu double total_overhead = macro_layout_overhead*chip_PR_overhead; 28110152Satgutier@umich.edu 28210152Satgutier@umich.edu double pppm_t[4] = {total_overhead,1,1,total_overhead}; 28310152Satgutier@umich.edu 28410152Satgutier@umich.edu double sckRation = g_tp.sckt_co_eff; 28510152Satgutier@umich.edu local_result.power.readOp.dynamic *= sckRation; 28610152Satgutier@umich.edu local_result.power.writeOp.dynamic *= sckRation; 28710152Satgutier@umich.edu local_result.power.searchOp.dynamic *= sckRation; 28810152Satgutier@umich.edu local_result.power.readOp.leakage *= l_ip.nbanks; 28910152Satgutier@umich.edu local_result.power.readOp.longer_channel_leakage = local_result.power.readOp.leakage*long_channel_device_reduction; 29010152Satgutier@umich.edu local_result.power = local_result.power* pppm_t; 29110152Satgutier@umich.edu 29210152Satgutier@umich.edu local_result.data_array2->power.readOp.dynamic *= sckRation; 29310152Satgutier@umich.edu local_result.data_array2->power.writeOp.dynamic *= sckRation; 29410152Satgutier@umich.edu local_result.data_array2->power.searchOp.dynamic *= sckRation; 29510152Satgutier@umich.edu local_result.data_array2->power.readOp.leakage *= l_ip.nbanks; 29610152Satgutier@umich.edu local_result.data_array2->power.readOp.longer_channel_leakage = local_result.data_array2->power.readOp.leakage*long_channel_device_reduction; 29710152Satgutier@umich.edu local_result.data_array2->power = local_result.data_array2->power* pppm_t; 29810152Satgutier@umich.edu 29910152Satgutier@umich.edu if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) 30010152Satgutier@umich.edu { 30110152Satgutier@umich.edu local_result.tag_array2->power.readOp.dynamic *= sckRation; 30210152Satgutier@umich.edu local_result.tag_array2->power.writeOp.dynamic *= sckRation; 30310152Satgutier@umich.edu local_result.tag_array2->power.searchOp.dynamic *= sckRation; 30410152Satgutier@umich.edu local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks; 30510152Satgutier@umich.edu local_result.tag_array2->power.readOp.longer_channel_leakage = local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction; 30610152Satgutier@umich.edu local_result.tag_array2->power = local_result.tag_array2->power* pppm_t; 30710152Satgutier@umich.edu } 30810152Satgutier@umich.edu} 30910152Satgutier@umich.edu 31010234Syasuko.eckert@amd.comArrayST::~ArrayST() { 31110234Syasuko.eckert@amd.com local_result.cleanup(); 31210152Satgutier@umich.edu} 313