// array.cc revision 10152:52c552138ba1
1/***************************************************************************** 2 * McPAT 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. 5 * All Rights Reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer; 11 * redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution; 14 * neither the name of the copyright holders nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 * 30 ***************************************************************************/ 31 32#define GLOBALVAR 33#include <cassert> 34#include <cmath> 35#include <iostream> 36 37#include "area.h" 38#include "array.h" 39#include "decoder.h" 40#include "globalvar.h" 41#include "parameter.h" 42 43using namespace std; 44 45ArrayST::ArrayST(const InputParameter *configure_interface, 46 string _name, 47 enum Device_ty device_ty_, 48 bool opt_local_, 49 enum Core_type core_ty_, 50 bool _is_default) 51:l_ip(*configure_interface), 52 name(_name), 53 device_ty(device_ty_), 54 opt_local(opt_local_), 55 core_ty(core_ty_), 56 is_default(_is_default) 57 { 58 59 if (l_ip.cache_sz<64) l_ip.cache_sz=64; 60 l_ip.error_checking();//not only do the error checking but also fill some missing parameters 61 optimize_array(); 62 63} 64 65 66void ArrayST::compute_base_power() 67 { 68 //l_ip.out_w =l_ip.line_sz*8; 69 local_result=cacti_interface(&l_ip); 70 71 } 72 73void ArrayST::optimize_array() 74{ 75 list<uca_org_t > candidate_solutions(0); 76 list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter; 77 78 uca_org_t * temp_res = 0; 79 local_result.valid=false; 80 81 double throughput=l_ip.throughput, latency=l_ip.latency; 82 double area_efficiency_threshold = 20.0; 83 bool throughput_overflow=true, latency_overflow=true; 84 compute_base_power(); 85 86 if ((local_result.cycle_time - throughput) <= 1e-10 ) 87 throughput_overflow=false; 88 if 
((local_result.access_time - latency)<= 1e-10) 89 latency_overflow=false; 90 91 if (opt_for_clk && opt_local) 92 { 93 if (throughput_overflow || latency_overflow) 94 { 95 l_ip.ed=0; 96 97 l_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied. 98 l_ip.cycle_time_wt = 1000; 99 100 l_ip.area_wt = 10;//Fixed number, This is used to exhaustive search for individual components. 101 l_ip.dynamic_power_wt = 10;//Fixed number, This is used to exhaustive search for individual components. 102 l_ip.leakage_power_wt = 10; 103 104 l_ip.delay_dev = 1000000;//Fixed number, make sure timing can be satisfied. 105 l_ip.cycle_time_dev = 100; 106 107 l_ip.area_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components. 108 l_ip.dynamic_power_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components. 109 l_ip.leakage_power_dev = 1000000; 110 111 throughput_overflow=true; //Reset overflow flag before start optimization iterations 112 latency_overflow=true; 113 114 temp_res = &local_result; //Clean up the result for optimized for ED^2P 115 temp_res->cleanup(); 116 } 117 118 119 while ((throughput_overflow || latency_overflow)&&l_ip.cycle_time_dev > 10)// && l_ip.delay_dev > 10 120 { 121 compute_base_power(); 122 123 l_ip.cycle_time_dev-=10;//This is the time_dev to be used for next iteration 124 125 // from best area to worst area -->worst timing to best timing 126 if ((((local_result.cycle_time - throughput) <= 1e-10 ) && (local_result.access_time - latency)<= 1e-10)|| 127 (local_result.data_array2->area_efficiency < area_efficiency_threshold && l_ip.assoc == 0)) 128 { //if no satisfiable solution is found,the most aggressive one is left 129 candidate_solutions.push_back(local_result); 130 //output_data_csv(candidate_solutions.back()); 131 if (((local_result.cycle_time - throughput) <= 1e-10) && ((local_result.access_time - latency)<= 1e-10)) 132 //ensure stop opt not because of cam 133 { 134 
throughput_overflow=false; 135 latency_overflow=false; 136 } 137 138 } 139 else 140 { 141 //TODO: whether checking the partial satisfied results too, or just change the mark??? 142 if ((local_result.cycle_time - throughput) <= 1e-10) 143 throughput_overflow=false; 144 if ((local_result.access_time - latency)<= 1e-10) 145 latency_overflow=false; 146 147 if (l_ip.cycle_time_dev > 10) 148 { //if not >10 local_result is the last result, it cannot be cleaned up 149 temp_res = &local_result; //Only solutions not saved in the list need to be cleaned up 150 temp_res->cleanup(); 151 } 152 } 153// l_ip.cycle_time_dev-=10; 154// l_ip.delay_dev-=10; 155 156 } 157 158 159 if (l_ip.assoc > 0) 160 { 161 //For array structures except CAM and FA, Give warning but still provide a result with best timing found 162 if (throughput_overflow==true) 163 cout<< "Warning: " << name<<" array structure cannot satisfy throughput constraint." << endl; 164 if (latency_overflow==true) 165 cout<< "Warning: " << name<<" array structure cannot satisfy latency constraint." << endl; 166 } 167 168// else 169// { 170// /*According to "Content-Addressable Memory (CAM) Circuits and 171// Architectures": A Tutorial and Survey 172// by Kostas Pagiamtzis et al. 173// CAM structures can be heavily pipelined and use look-ahead techniques, 174// therefore timing can be relaxed. But McPAT does not model the advanced 175// techniques. 
If continue optimizing, the area efficiency will be too low 176// */ 177// //For CAM and FA, stop opt if area efficiency is too low 178// if (throughput_overflow==true) 179// cout<< "Warning: " <<" McPAT stopped optimization on throughput for "<< name 180// <<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl; 181// if (latency_overflow==true) 182// cout<< "Warning: " <<" McPAT stopped optimization on latency for "<< name 183// <<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl; 184// } 185 186 //double min_dynamic_energy, min_dynamic_power, min_leakage_power, min_cycle_time; 187 double min_dynamic_energy=BIGNUM; 188 if (candidate_solutions.empty()==false) 189 { 190 local_result.valid=true; 191 for (candidate_iter = candidate_solutions.begin(); candidate_iter != candidate_solutions.end(); ++candidate_iter) 192 193 { 194 if (min_dynamic_energy > (candidate_iter)->power.readOp.dynamic) 195 { 196 min_dynamic_energy = (candidate_iter)->power.readOp.dynamic; 197 min_dynamic_energy_iter = candidate_iter; 198 local_result = *(min_dynamic_energy_iter); 199 //TODO: since results are reordered results and l_ip may miss match. Therefore, the final output spread sheets may show the miss match. 
200 201 } 202 else 203 { 204 candidate_iter->cleanup() ; 205 } 206 207 } 208 209 210 } 211 candidate_solutions.clear(); 212 } 213 214 double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); 215 216 double macro_layout_overhead = g_tp.macro_layout_overhead; 217 double chip_PR_overhead = g_tp.chip_layout_overhead; 218 double total_overhead = macro_layout_overhead*chip_PR_overhead; 219 local_result.area *= total_overhead; 220 221 //maintain constant power density 222 double pppm_t[4] = {total_overhead,1,1,total_overhead}; 223 224 double sckRation = g_tp.sckt_co_eff; 225 local_result.power.readOp.dynamic *= sckRation; 226 local_result.power.writeOp.dynamic *= sckRation; 227 local_result.power.searchOp.dynamic *= sckRation; 228 local_result.power.readOp.leakage *= l_ip.nbanks; 229 local_result.power.readOp.longer_channel_leakage = 230 local_result.power.readOp.leakage*long_channel_device_reduction; 231 local_result.power = local_result.power* pppm_t; 232 233 local_result.data_array2->power.readOp.dynamic *= sckRation; 234 local_result.data_array2->power.writeOp.dynamic *= sckRation; 235 local_result.data_array2->power.searchOp.dynamic *= sckRation; 236 local_result.data_array2->power.readOp.leakage *= l_ip.nbanks; 237 local_result.data_array2->power.readOp.longer_channel_leakage = 238 local_result.data_array2->power.readOp.leakage*long_channel_device_reduction; 239 local_result.data_array2->power = local_result.data_array2->power* pppm_t; 240 241 242 if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) 243 { 244 local_result.tag_array2->power.readOp.dynamic *= sckRation; 245 local_result.tag_array2->power.writeOp.dynamic *= sckRation; 246 local_result.tag_array2->power.searchOp.dynamic *= sckRation; 247 local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks; 248 local_result.tag_array2->power.readOp.longer_channel_leakage = 249 local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction; 250 
local_result.tag_array2->power = local_result.tag_array2->power* pppm_t; 251 } 252 253 254} 255 256void ArrayST::leakage_feedback(double temperature) 257{ 258 // Update the temperature. l_ip is already set and error-checked in the creator function. 259 l_ip.temp = (unsigned int)round(temperature/10.0)*10; 260 261 // This corresponds to cacti_interface() in the initialization process. Leakage power is updated here. 262 reconfigure(&l_ip,&local_result); 263 264 // Scale the power values. This is part of ArrayST::optimize_array(). 265 double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); 266 267 double macro_layout_overhead = g_tp.macro_layout_overhead; 268 double chip_PR_overhead = g_tp.chip_layout_overhead; 269 double total_overhead = macro_layout_overhead*chip_PR_overhead; 270 271 double pppm_t[4] = {total_overhead,1,1,total_overhead}; 272 273 double sckRation = g_tp.sckt_co_eff; 274 local_result.power.readOp.dynamic *= sckRation; 275 local_result.power.writeOp.dynamic *= sckRation; 276 local_result.power.searchOp.dynamic *= sckRation; 277 local_result.power.readOp.leakage *= l_ip.nbanks; 278 local_result.power.readOp.longer_channel_leakage = local_result.power.readOp.leakage*long_channel_device_reduction; 279 local_result.power = local_result.power* pppm_t; 280 281 local_result.data_array2->power.readOp.dynamic *= sckRation; 282 local_result.data_array2->power.writeOp.dynamic *= sckRation; 283 local_result.data_array2->power.searchOp.dynamic *= sckRation; 284 local_result.data_array2->power.readOp.leakage *= l_ip.nbanks; 285 local_result.data_array2->power.readOp.longer_channel_leakage = local_result.data_array2->power.readOp.leakage*long_channel_device_reduction; 286 local_result.data_array2->power = local_result.data_array2->power* pppm_t; 287 288 if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) 289 { 290 local_result.tag_array2->power.readOp.dynamic *= sckRation; 291 
local_result.tag_array2->power.writeOp.dynamic *= sckRation; 292 local_result.tag_array2->power.searchOp.dynamic *= sckRation; 293 local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks; 294 local_result.tag_array2->power.readOp.longer_channel_leakage = local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction; 295 local_result.tag_array2->power = local_result.tag_array2->power* pppm_t; 296 } 297} 298 299ArrayST:: ~ArrayST() 300{ 301 local_result.cleanup(); 302} 303