// array.cc revision 10152:52c552138ba1
/*****************************************************************************
 *                                McPAT
 *                      SOFTWARE LICENSE AGREEMENT
 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
 *                          All Rights Reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
 *
 ***************************************************************************/

#define  GLOBALVAR
#include <cassert>
#include <cmath>
#include <iostream>
#include <list>
#include <string>

#include "area.h"
#include "array.h"
#include "decoder.h"
#include "globalvar.h"
#include "parameter.h"

using namespace std;

45ArrayST::ArrayST(const InputParameter *configure_interface,
46                               string _name,
47                               enum Device_ty device_ty_,
48                               bool opt_local_,
49                               enum Core_type core_ty_,
50                               bool _is_default)
51:l_ip(*configure_interface),
52 name(_name),
53 device_ty(device_ty_),
54 opt_local(opt_local_),
55 core_ty(core_ty_),
56 is_default(_is_default)
57    {
58
59        if (l_ip.cache_sz<64) l_ip.cache_sz=64;
60        l_ip.error_checking();//not only do the error checking but also fill some missing parameters
61        optimize_array();
62
63}
64
65
66void ArrayST::compute_base_power()
67    {
68        //l_ip.out_w               =l_ip.line_sz*8;
69    local_result=cacti_interface(&l_ip);
70
71    }
72
73void ArrayST::optimize_array()
74{
75        list<uca_org_t > candidate_solutions(0);
76        list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter;
77
78        uca_org_t * temp_res = 0;
79        local_result.valid=false;
80
81        double 	throughput=l_ip.throughput, latency=l_ip.latency;
82        double  area_efficiency_threshold = 20.0;
83        bool 	throughput_overflow=true, latency_overflow=true;
84        compute_base_power();
85
86        if ((local_result.cycle_time - throughput) <= 1e-10 )
87                throughput_overflow=false;
88        if ((local_result.access_time - latency)<= 1e-10)
89                latency_overflow=false;
90
91        if (opt_for_clk && opt_local)
92        {
93                if (throughput_overflow || latency_overflow)
94                {
95                        l_ip.ed=0;
96
97                        l_ip.delay_wt                = 100;//Fixed number, make sure timing can be satisfied.
98                        l_ip.cycle_time_wt           = 1000;
99
100                        l_ip.area_wt                 = 10;//Fixed number, This is used to exhaustive search for individual components.
101                        l_ip.dynamic_power_wt        = 10;//Fixed number, This is used to exhaustive search for individual components.
102                        l_ip.leakage_power_wt        = 10;
103
104                        l_ip.delay_dev               = 1000000;//Fixed number, make sure timing can be satisfied.
105                        l_ip.cycle_time_dev          = 100;
106
107                        l_ip.area_dev                = 1000000;//Fixed number, This is used to exhaustive search for individual components.
108                        l_ip.dynamic_power_dev       = 1000000;//Fixed number, This is used to exhaustive search for individual components.
109                        l_ip.leakage_power_dev       = 1000000;
110
111                        throughput_overflow=true; //Reset overflow flag before start optimization iterations
112                        latency_overflow=true;
113
114                        temp_res = &local_result; //Clean up the result for optimized for ED^2P
115                        temp_res->cleanup();
116                }
117
118
119                while ((throughput_overflow || latency_overflow)&&l_ip.cycle_time_dev > 10)// && l_ip.delay_dev > 10
120                {
121                        compute_base_power();
122
123                        l_ip.cycle_time_dev-=10;//This is the time_dev to be used for next iteration
124
125                        //		from best area to worst area -->worst timing to best timing
126                        if ((((local_result.cycle_time - throughput) <= 1e-10 ) && (local_result.access_time - latency)<= 1e-10)||
127                                        (local_result.data_array2->area_efficiency < area_efficiency_threshold && l_ip.assoc == 0))
128                        {  //if no satisfiable solution is found,the most aggressive one is left
129                                candidate_solutions.push_back(local_result);
130                                //output_data_csv(candidate_solutions.back());
131                                if (((local_result.cycle_time - throughput) <= 1e-10) && ((local_result.access_time - latency)<= 1e-10))
132                                        //ensure stop opt not because of cam
133                                {
134                                        throughput_overflow=false;
135                                        latency_overflow=false;
136                                }
137
138                        }
139                        else
140                        {
141                                //TODO: whether checking the partial satisfied results too, or just change the mark???
142                                if ((local_result.cycle_time - throughput) <= 1e-10)
143                                                                                throughput_overflow=false;
144                                if ((local_result.access_time - latency)<= 1e-10)
145                                                                                latency_overflow=false;
146
147                                if (l_ip.cycle_time_dev > 10)
148                                {   //if not >10 local_result is the last result, it cannot be cleaned up
149                                        temp_res = &local_result; //Only solutions not saved in the list need to be cleaned up
150                                        temp_res->cleanup();
151                                }
152                        }
153//			l_ip.cycle_time_dev-=10;
154//			l_ip.delay_dev-=10;
155
156                }
157
158
159        if (l_ip.assoc > 0)
160        {
161                //For array structures except CAM and FA, Give warning but still provide a result with best timing found
162                if (throughput_overflow==true)
163                        cout<< "Warning: " << name<<" array structure cannot satisfy throughput constraint." << endl;
164                if (latency_overflow==true)
165                        cout<< "Warning: " << name<<" array structure cannot satisfy latency constraint." << endl;
166        }
167
168//	else
169//	{
170//		/*According to "Content-Addressable Memory (CAM) Circuits and
171//				Architectures": A Tutorial and Survey
172//				by Kostas Pagiamtzis et al.
173//				CAM structures can be heavily pipelined and use look-ahead techniques,
174//				therefore timing can be relaxed. But McPAT does not model the advanced
175//				techniques. If continue optimizing, the area efficiency will be too low
176//		*/
177//		//For CAM and FA, stop opt if area efficiency is too low
178//		if (throughput_overflow==true)
179//			cout<< "Warning: " <<" McPAT stopped optimization on throughput for "<< name
180//				<<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl;
181//		if (latency_overflow==true)
182//			cout<< "Warning: " <<" McPAT stopped optimization on latency for "<< name
183//				<<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl;
184//	}
185
186                //double min_dynamic_energy, min_dynamic_power, min_leakage_power, min_cycle_time;
187                double min_dynamic_energy=BIGNUM;
188                if (candidate_solutions.empty()==false)
189                {
190                        local_result.valid=true;
191                        for (candidate_iter = candidate_solutions.begin(); candidate_iter != candidate_solutions.end(); ++candidate_iter)
192
193                        {
194                                if (min_dynamic_energy > (candidate_iter)->power.readOp.dynamic)
195                                {
196                                        min_dynamic_energy = (candidate_iter)->power.readOp.dynamic;
197                                        min_dynamic_energy_iter = candidate_iter;
198                                        local_result = *(min_dynamic_energy_iter);
199                                        //TODO: since results are reordered results and l_ip may miss match. Therefore, the final output spread sheets may show the miss match.
200
201                                }
202                                else
203                                {
204                                        candidate_iter->cleanup() ;
205                                }
206
207                        }
208
209
210                }
211        candidate_solutions.clear();
212        }
213
214        double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
215
216        double macro_layout_overhead   = g_tp.macro_layout_overhead;
217        double chip_PR_overhead        = g_tp.chip_layout_overhead;
218        double total_overhead          = macro_layout_overhead*chip_PR_overhead;
219        local_result.area *= total_overhead;
220
221        //maintain constant power density
222        double pppm_t[4]    = {total_overhead,1,1,total_overhead};
223
224        double sckRation = g_tp.sckt_co_eff;
225        local_result.power.readOp.dynamic *= sckRation;
226        local_result.power.writeOp.dynamic *= sckRation;
227        local_result.power.searchOp.dynamic *= sckRation;
228        local_result.power.readOp.leakage *= l_ip.nbanks;
229        local_result.power.readOp.longer_channel_leakage =
230                local_result.power.readOp.leakage*long_channel_device_reduction;
231        local_result.power = local_result.power* pppm_t;
232
233        local_result.data_array2->power.readOp.dynamic *= sckRation;
234        local_result.data_array2->power.writeOp.dynamic *= sckRation;
235        local_result.data_array2->power.searchOp.dynamic *= sckRation;
236        local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
237        local_result.data_array2->power.readOp.longer_channel_leakage =
238                local_result.data_array2->power.readOp.leakage*long_channel_device_reduction;
239        local_result.data_array2->power = local_result.data_array2->power* pppm_t;
240
241
242        if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache)
243        {
244                local_result.tag_array2->power.readOp.dynamic *= sckRation;
245                local_result.tag_array2->power.writeOp.dynamic *= sckRation;
246                local_result.tag_array2->power.searchOp.dynamic *= sckRation;
247                local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
248                local_result.tag_array2->power.readOp.longer_channel_leakage =
249                        local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction;
250                local_result.tag_array2->power = local_result.tag_array2->power* pppm_t;
251        }
252
253
254}
255
256void ArrayST::leakage_feedback(double temperature)
257{
258  // Update the temperature. l_ip is already set and error-checked in the creator function.
259  l_ip.temp = (unsigned int)round(temperature/10.0)*10;
260
261  // This corresponds to cacti_interface() in the initialization process. Leakage power is updated here.
262  reconfigure(&l_ip,&local_result);
263
264  // Scale the power values. This is part of ArrayST::optimize_array().
265  double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
266
267  double macro_layout_overhead   = g_tp.macro_layout_overhead;
268  double chip_PR_overhead        = g_tp.chip_layout_overhead;
269  double total_overhead          = macro_layout_overhead*chip_PR_overhead;
270
271  double pppm_t[4]    = {total_overhead,1,1,total_overhead};
272
273  double sckRation = g_tp.sckt_co_eff;
274  local_result.power.readOp.dynamic *= sckRation;
275  local_result.power.writeOp.dynamic *= sckRation;
276  local_result.power.searchOp.dynamic *= sckRation;
277  local_result.power.readOp.leakage *= l_ip.nbanks;
278  local_result.power.readOp.longer_channel_leakage = local_result.power.readOp.leakage*long_channel_device_reduction;
279  local_result.power = local_result.power* pppm_t;
280
281  local_result.data_array2->power.readOp.dynamic *= sckRation;
282  local_result.data_array2->power.writeOp.dynamic *= sckRation;
283  local_result.data_array2->power.searchOp.dynamic *= sckRation;
284  local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
285  local_result.data_array2->power.readOp.longer_channel_leakage = local_result.data_array2->power.readOp.leakage*long_channel_device_reduction;
286  local_result.data_array2->power = local_result.data_array2->power* pppm_t;
287
288  if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache)
289  {
290    local_result.tag_array2->power.readOp.dynamic *= sckRation;
291    local_result.tag_array2->power.writeOp.dynamic *= sckRation;
292    local_result.tag_array2->power.searchOp.dynamic *= sckRation;
293    local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
294    local_result.tag_array2->power.readOp.longer_channel_leakage = local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction;
295    local_result.tag_array2->power = local_result.tag_array2->power* pppm_t;
296  }
297}
298
299ArrayST:: ~ArrayST()
300{
301        local_result.cleanup();
302}
303