1/*****************************************************************************
2 *                                McPAT
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
6 *                          All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 ***************************************************************************/
32
33#include <iostream>
34#include <math.h>
35
36#include "area.h"
37#include "array.h"
38#include "common.h"
39#include "decoder.h"
40#include "parameter.h"
41
42using namespace std;
43
44double ArrayST::area_efficiency_threshold = 20.0;
45int ArrayST::ed = 0;
46//Fixed number, make sure timing can be satisfied.
47int ArrayST::delay_wt = 100;
48int ArrayST::cycle_time_wt = 1000;
49//Fixed number, This is used to exhaustive search for individual components.
50int ArrayST::area_wt = 10;
51//Fixed number, This is used to exhaustive search for individual components.
52int ArrayST::dynamic_power_wt = 10;
53int ArrayST::leakage_power_wt = 10;
54//Fixed number, make sure timing can be satisfied.
55int ArrayST::delay_dev = 1000000;
56int ArrayST::cycle_time_dev = 100;
57//Fixed number, This is used to exhaustive search for individual components.
58int ArrayST::area_dev = 1000000;
59//Fixed number, This is used to exhaustive search for individual components.
60int ArrayST::dynamic_power_dev = 1000000;
61int ArrayST::leakage_power_dev = 1000000;
62int ArrayST::cycle_time_dev_threshold = 10;
63
64
65ArrayST::ArrayST(XMLNode* _xml_data,
66                 const InputParameter *configure_interface, string _name,
67                 enum Device_ty device_ty_, double _clockRate,
68                 bool opt_local_, enum Core_type core_ty_, bool _is_default)
69        : McPATComponent(_xml_data), l_ip(*configure_interface),
70        device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_),
71        is_default(_is_default) {
72    name = _name;
73    clockRate = _clockRate;
74    if (l_ip.cache_sz < MIN_BUFFER_SIZE)
75        l_ip.cache_sz = MIN_BUFFER_SIZE;
76
77    if (!l_ip.error_checking(name)) {
78        exit(1);
79    }
80
81    output_data.reset();
82
83    computeEnergy();
84    computeArea();
85}
86
87void ArrayST::compute_base_power() {
88    local_result = cacti_interface(&l_ip);
89}
90
91void ArrayST::computeArea() {
92    area.set_area(local_result.area);
93    output_data.area = local_result.area / 1e6;
94}
95
96void ArrayST::computeEnergy() {
97    list<uca_org_t > candidate_solutions(0);
98    list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter;
99
100    uca_org_t* temp_res = NULL;
101    local_result.valid = false;
102
103    double throughput = l_ip.throughput;
104    double latency = l_ip.latency;
105    bool throughput_overflow = true;
106    bool latency_overflow = true;
107    compute_base_power();
108
109    if ((local_result.cycle_time - throughput) <= 1e-10 )
110        throughput_overflow = false;
111    if ((local_result.access_time - latency) <= 1e-10)
112        latency_overflow = false;
113
114    if (opt_for_clk && opt_local) {
115        if (throughput_overflow || latency_overflow) {
116            l_ip.ed = ed;
117
118            l_ip.delay_wt = delay_wt;
119            l_ip.cycle_time_wt = cycle_time_wt;
120
121            l_ip.area_wt = area_wt;
122            l_ip.dynamic_power_wt = dynamic_power_wt;
123            l_ip.leakage_power_wt = leakage_power_wt;
124
125            l_ip.delay_dev = delay_dev;
126            l_ip.cycle_time_dev = cycle_time_dev;
127
128            l_ip.area_dev = area_dev;
129            l_ip.dynamic_power_dev = dynamic_power_dev;
130            l_ip.leakage_power_dev = leakage_power_dev;
131
132            //Reset overflow flag before start optimization iterations
133            throughput_overflow = true;
134            latency_overflow = true;
135
136            //Clean up the result for optimized for ED^2P
137            temp_res = &local_result;
138            temp_res->cleanup();
139        }
140
141
142        while ((throughput_overflow || latency_overflow) &&
143               l_ip.cycle_time_dev > cycle_time_dev_threshold) {
144            compute_base_power();
145
146            //This is the time_dev to be used for next iteration
147            l_ip.cycle_time_dev -= cycle_time_dev_threshold;
148
149            //		from best area to worst area -->worst timing to best timing
150            if ((((local_result.cycle_time - throughput) <= 1e-10 ) &&
151                 (local_result.access_time - latency) <= 1e-10) ||
152                (local_result.data_array2->area_efficiency <
153                 area_efficiency_threshold && l_ip.assoc == 0)) {
154                //if no satisfiable solution is found,the most aggressive one
155                //is left
156                candidate_solutions.push_back(local_result);
157                if (((local_result.cycle_time - throughput) <= 1e-10) &&
158                    ((local_result.access_time - latency) <= 1e-10)) {
159                    //ensure stop opt not because of cam
160                    throughput_overflow = false;
161                    latency_overflow = false;
162                }
163
164            } else {
165                if ((local_result.cycle_time - throughput) <= 1e-10)
166                    throughput_overflow = false;
167                if ((local_result.access_time - latency) <= 1e-10)
168                    latency_overflow = false;
169
170                //if not >10 local_result is the last result, it cannot be
171                //cleaned up
172                if (l_ip.cycle_time_dev > cycle_time_dev_threshold) {
173                    //Only solutions not saved in the list need to be
174                    //cleaned up
175                    temp_res = &local_result;
176                    temp_res->cleanup();
177                }
178            }
179        }
180
181
182        if (l_ip.assoc > 0) {
183            //For array structures except CAM and FA, Give warning but still
184            //provide a result with best timing found
185            if (throughput_overflow == true)
186                cout << "Warning: " << name
187                     << " array structure cannot satisfy throughput constraint."
188                     << endl;
189            if (latency_overflow == true)
190                cout << "Warning: " << name
191                     << " array structure cannot satisfy latency constraint."
192                     << endl;
193        }
194
195        double min_dynamic_energy = BIGNUM;
196        if (candidate_solutions.empty() == false) {
197            local_result.valid = true;
198            for (candidate_iter = candidate_solutions.begin();
199                 candidate_iter != candidate_solutions.end();
200                 ++candidate_iter) {
201                if (min_dynamic_energy >
202                    (candidate_iter)->power.readOp.dynamic) {
203                    min_dynamic_energy =
204                        (candidate_iter)->power.readOp.dynamic;
205                    min_dynamic_energy_iter = candidate_iter;
206                    local_result = *(min_dynamic_energy_iter);
207                } else {
208                    candidate_iter->cleanup() ;
209                }
210
211            }
212
213
214        }
215        candidate_solutions.clear();
216    }
217
218    double long_channel_device_reduction =
219        longer_channel_device_reduction(device_ty, core_ty);
220
221    double macro_layout_overhead = g_tp.macro_layout_overhead;
222    double chip_PR_overhead = g_tp.chip_layout_overhead;
223    double total_overhead = macro_layout_overhead * chip_PR_overhead;
224    local_result.area *= total_overhead;
225
226    //maintain constant power density
227    double pppm_t[4] = {total_overhead, 1, 1, total_overhead};
228
229    double sckRation = g_tp.sckt_co_eff;
230    local_result.power.readOp.dynamic *= sckRation;
231    local_result.power.writeOp.dynamic *= sckRation;
232    local_result.power.searchOp.dynamic *= sckRation;
233    local_result.power.readOp.leakage *= l_ip.nbanks;
234    local_result.power.readOp.longer_channel_leakage =
235        local_result.power.readOp.leakage * long_channel_device_reduction;
236    local_result.power = local_result.power * pppm_t;
237
238    local_result.data_array2->power.readOp.dynamic *= sckRation;
239    local_result.data_array2->power.writeOp.dynamic *= sckRation;
240    local_result.data_array2->power.searchOp.dynamic *= sckRation;
241    local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
242    local_result.data_array2->power.readOp.longer_channel_leakage =
243        local_result.data_array2->power.readOp.leakage *
244        long_channel_device_reduction;
245    local_result.data_array2->power = local_result.data_array2->power * pppm_t;
246
247
248    if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) {
249        local_result.tag_array2->power.readOp.dynamic *= sckRation;
250        local_result.tag_array2->power.writeOp.dynamic *= sckRation;
251        local_result.tag_array2->power.searchOp.dynamic *= sckRation;
252        local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
253        local_result.tag_array2->power.readOp.longer_channel_leakage =
254            local_result.tag_array2->power.readOp.leakage *
255            long_channel_device_reduction;
256        local_result.tag_array2->power =
257            local_result.tag_array2->power * pppm_t;
258    }
259
260    power = local_result.power;
261
262    output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
263    output_data.subthreshold_leakage_power = power.readOp.leakage;
264    output_data.gate_leakage_power = power.readOp.gate_leakage;
265}
266
267void ArrayST::leakage_feedback(double temperature)
268{
269  // Update the temperature. l_ip is already set and error-checked in the creator function.
270  l_ip.temp = (unsigned int)round(temperature/10.0)*10;
271
272  // This corresponds to cacti_interface() in the initialization process. Leakage power is updated here.
273  reconfigure(&l_ip,&local_result);
274
275  // Scale the power values. This is part of ArrayST::optimize_array().
276  double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
277
278  double macro_layout_overhead   = g_tp.macro_layout_overhead;
279  double chip_PR_overhead        = g_tp.chip_layout_overhead;
280  double total_overhead          = macro_layout_overhead*chip_PR_overhead;
281
282  double pppm_t[4]    = {total_overhead,1,1,total_overhead};
283
284  double sckRation = g_tp.sckt_co_eff;
285  local_result.power.readOp.dynamic *= sckRation;
286  local_result.power.writeOp.dynamic *= sckRation;
287  local_result.power.searchOp.dynamic *= sckRation;
288  local_result.power.readOp.leakage *= l_ip.nbanks;
289  local_result.power.readOp.longer_channel_leakage = local_result.power.readOp.leakage*long_channel_device_reduction;
290  local_result.power = local_result.power* pppm_t;
291
292  local_result.data_array2->power.readOp.dynamic *= sckRation;
293  local_result.data_array2->power.writeOp.dynamic *= sckRation;
294  local_result.data_array2->power.searchOp.dynamic *= sckRation;
295  local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
296  local_result.data_array2->power.readOp.longer_channel_leakage = local_result.data_array2->power.readOp.leakage*long_channel_device_reduction;
297  local_result.data_array2->power = local_result.data_array2->power* pppm_t;
298
299  if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache)
300  {
301    local_result.tag_array2->power.readOp.dynamic *= sckRation;
302    local_result.tag_array2->power.writeOp.dynamic *= sckRation;
303    local_result.tag_array2->power.searchOp.dynamic *= sckRation;
304    local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
305    local_result.tag_array2->power.readOp.longer_channel_leakage = local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction;
306    local_result.tag_array2->power = local_result.tag_array2->power* pppm_t;
307  }
308}
309
310ArrayST::~ArrayST() {
311    local_result.cleanup();
312}
313