Ucache.cc revision 10152
1/*****************************************************************************
2 *                                McPAT/CACTI
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *                          All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution;
14 * neither the name of the copyright holders nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 *
30 ***************************************************************************/
31
32
33#include <pthread.h>
34
35#include <algorithm>
36#include <cmath>
37#include <ctime>
38#include <iostream>
39#include <list>
40
41#include "Ucache.h"
42#include "area.h"
43#include "bank.h"
44#include "basic_circuit.h"
45#include "component.h"
46#include "const.h"
47#include "decoder.h"
48#include "parameter.h"
49#include "subarray.h"
50#include "uca.h"
51
52using namespace std;
53
// Number of worker threads spawned for each calculate_time() sweep; the
// thread with id tid handles search iterations tid, tid + nthreads, ...
const uint32_t nthreads = NTHREADS;
55
56
57void min_values_t::update_min_values(const min_values_t * val)
58{
59  min_delay   = (min_delay > val->min_delay) ? val->min_delay : min_delay;
60  min_dyn     = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn;
61  min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage;
62  min_area    = (min_area > val->min_area) ? val->min_area : min_area;
63  min_cyc     = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc;
64}
65
66
67
68void min_values_t::update_min_values(const uca_org_t & res)
69{
70  min_delay   = (min_delay > res.access_time) ? res.access_time : min_delay;
71  min_dyn     = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn;
72  min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage;
73  min_area    = (min_area > res.area) ? res.area : min_area;
74  min_cyc     = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc;
75}
76
77void min_values_t::update_min_values(const nuca_org_t * res)
78{
79  min_delay   = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay;
80  min_dyn     = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn;
81  min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage;
82  min_area    = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area;
83  min_cyc     = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc;
84}
85
86void min_values_t::update_min_values(const mem_array * res)
87{
88  min_delay   = (min_delay > res->access_time) ? res->access_time : min_delay;
89  min_dyn     = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn;
90  min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage;
91  min_area    = (min_area > res->area) ? res->area : min_area;
92  min_cyc     = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc;
93}
94
95
96
97void * calc_time_mt_wrapper(void * void_obj)
98{
99  calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj;
100  uint32_t tid                   = calc_obj->tid;
101  list<mem_array *> & data_arr   = calc_obj->data_arr;
102  list<mem_array *> & tag_arr    = calc_obj->tag_arr;
103  bool is_tag                    = calc_obj->is_tag;
104  bool pure_ram                  = calc_obj->pure_ram;
105  bool pure_cam					 = calc_obj->pure_cam;
106  bool is_main_mem               = calc_obj->is_main_mem;
107  double Nspd_min                = calc_obj->Nspd_min;
108  min_values_t * data_res        = calc_obj->data_res;
109  min_values_t * tag_res         = calc_obj->tag_res;
110
111  data_arr.clear();
112  data_arr.push_back(new mem_array);
113  tag_arr.clear();
114  tag_arr.push_back(new mem_array);
115
116  uint32_t Ndwl_niter = _log2(MAXDATAN) + 1;
117  uint32_t Ndbl_niter = _log2(MAXDATAN) + 1;
118  uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1;
119  uint32_t niter      = Ndwl_niter * Ndbl_niter * Ndcm_niter;
120
121
122  bool is_valid_partition;
123  int wt_min, wt_max;
124
125  if (g_ip->force_wiretype) {
126    if (g_ip->wt == 0) {
127      wt_min = Low_swing;
128      wt_max = Low_swing;
129    }
130    else {
131      wt_min = Global;
132      wt_max = Low_swing-1;
133    }
134  }
135  else {
136    wt_min = Global;
137    wt_max = Low_swing;
138  }
139
140  for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2)
141  {
142    for (int wr = wt_min; wr <= wt_max; wr++)
143    {
144      for (uint32_t iter = tid; iter < niter; iter += nthreads)
145      {
146        // reconstruct Ndwl, Ndbl, Ndcm
147        unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter));
148        unsigned int Ndbl = 1 << ((iter / (Ndcm_niter))%Ndbl_niter);
149        unsigned int Ndcm = 1 << (iter % Ndcm_niter);
150        for(unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 *= 2)
151        {
152          for(unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2)
153          {
154            //for debuging
155            if (g_ip->force_cache_config && is_tag == false)
156            {
157              wr   = g_ip->wt;
158              Ndwl = g_ip->ndwl;
159              Ndbl = g_ip->ndbl;
160              Ndcm = g_ip->ndcm;
161              if(g_ip->nspd != 0) {
162                  Nspd = g_ip->nspd;
163              }
164              if(g_ip->ndsam1 != 0) {
165                  Ndsam_lev_1 = g_ip->ndsam1;
166                  Ndsam_lev_2 = g_ip->ndsam2;
167              }
168            }
169
170            if (is_tag == true)
171            {
172              is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl,
173                  Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
174                  tag_arr.back(), 0, NULL, NULL,
175                  is_main_mem);
176            }
177            // If it's a fully-associative cache, the data array partition parameters are identical to that of
178            // the tag array, so compute data array partition properties also here.
179            if (is_tag == false || g_ip->fully_assoc)
180            {
181              is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl,
182                  Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
183                  data_arr.back(), 0, NULL, NULL,
184                  is_main_mem);
185            }
186
187            if (is_valid_partition)
188            {
189              if (is_tag == true)
190              {
191                tag_arr.back()->wt = (enum Wire_type) wr;
192                tag_res->update_min_values(tag_arr.back());
193                tag_arr.push_back(new mem_array);
194              }
195              if (is_tag == false || g_ip->fully_assoc)
196              {
197                data_arr.back()->wt = (enum Wire_type) wr;
198                data_res->update_min_values(data_arr.back());
199                data_arr.push_back(new mem_array);
200              }
201            }
202
203            if (g_ip->force_cache_config && is_tag == false)
204            {
205                wr   = wt_max;
206                iter = niter;
207                if(g_ip->nspd != 0) {
208                        Nspd = MAXDATASPD;
209                }
210                if (g_ip->ndsam1 != 0) {
211                        Ndsam_lev_1 = MAX_COL_MUX+1;
212                        Ndsam_lev_2 = MAX_COL_MUX+1;
213                }
214            }
215          }
216        }
217      }
218    }
219  }
220
221  delete data_arr.back();
222  delete tag_arr.back();
223  data_arr.pop_back();
224  tag_arr.pop_back();
225
226  pthread_exit(NULL);
227}
228
229
230
231bool calculate_time(
232    bool is_tag,
233    int pure_ram,
234    bool pure_cam,
235    double Nspd,
236    unsigned int Ndwl,
237    unsigned int Ndbl,
238    unsigned int Ndcm,
239    unsigned int Ndsam_lev_1,
240    unsigned int Ndsam_lev_2,
241    mem_array *ptr_array,
242    int flag_results_populate,
243    results_mem_array *ptr_results,
244    uca_org_t *ptr_fin_res,
245    bool is_main_mem)
246{
247  DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem);
248
249  if (dyn_p.is_valid == false)
250  {
251    return false;
252  }
253
254  UCA * uca = new UCA(dyn_p);
255
256
257  if (flag_results_populate)
258  { //For the final solution, populate the ptr_results data structure  -- TODO: copy only necessary variables
259  }
260  else
261  {
262          int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir;
263          int num_mats = uca->bank.dp.num_mats;
264          bool is_fa = uca->bank.dp.fully_assoc;
265          bool pure_cam = uca->bank.dp.pure_cam;
266        ptr_array->Ndwl = Ndwl;
267    ptr_array->Ndbl = Ndbl;
268    ptr_array->Nspd = Nspd;
269    ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing;
270    ptr_array->Ndsam_lev_1 = Ndsam_lev_1;
271    ptr_array->Ndsam_lev_2 = Ndsam_lev_2;
272    ptr_array->access_time = uca->access_time;
273    ptr_array->cycle_time = uca->cycle_time;
274    ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time;
275    ptr_array->area_ram_cells = uca->area_all_dataramcells;
276    ptr_array->area   = uca->area.get_area();
277    ptr_array->height = uca->area.h;
278    ptr_array->width  = uca->area.w;
279    ptr_array->mat_height = uca->bank.mat.area.h;
280    ptr_array->mat_length = uca->bank.mat.area.w;
281    ptr_array->subarray_height = uca->bank.mat.subarray.area.h;
282    ptr_array->subarray_length = uca->bank.mat.subarray.area.w;
283    ptr_array->power  = uca->power;
284    ptr_array->delay_senseamp_mux_decoder =
285      MAX(uca->delay_array_to_sa_mux_lev_1_decoder,
286          uca->delay_array_to_sa_mux_lev_2_decoder);
287    ptr_array->delay_before_subarray_output_driver         = uca->delay_before_subarray_output_driver;
288    ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out;
289
290    ptr_array->delay_route_to_bank          = uca->htree_in_add->delay;
291    ptr_array->delay_input_htree            = uca->bank.htree_in_add->delay;
292    ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay;
293    ptr_array->delay_row_decoder            = uca->bank.mat.row_dec->delay;
294    ptr_array->delay_bitlines               = uca->bank.mat.delay_bitline;
295    ptr_array->delay_matchlines               = uca->bank.mat.delay_matchchline;
296    ptr_array->delay_sense_amp              = uca->bank.mat.delay_sa;
297    ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree;
298    ptr_array->delay_dout_htree             = uca->bank.htree_out_data->delay;
299    ptr_array->delay_comparator             = uca->bank.mat.delay_comparator;
300
301    ptr_array->all_banks_height = uca->area.h;
302    ptr_array->all_banks_width  = uca->area.w;
303    ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area());
304
305    ptr_array->power_routing_to_bank = uca->power_routing_to_bank;
306    ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power;
307    ptr_array->power_data_input_htree = uca->bank.htree_in_data->power;
308//    cout<<"power_data_input_htree"<<uca->bank.htree_in_data->power.readOp.leakage<<endl;
309    ptr_array->power_data_output_htree = uca->bank.htree_out_data->power;
310//    cout<<"power_data_output_htree"<<uca->bank.htree_out_data->power.readOp.leakage<<endl;
311    ptr_array->power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power;
312    ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
313    ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
314    ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
315
316    ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power;
317    ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
318    ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
319    ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
320
321    ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders;
322    ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir;
323    ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
324    ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
325
326    ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power;
327    ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
328    ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
329    ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
330
331    ptr_array->power_bit_mux_predecoder_blocks  = uca->bank.mat.b_mux_predec->block_power;
332    ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
333    ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
334    ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
335
336    ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders;
337    ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir;
338    ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
339    ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
340
341    ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power;
342    ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= num_act_mats_hor_dir;
343    ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= num_act_mats_hor_dir;
344    ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= num_act_mats_hor_dir;
345
346    ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power;
347    ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
348    ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
349    ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
350
351    ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders;
352    ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir;
353    ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
354    ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
355
356    ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = uca->bank.mat.sa_mux_lev_2_predec->driver_power;
357    ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
358    ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
359    ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
360
361    ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power;
362    ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
363    ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
364    ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
365
366    ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders;
367    ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= num_act_mats_hor_dir;
368    ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= num_act_mats_hor_dir;
369    ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= num_act_mats_hor_dir;
370
371    ptr_array->power_bitlines = uca->bank.mat.power_bitline;
372    ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir;
373    ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir;
374    ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir;
375
376    ptr_array->power_sense_amps = uca->bank.mat.power_sa;
377    ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir;
378    ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir;
379    ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir;
380
381    ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv;
382    ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir;
383    ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
384    ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
385
386    ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv;
387    ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir;
388    ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir;
389    ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir;
390
391    ptr_array->power_comparators = uca->bank.mat.power_comparator;
392    ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir;
393    ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir;
394    ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir;
395
396//    cout <<  "  num of mats: " << dyn_p.num_mats << endl;
397    if (is_fa || pure_cam)
398    {
399    ptr_array->power_htree_in_search = uca->bank.htree_in_search->power;
400//    cout<<"power_htree_in_search"<<uca->bank.htree_in_search->power.readOp.leakage<<endl;
401    ptr_array->power_htree_out_search = uca->bank.htree_out_search->power;
402//    cout<<"power_htree_out_search"<<uca->bank.htree_out_search->power.readOp.leakage<<endl;
403    ptr_array->power_searchline = uca->bank.mat.power_searchline;
404//    cout<<"power_searchlineh"<<uca->bank.mat.power_searchline.readOp.leakage<<endl;
405    ptr_array->power_searchline.searchOp.dynamic *= num_mats;
406    ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge;
407    ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats;
408    ptr_array->power_matchlines = uca->bank.mat.power_matchline;
409    ptr_array->power_matchlines.searchOp.dynamic *= num_mats;
410    ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge;
411    ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats;
412    ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv;
413//    cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.power_matchline.searchOp.leakage<<endl;
414    }
415
416    ptr_array->activate_energy = uca->activate_energy;
417    ptr_array->read_energy = uca->read_energy;
418    ptr_array->write_energy = uca->write_energy;
419    ptr_array->precharge_energy = uca->precharge_energy;
420    ptr_array->refresh_power = uca->refresh_power;
421    ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page;
422    ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page;
423    ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks;
424
425    ptr_array->precharge_delay = uca->precharge_delay;
426
427
428//      cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.<<endl;
429//
430//    if (!(is_fa || pure_cam))
431//    {
432//     cout <<  "  num of cols: " << dyn_p.num_c_subarray << endl;
433//    }
434//    else if (is_fa)
435//    {
436//  	  cout <<  "  num of cols: " << dyn_p.tag_num_c_subarray+ dyn_p.data_num_c_subarray<< endl;
437//    } else
438//  	  cout <<  "  num of cols: " << dyn_p.tag_num_c_subarray<< endl;
439//      cout <<  uca->bank.mat.subarray.get_total_cell_area()<<endl;
440  }
441
442
443  delete uca;
444  return true;
445}
446
447
448
449bool check_uca_org(uca_org_t & u, min_values_t *minval)
450{
451  if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
452    return false;
453  }
454  if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
455      g_ip->dynamic_power_dev) {
456    return false;
457  }
458  if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
459      g_ip->leakage_power_dev) {
460    return false;
461  }
462  if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
463      g_ip->cycle_time_dev) {
464    return false;
465  }
466  if (((u.area - minval->min_area)/minval->min_area)*100 >
467      g_ip->area_dev) {
468    return false;
469  }
470  return true;
471}
472
473bool check_mem_org(mem_array & u, const min_values_t *minval)
474{
475  if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
476    return false;
477  }
478  if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
479      g_ip->dynamic_power_dev) {
480    return false;
481  }
482  if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
483      g_ip->leakage_power_dev) {
484    return false;
485  }
486  if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
487      g_ip->cycle_time_dev) {
488    return false;
489  }
490  if (((u.area - minval->min_area)/minval->min_area)*100 >
491      g_ip->area_dev) {
492    return false;
493  }
494  return true;
495}
496
497
498
499
500void find_optimal_uca(uca_org_t *res, min_values_t * minval, list<uca_org_t> & ulist)
501{
502  double cost = 0;
503  double min_cost = BIGNUM;
504  float d, a, dp, lp, c;
505
506  dp = g_ip->dynamic_power_wt;
507  lp = g_ip->leakage_power_wt;
508  a  = g_ip->area_wt;
509  d  = g_ip->delay_wt;
510  c  = g_ip->cycle_time_wt;
511
512  if (ulist.empty() == true)
513  {
514    cout << "ERROR: no valid cache organizations found" << endl;
515    exit(0);
516  }
517
518  for (list<uca_org_t>::iterator niter = ulist.begin(); niter != ulist.end(); niter++)
519  {
520    if (g_ip->ed == 1)
521    {
522      cost = ((niter)->access_time/minval->min_delay) * ((niter)->power.readOp.dynamic/minval->min_dyn);
523      if (min_cost > cost)
524      {
525        min_cost = cost;
526        *res = (*(niter));
527      }
528    }
529    else if (g_ip->ed == 2)
530    {
531      cost = ((niter)->access_time/minval->min_delay)*
532             ((niter)->access_time/minval->min_delay)*
533             ((niter)->power.readOp.dynamic/minval->min_dyn);
534      if (min_cost > cost)
535      {
536        min_cost = cost;
537        *res = (*(niter));
538      }
539    }
540    else
541    {
542      /*
543       * check whether the current organization
544       * meets the input deviation constraints
545       */
546      bool v = check_uca_org(*niter, minval);
547      //if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
548
549      if (v)
550      {
551        cost = (d  * ((niter)->access_time/minval->min_delay) +
552                c  * ((niter)->cycle_time/minval->min_cyc) +
553                dp * ((niter)->power.readOp.dynamic/minval->min_dyn) +
554                lp * ((niter)->power.readOp.leakage/minval->min_leakage) +
555                a  * ((niter)->area/minval->min_area));
556        //fprintf(stderr, "cost = %g\n", cost);
557
558        if (min_cost > cost) {
559          min_cost = cost;
560          *res = (*(niter));
561          niter = ulist.erase(niter);
562          if (niter!=ulist.begin())
563                  niter--;
564        }
565      }
566      else {
567        niter = ulist.erase(niter);
568        if (niter!=ulist.begin())
569                niter--;
570      }
571    }
572  }
573
574  if (min_cost == BIGNUM)
575  {
576    cout << "ERROR: no cache organizations met optimization criteria" << endl;
577    exit(0);
578  }
579}
580
581
582
583void filter_tag_arr(const min_values_t * min, list<mem_array *> & list)
584{
585  double cost = BIGNUM;
586  double cur_cost;
587  double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt, wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, wt_area = g_ip->area_wt;
588  mem_array * res = NULL;
589
590  if (list.empty() == true)
591  {
592    cout << "ERROR: no valid tag organizations found" << endl;
593    exit(1);
594  }
595
596
597  while (list.empty() != true)
598  {
599    bool v = check_mem_org(*list.back(), min);
600    if (v)
601    {
602      cur_cost = wt_delay   * (list.back()->access_time/min->min_delay) +
603        wt_dyn     * (list.back()->power.readOp.dynamic/min->min_dyn) +
604        wt_leakage * (list.back()->power.readOp.leakage/min->min_leakage) +
605        wt_area    * (list.back()->area/min->min_area) +
606        wt_cyc     * (list.back()->cycle_time/min->min_cyc);
607    }
608    else
609    {
610      cur_cost = BIGNUM;
611    }
612    if (cur_cost < cost)
613    {
614      if (res != NULL)
615      {
616        delete res;
617      }
618      cost = cur_cost;
619      res  = list.back();
620    }
621    else
622    {
623      delete list.back();
624    }
625    list.pop_back();
626  }
627  if(!res)
628  {
629    cout << "ERROR: no valid tag organizations found" << endl;
630    exit(0);
631  }
632
633  list.push_back(res);
634}
635
636
637
638void filter_data_arr(list<mem_array *> & curr_list)
639{
640  if (curr_list.empty() == true)
641  {
642    cout << "ERROR: no valid data array organizations found" << endl;
643    exit(1);
644  }
645
646  list<mem_array *>::iterator iter;
647
648  for (iter = curr_list.begin(); iter != curr_list.end(); ++iter)
649  {
650    mem_array * m = *iter;
651
652    if (m == NULL) exit(1);
653
654    if(((m->access_time - m->arr_min->min_delay)/m->arr_min->min_delay > 0.5) &&
655       ((m->power.readOp.dynamic - m->arr_min->min_dyn)/m->arr_min->min_dyn > 0.5))
656    {
657      delete m;
658      iter = curr_list.erase(iter);
659      iter --;
660    }
661  }
662}
663
664
665
666/*
667 * Performs exhaustive search across different sub-array sizes,
668 * wire types and aspect ratios to find an optimal UCA organization
669 * 1. First different valid tag array organizations are calculated
670 *    and stored in tag_arr array
671 * 2. The exhaustive search is repeated to find valid data array
672 *    organizations and stored in data_arr array
673 * 3. Cache area, delay, power, and cycle time for different
674 *    cache organizations are calculated based on the
675 *    above results
676 * 4. Cache model with least cost is picked from sol_list
677 */
678void solve(uca_org_t *fin_res)
679{
680  bool   is_dram  = false;
681  int    pure_ram = g_ip->pure_ram;
682  bool   pure_cam = g_ip->pure_cam;
683
684  init_tech_params(g_ip->F_sz_um, false);
685
686
687  list<mem_array *> tag_arr (0);
688  list<mem_array *> data_arr(0);
689  list<mem_array *>::iterator miter;
690  list<uca_org_t> sol_list(1, uca_org_t());
691
692  fin_res->tag_array.access_time = 0;
693  fin_res->tag_array.Ndwl = 0;
694  fin_res->tag_array.Ndbl = 0;
695  fin_res->tag_array.Nspd = 0;
696  fin_res->tag_array.deg_bl_muxing = 0;
697  fin_res->tag_array.Ndsam_lev_1 = 0;
698  fin_res->tag_array.Ndsam_lev_2 = 0;
699
700
701  // distribute calculate_time() execution to multiple threads
702  calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads];
703  pthread_t threads[nthreads];
704
705  for (uint32_t t = 0; t < nthreads; t++)
706  {
707    calc_array[t].tid         = t;
708    calc_array[t].pure_ram    = pure_ram;
709    calc_array[t].pure_cam    = pure_cam;
710    calc_array[t].data_res    = new min_values_t();
711    calc_array[t].tag_res     = new min_values_t();
712  }
713
714  bool     is_tag;
715  uint32_t ram_cell_tech_type;
716
717  // If it's a cache, first calculate the area, delay and power for all tag array partitions.
718  if (!(pure_ram||pure_cam||g_ip->fully_assoc))
719  { //cache
720    is_tag              = true;
721    ram_cell_tech_type  = g_ip->tag_arr_ram_cell_tech_type;
722    is_dram             = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
723    init_tech_params(g_ip->F_sz_um, is_tag);
724
725    for (uint32_t t = 0; t < nthreads; t++)
726    {
727      calc_array[t].is_tag      = is_tag;
728      calc_array[t].is_main_mem = false;
729      calc_array[t].Nspd_min    = 0.125;
730      pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
731    }
732
733    for (uint32_t t = 0; t < nthreads; t++)
734    {
735      pthread_join(threads[t], NULL);
736    }
737
738    for (uint32_t t = 0; t < nthreads; t++)
739    {
740      calc_array[t].data_arr.sort(mem_array::lt);
741      data_arr.merge(calc_array[t].data_arr, mem_array::lt);
742      calc_array[t].tag_arr.sort(mem_array::lt);
743      tag_arr.merge(calc_array[t].tag_arr, mem_array::lt);
744    }
745  }
746
747
748  // calculate the area, delay and power for all data array partitions (for cache or plain RAM).
749//  if (!g_ip->fully_assoc)
750// {//in the new cacti, cam, fully_associative cache are processed as single array in the data portion
751    is_tag              = false;
752    ram_cell_tech_type  = g_ip->data_arr_ram_cell_tech_type;
753    is_dram             = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
754    init_tech_params(g_ip->F_sz_um, is_tag);
755
756    for (uint32_t t = 0; t < nthreads; t++)
757    {
758      calc_array[t].is_tag      = is_tag;
759      calc_array[t].is_main_mem = g_ip->is_main_mem;
760      if (!(pure_cam||g_ip->fully_assoc))
761      {
762          calc_array[t].Nspd_min    = (double)(g_ip->out_w)/(double)(g_ip->block_sz*8);
763      }
764      else
765      {
766          calc_array[t].Nspd_min    = 1;
767      }
768
769      pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
770    }
771
772    for (uint32_t t = 0; t < nthreads; t++)
773    {
774      pthread_join(threads[t], NULL);
775    }
776
777    data_arr.clear();
778    for (uint32_t t = 0; t < nthreads; t++)
779    {
780      calc_array[t].data_arr.sort(mem_array::lt);
781      data_arr.merge(calc_array[t].data_arr, mem_array::lt);
782    }
783//  }
784
785
786  min_values_t * d_min = new min_values_t();
787  min_values_t * t_min = new min_values_t();
788  min_values_t * cache_min = new min_values_t();
789
790  for (uint32_t t = 0; t < nthreads; t++)
791  {
792    d_min->update_min_values(calc_array[t].data_res);
793    t_min->update_min_values(calc_array[t].tag_res);
794  }
795
796  for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
797  {
798    (*miter)->arr_min = d_min;
799  }
800
801
802  //cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n";
803  filter_data_arr(data_arr);
804  if(!(pure_ram||pure_cam||g_ip->fully_assoc))
805  {
806    filter_tag_arr(t_min, tag_arr);
807  }
808  //cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n";
809
810
811  if (pure_ram||pure_cam||g_ip->fully_assoc)
812  {
813    for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
814    {
815      uca_org_t & curr_org  = sol_list.back();
816      curr_org.tag_array2  = NULL;
817      curr_org.data_array2 = (*miter);
818
819      curr_org.find_delay();
820      curr_org.find_energy();
821      curr_org.find_area();
822      curr_org.find_cyc();
823
824      //update min values for the entire cache
825      cache_min->update_min_values(curr_org);
826
827      sol_list.push_back(uca_org_t());
828    }
829  }
830  else
831  {
832    while (tag_arr.empty() != true)
833    {
834      mem_array * arr_temp = (tag_arr.back());
835      //delete tag_arr.back();
836      tag_arr.pop_back();
837
838      for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
839      {
840        uca_org_t & curr_org  = sol_list.back();
841        curr_org.tag_array2  = arr_temp;
842        curr_org.data_array2 = (*miter);
843
844        curr_org.find_delay();
845        curr_org.find_energy();
846        curr_org.find_area();
847        curr_org.find_cyc();
848
849        //update min values for the entire cache
850        cache_min->update_min_values(curr_org);
851
852        sol_list.push_back(uca_org_t());
853      }
854    }
855  }
856
857  sol_list.pop_back();
858
859  find_optimal_uca(fin_res, cache_min, sol_list);
860
861  sol_list.clear();
862
863  for (miter = data_arr.begin(); miter != data_arr.end(); ++miter)
864  {
865    if (*miter != fin_res->data_array2)
866    {
867      delete *miter;
868    }
869  }
870  data_arr.clear();
871
872  for (uint32_t t = 0; t < nthreads; t++)
873  {
874    delete calc_array[t].data_res;
875    delete calc_array[t].tag_res;
876  }
877
878  delete [] calc_array;
879  delete cache_min;
880  delete d_min;
881  delete t_min;
882}
883
884void update(uca_org_t *fin_res)
885{
886  if(fin_res->tag_array2)
887  {
888    init_tech_params(g_ip->F_sz_um,true);
889    DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem);
890    if(tag_arr_dyn_p.is_valid)
891    {
892      UCA * tag_arr = new UCA(tag_arr_dyn_p);
893      fin_res->tag_array2->power = tag_arr->power;
894    }
895    else
896    {
897      cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
898      exit(1);
899    }
900  }
901  init_tech_params(g_ip->F_sz_um,false);
902  DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem);
903  if(data_arr_dyn_p.is_valid)
904  {
905    UCA * data_arr = new UCA(data_arr_dyn_p);
906    fin_res->data_array2->power = data_arr->power;
907  }
908  else
909  {
910    cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
911    exit(1);
912  }
913
914  fin_res->find_energy();
915}
916
917