uca.cc revision 10152:52c552138ba1
1/*****************************************************************************
2 *                                McPAT/CACTI
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *                          All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution;
14 * neither the name of the copyright holders nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 ***************************************************************************/
31
32
33
34#include <cmath>
35#include <iostream>
36
37#include "uca.h"
38
39UCA::UCA(const DynamicParameter & dyn_p)
40 :dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0)
41{
42  int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)/2 : (_log2(nbanks) - _log2(nbanks)/2));
43  int num_banks_hor_dir = nbanks/num_banks_ver_dir;
44
45  if (dp.use_inp_params)
46  {
47          RWP  = dp.num_rw_ports;
48          ERP  = dp.num_rd_ports;
49          EWP  = dp.num_wr_ports;
50          SCHP = dp.num_search_ports;
51  }
52  else
53  {
54          RWP  = g_ip->num_rw_ports;
55          ERP  = g_ip->num_rd_ports;
56          EWP  = g_ip->num_wr_ports;
57          SCHP = g_ip->num_search_ports;
58  }
59
60  num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
61  num_di_b_bank   = dp.num_di_b_bank_per_port * (RWP + EWP);
62  num_do_b_bank   = dp.num_do_b_bank_per_port * (RWP + ERP);
63  num_si_b_bank   = dp.num_si_b_bank_per_port * SCHP;
64  num_so_b_bank   = dp.num_so_b_bank_per_port * SCHP;
65
66  if (!dp.fully_assoc && !dp.pure_cam)
67  {
68
69          if (g_ip->fast_access && dp.is_tag == false)
70          {
71                  num_do_b_bank *= g_ip->data_assoc;
72          }
73
74          htree_in_add   = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
75                          num_addr_b_bank, num_di_b_bank,0, num_do_b_bank,0,num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
76          htree_in_data  = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
77                          num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
78          htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
79                          num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
80  }
81
82  else
83  {
84
85          htree_in_add   = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
86                          num_addr_b_bank, num_di_b_bank, num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
87          htree_in_data  = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
88                          num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
89          htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
90                          num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
91          htree_in_search  = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
92                          num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
93          htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
94                          num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
95  }
96
97  area.w = htree_in_data->area.w;
98  area.h = htree_in_data->area.h;
99
100  area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks;
101//  cout<<"area cell"<<area_all_dataramcells<<endl;
102//  cout<<area.get_area()<<endl;
103  // delay calculation
104  double inrisetime = 0.0;
105  compute_delays(inrisetime);
106  compute_power_energy();
107}
108
109
110
111UCA::~UCA()
112{
113  delete htree_in_add;
114  delete htree_in_data;
115  delete htree_out_data;
116}
117
118
119
120double UCA::compute_delays(double inrisetime)
121{
122  double outrisetime = bank.compute_delays(inrisetime);
123
124  double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay;
125  double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay;
126  delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat +
127    bank.mat.sa_mux_lev_1_predec->delay +
128    bank.mat.sa_mux_lev_1_dec->delay;
129  delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat +
130    bank.mat.sa_mux_lev_2_predec->delay +
131    bank.mat.sa_mux_lev_2_dec->delay;
132  double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa;
133
134  delay_before_subarray_output_driver =
135    MAX(MAX(max_delay_before_row_decoder + delay_inside_mat,  // row_path
136            delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa),  // col_path
137        MAX(delay_array_to_sa_mux_lev_1_decoder,    // sa_mux_lev_1_path
138            delay_array_to_sa_mux_lev_2_decoder));  // sa_mux_lev_2_path
139  delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree +
140                                       bank.htree_out_data->delay + htree_out_data->delay;
141  access_time                        = bank.mat.delay_comparator;
142
143  double ram_delay_inside_mat;
144  if (dp.fully_assoc)
145  {
146    //delay of FA contains both CAM tag and RAM data
147    { //delay of CAM
148      ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
149      access_time = htree_in_add->delay + bank.htree_in_add->delay;
150      //delay of fully-associative data array
151      access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
152    }
153  }
154  else
155  {
156    access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
157  }
158
159  if (dp.is_main_mem)
160  {
161    double t_rcd       = max_delay_before_row_decoder + delay_inside_mat;
162    double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
163                         delay_from_subarray_out_drv_to_out;
164    access_time = t_rcd + cas_latency;
165  }
166
167  double temp;
168
169  if (!dp.fully_assoc)
170  {
171    temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
172   if (dp.is_dram)
173    {
174      temp += bank.mat.delay_writeback;  // temp stores random cycle time
175    }
176
177
178  temp = MAX(temp, bank.mat.r_predec->delay);
179  temp = MAX(temp, bank.mat.b_mux_predec->delay);
180  temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
181  temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
182  }
183  else
184   {
185          ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
186          temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
187                 + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
188
189          temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc.
190          temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
191          temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
192   }
193
194  // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
195  if (g_ip->rpters_in_htree == false)
196  {
197    temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
198  }
199  cycle_time = temp;
200
201  double delay_req_network = max_delay_before_row_decoder;
202  double delay_rep_network = delay_from_subarray_out_drv_to_out;
203  multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
204
205  if (dp.is_main_mem)
206  {
207    multisubbank_interleave_cycle_time = htree_in_add->delay;
208    precharge_delay = htree_in_add->delay +
209                      bank.htree_in_add->delay + bank.mat.delay_writeback +
210                      bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
211    cycle_time = access_time + precharge_delay;
212  }
213  else
214  {
215    precharge_delay = 0;
216  }
217
218  double dram_array_availability = 0;
219  if (dp.is_dram)
220  {
221    dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
222  }
223
224  return outrisetime;
225}
226
227
228
229// note: currently, power numbers are for a bank of an array
230void UCA::compute_power_energy()
231{
232  bank.compute_power_energy();
233  power = bank.power;
234
235  power_routing_to_bank.readOp.dynamic  = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
236  power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
237  if (dp.fully_assoc || dp.pure_cam)
238      power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic;
239
240  power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage +
241                                          htree_in_data->power.readOp.leakage +
242                                          htree_out_data->power.readOp.leakage;
243
244  power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage +
245                                          htree_in_data->power.readOp.gate_leakage +
246                                          htree_out_data->power.readOp.gate_leakage;
247  if (dp.fully_assoc || dp.pure_cam)
248  {
249        power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
250        power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
251  }
252
253  power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic;
254  power.readOp.dynamic += power_routing_to_bank.readOp.dynamic;
255  power.readOp.leakage += power_routing_to_bank.readOp.leakage;
256  power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage;
257
258  // calculate total write energy per access
259  power.writeOp.dynamic = power.readOp.dynamic
260                        - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
261                        + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
262                        - power_routing_to_bank.readOp.dynamic
263                        + power_routing_to_bank.writeOp.dynamic
264                        + bank.htree_in_data->power.readOp.dynamic
265                        - bank.htree_out_data->power.readOp.dynamic;
266
267  if (dp.is_dram == false)
268  {
269    power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
270  }
271
272  dyn_read_energy_from_closed_page = power.readOp.dynamic;
273  dyn_read_energy_from_open_page   = power.readOp.dynamic -
274                                     (bank.mat.r_predec->power.readOp.dynamic +
275                                      bank.mat.power_row_decoders.readOp.dynamic +
276                                      bank.mat.power_bl_precharge_eq_drv.readOp.dynamic +
277                                      bank.mat.power_sa.readOp.dynamic +
278                                      bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir;
279
280  dyn_read_energy_remaining_words_in_burst =
281    (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) *
282    ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
283      bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
284      bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
285      bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
286      bank.mat.power_subarray_out_drv.readOp.dynamic)     * dp.num_act_mats_hor_dir +
287     bank.htree_out_data->power.readOp.dynamic +
288     power_routing_to_bank.readOp.dynamic);
289  dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst;
290  dyn_read_energy_from_open_page   += dyn_read_energy_remaining_words_in_burst;
291
292  activate_energy = htree_in_add->power.readOp.dynamic +
293                    bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act +
294                    (bank.mat.r_predec->power.readOp.dynamic +
295                     bank.mat.power_row_decoders.readOp.dynamic +
296                     bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir;
297  read_energy    = (htree_in_add->power.readOp.dynamic +
298                    bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
299                    (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic  +
300                     bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic  +
301                     bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
302                     bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
303                     bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
304                    bank.htree_out_data->power.readOp.dynamic +
305                    htree_in_data->power.readOp.dynamic) * g_ip->burst_len;
306  write_energy   = (htree_in_add->power.readOp.dynamic +
307                    bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
308                    htree_in_data->power.readOp.dynamic +
309                    bank.htree_in_data->power.readOp.dynamic +
310                    (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic  +
311                     bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic  +
312                     bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
313                     bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len;
314  precharge_energy = (bank.mat.power_bitline.readOp.dynamic +
315                      bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir;
316
317  leak_power_subbank_closed_page =
318    (bank.mat.r_predec->power.readOp.leakage +
319     bank.mat.b_mux_predec->power.readOp.leakage +
320     bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
321     bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
322     bank.mat.power_row_decoders.readOp.leakage +
323     bank.mat.power_bit_mux_decoders.readOp.leakage +
324     bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
325     bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
326     bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
327
328  leak_power_subbank_closed_page +=
329    (bank.mat.r_predec->power.readOp.gate_leakage +
330     bank.mat.b_mux_predec->power.readOp.gate_leakage +
331     bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
332     bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
333     bank.mat.power_row_decoders.readOp.gate_leakage +
334     bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
335     bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
336     bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+
337     //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
338
339  leak_power_subbank_open_page =
340    (bank.mat.r_predec->power.readOp.leakage +
341     bank.mat.b_mux_predec->power.readOp.leakage +
342     bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
343     bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
344     bank.mat.power_row_decoders.readOp.leakage +
345     bank.mat.power_bit_mux_decoders.readOp.leakage +
346     bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
347     bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
348     bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
349
350  leak_power_subbank_open_page +=
351    (bank.mat.r_predec->power.readOp.gate_leakage +
352     bank.mat.b_mux_predec->power.readOp.gate_leakage +
353     bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
354     bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
355     bank.mat.power_row_decoders.readOp.gate_leakage +
356     bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
357     bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
358     bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir;
359     //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
360
361  leak_power_request_and_reply_networks =
362    power_routing_to_bank.readOp.leakage +
363    bank.htree_in_add->power.readOp.leakage +
364    bank.htree_in_data->power.readOp.leakage +
365    bank.htree_out_data->power.readOp.leakage;
366
367  leak_power_request_and_reply_networks +=
368    power_routing_to_bank.readOp.gate_leakage +
369    bank.htree_in_add->power.readOp.gate_leakage +
370    bank.htree_in_data->power.readOp.gate_leakage +
371    bank.htree_out_data->power.readOp.gate_leakage;
372
373  if (dp.fully_assoc || dp.pure_cam)
374  {
375        leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
376        leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
377  }
378
379
380  if (dp.is_dram)
381  { // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power
382    refresh_power  = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
383                      bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
384    refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
385    refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir;
386    refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
387    refresh_power /= dp.dram_refresh_period;
388  }
389
390
391  if (dp.is_tag == false)
392  {
393    power.readOp.dynamic  = dyn_read_energy_from_closed_page;
394    power.writeOp.dynamic = dyn_read_energy_from_closed_page
395      - dyn_read_energy_remaining_words_in_burst
396      - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
397      + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
398      + (power_routing_to_bank.writeOp.dynamic -
399         power_routing_to_bank.readOp.dynamic -
400         bank.htree_out_data->power.readOp.dynamic +
401         bank.htree_in_data->power.readOp.dynamic) *
402        (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
403
404    if (dp.is_dram == false)
405    {
406      power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
407    }
408  }
409
410  // if DRAM, add refresh power to total leakage
411  if (dp.is_dram)
412  {
413    power.readOp.leakage += refresh_power;
414  }
415
416  // TODO: below should be  avoided.
417  /*if (dp.is_main_mem)
418  {
419    power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks;
420  }*/
421
422  assert(power.readOp.dynamic  > 0);
423  assert(power.writeOp.dynamic > 0);
424  assert(power.readOp.leakage  > 0);
425}
426
427