Deleted Added
sdiff udiff text old ( 10152:52c552138ba1 ) new ( 10234:5cb711fa6176 )
full compact
1/*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
5 * All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 ***************************************************************************/
31
32
33
34#include <cmath>
35#include <iostream>
36
37#include "uca.h"
38
39UCA::UCA(const DynamicParameter & dyn_p)
40 :dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0)
41{
42 int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)/2 : (_log2(nbanks) - _log2(nbanks)/2));
43 int num_banks_hor_dir = nbanks/num_banks_ver_dir;
44
45 if (dp.use_inp_params)
46 {
47 RWP = dp.num_rw_ports;
48 ERP = dp.num_rd_ports;
49 EWP = dp.num_wr_ports;
50 SCHP = dp.num_search_ports;
51 }
52 else
53 {
54 RWP = g_ip->num_rw_ports;
55 ERP = g_ip->num_rd_ports;
56 EWP = g_ip->num_wr_ports;
57 SCHP = g_ip->num_search_ports;
58 }
59
60 num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
61 num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP);
62 num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP);
63 num_si_b_bank = dp.num_si_b_bank_per_port * SCHP;
64 num_so_b_bank = dp.num_so_b_bank_per_port * SCHP;
65
66 if (!dp.fully_assoc && !dp.pure_cam)
67 {
68
69 if (g_ip->fast_access && dp.is_tag == false)
70 {
71 num_do_b_bank *= g_ip->data_assoc;
72 }
73
74 htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
75 num_addr_b_bank, num_di_b_bank,0, num_do_b_bank,0,num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
76 htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
77 num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
78 htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
79 num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
80 }
81
82 else
83 {
84
85 htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
86 num_addr_b_bank, num_di_b_bank, num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
87 htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
88 num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
89 htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
90 num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
91 htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
92 num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
93 htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
94 num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
95 }
96
97 area.w = htree_in_data->area.w;
98 area.h = htree_in_data->area.h;
99
100 area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks;
101// cout<<"area cell"<<area_all_dataramcells<<endl;
102// cout<<area.get_area()<<endl;
103 // delay calculation
104 double inrisetime = 0.0;
105 compute_delays(inrisetime);
106 compute_power_energy();
107}
108
109
110
111UCA::~UCA()
112{
113 delete htree_in_add;
114 delete htree_in_data;
115 delete htree_out_data;
116}
117
118
119
120double UCA::compute_delays(double inrisetime)
121{
122 double outrisetime = bank.compute_delays(inrisetime);
123
124 double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay;
125 double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay;
126 delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat +
127 bank.mat.sa_mux_lev_1_predec->delay +
128 bank.mat.sa_mux_lev_1_dec->delay;
129 delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat +
130 bank.mat.sa_mux_lev_2_predec->delay +
131 bank.mat.sa_mux_lev_2_dec->delay;
132 double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa;
133
134 delay_before_subarray_output_driver =
135 MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path
136 delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path
137 MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path
138 delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path
139 delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree +
140 bank.htree_out_data->delay + htree_out_data->delay;
141 access_time = bank.mat.delay_comparator;
142
143 double ram_delay_inside_mat;
144 if (dp.fully_assoc)
145 {
146 //delay of FA contains both CAM tag and RAM data
147 { //delay of CAM
148 ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
149 access_time = htree_in_add->delay + bank.htree_in_add->delay;
150 //delay of fully-associative data array
151 access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
152 }
153 }
154 else
155 {
156 access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
157 }
158
159 if (dp.is_main_mem)
160 {
161 double t_rcd = max_delay_before_row_decoder + delay_inside_mat;
162 double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
163 delay_from_subarray_out_drv_to_out;
164 access_time = t_rcd + cas_latency;
165 }
166
167 double temp;
168
169 if (!dp.fully_assoc)
170 {
171 temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
172 if (dp.is_dram)
173 {
174 temp += bank.mat.delay_writeback; // temp stores random cycle time
175 }
176
177
178 temp = MAX(temp, bank.mat.r_predec->delay);
179 temp = MAX(temp, bank.mat.b_mux_predec->delay);
180 temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
181 temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
182 }
183 else
184 {
185 ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
186 temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
187 + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
188
189 temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc.
190 temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
191 temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
192 }
193
194 // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
195 if (g_ip->rpters_in_htree == false)
196 {
197 temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
198 }
199 cycle_time = temp;
200
201 double delay_req_network = max_delay_before_row_decoder;
202 double delay_rep_network = delay_from_subarray_out_drv_to_out;
203 multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
204
205 if (dp.is_main_mem)
206 {
207 multisubbank_interleave_cycle_time = htree_in_add->delay;
208 precharge_delay = htree_in_add->delay +
209 bank.htree_in_add->delay + bank.mat.delay_writeback +
210 bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
211 cycle_time = access_time + precharge_delay;
212 }
213 else
214 {
215 precharge_delay = 0;
216 }
217
218 double dram_array_availability = 0;
219 if (dp.is_dram)
220 {
221 dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
222 }
223
224 return outrisetime;
225}
226
227
228
229// note: currently, power numbers are for a bank of an array
230void UCA::compute_power_energy()
231{
232 bank.compute_power_energy();
233 power = bank.power;
234
235 power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
236 power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
237 if (dp.fully_assoc || dp.pure_cam)
238 power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic;
239
240 power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage +
241 htree_in_data->power.readOp.leakage +
242 htree_out_data->power.readOp.leakage;
243
244 power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage +
245 htree_in_data->power.readOp.gate_leakage +
246 htree_out_data->power.readOp.gate_leakage;
247 if (dp.fully_assoc || dp.pure_cam)
248 {
249 power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
250 power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
251 }
252
253 power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic;
254 power.readOp.dynamic += power_routing_to_bank.readOp.dynamic;
255 power.readOp.leakage += power_routing_to_bank.readOp.leakage;
256 power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage;
257
258 // calculate total write energy per access
259 power.writeOp.dynamic = power.readOp.dynamic
260 - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
261 + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
262 - power_routing_to_bank.readOp.dynamic
263 + power_routing_to_bank.writeOp.dynamic
264 + bank.htree_in_data->power.readOp.dynamic
265 - bank.htree_out_data->power.readOp.dynamic;
266
267 if (dp.is_dram == false)
268 {
269 power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
270 }
271
272 dyn_read_energy_from_closed_page = power.readOp.dynamic;
273 dyn_read_energy_from_open_page = power.readOp.dynamic -
274 (bank.mat.r_predec->power.readOp.dynamic +
275 bank.mat.power_row_decoders.readOp.dynamic +
276 bank.mat.power_bl_precharge_eq_drv.readOp.dynamic +
277 bank.mat.power_sa.readOp.dynamic +
278 bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir;
279
280 dyn_read_energy_remaining_words_in_burst =
281 (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) *
282 ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
283 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
284 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
285 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
286 bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
287 bank.htree_out_data->power.readOp.dynamic +
288 power_routing_to_bank.readOp.dynamic);
289 dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst;
290 dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst;
291
292 activate_energy = htree_in_add->power.readOp.dynamic +
293 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act +
294 (bank.mat.r_predec->power.readOp.dynamic +
295 bank.mat.power_row_decoders.readOp.dynamic +
296 bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir;
297 read_energy = (htree_in_add->power.readOp.dynamic +
298 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
299 (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
300 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
301 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
302 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
303 bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
304 bank.htree_out_data->power.readOp.dynamic +
305 htree_in_data->power.readOp.dynamic) * g_ip->burst_len;
306 write_energy = (htree_in_add->power.readOp.dynamic +
307 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
308 htree_in_data->power.readOp.dynamic +
309 bank.htree_in_data->power.readOp.dynamic +
310 (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
311 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
312 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
313 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len;
314 precharge_energy = (bank.mat.power_bitline.readOp.dynamic +
315 bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir;
316
317 leak_power_subbank_closed_page =
318 (bank.mat.r_predec->power.readOp.leakage +
319 bank.mat.b_mux_predec->power.readOp.leakage +
320 bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
321 bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
322 bank.mat.power_row_decoders.readOp.leakage +
323 bank.mat.power_bit_mux_decoders.readOp.leakage +
324 bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
325 bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
326 bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
327
328 leak_power_subbank_closed_page +=
329 (bank.mat.r_predec->power.readOp.gate_leakage +
330 bank.mat.b_mux_predec->power.readOp.gate_leakage +
331 bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
332 bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
333 bank.mat.power_row_decoders.readOp.gate_leakage +
334 bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
335 bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
336 bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+
337 //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
338
339 leak_power_subbank_open_page =
340 (bank.mat.r_predec->power.readOp.leakage +
341 bank.mat.b_mux_predec->power.readOp.leakage +
342 bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
343 bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
344 bank.mat.power_row_decoders.readOp.leakage +
345 bank.mat.power_bit_mux_decoders.readOp.leakage +
346 bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
347 bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
348 bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
349
350 leak_power_subbank_open_page +=
351 (bank.mat.r_predec->power.readOp.gate_leakage +
352 bank.mat.b_mux_predec->power.readOp.gate_leakage +
353 bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
354 bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
355 bank.mat.power_row_decoders.readOp.gate_leakage +
356 bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
357 bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
358 bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir;
359 //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
360
361 leak_power_request_and_reply_networks =
362 power_routing_to_bank.readOp.leakage +
363 bank.htree_in_add->power.readOp.leakage +
364 bank.htree_in_data->power.readOp.leakage +
365 bank.htree_out_data->power.readOp.leakage;
366
367 leak_power_request_and_reply_networks +=
368 power_routing_to_bank.readOp.gate_leakage +
369 bank.htree_in_add->power.readOp.gate_leakage +
370 bank.htree_in_data->power.readOp.gate_leakage +
371 bank.htree_out_data->power.readOp.gate_leakage;
372
373 if (dp.fully_assoc || dp.pure_cam)
374 {
375 leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
376 leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
377 }
378
379
380 if (dp.is_dram)
381 { // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power
382 refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
383 bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
384 refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
385 refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir;
386 refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
387 refresh_power /= dp.dram_refresh_period;
388 }
389
390
391 if (dp.is_tag == false)
392 {
393 power.readOp.dynamic = dyn_read_energy_from_closed_page;
394 power.writeOp.dynamic = dyn_read_energy_from_closed_page
395 - dyn_read_energy_remaining_words_in_burst
396 - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
397 + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
398 + (power_routing_to_bank.writeOp.dynamic -
399 power_routing_to_bank.readOp.dynamic -
400 bank.htree_out_data->power.readOp.dynamic +
401 bank.htree_in_data->power.readOp.dynamic) *
402 (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
403
404 if (dp.is_dram == false)
405 {
406 power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
407 }
408 }
409
410 // if DRAM, add refresh power to total leakage
411 if (dp.is_dram)
412 {
413 power.readOp.leakage += refresh_power;
414 }
415
416 // TODO: below should be avoided.
417 /*if (dp.is_main_mem)
418 {
419 power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks;
420 }*/
421
422 assert(power.readOp.dynamic > 0);
423 assert(power.writeOp.dynamic > 0);
424 assert(power.readOp.leakage > 0);
425}
426