uca.cc (10152:52c552138ba1) | uca.cc (10234:5cb711fa6176) |
---|---|
1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. | 1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. |
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. |
|
5 * All Rights Reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer; 11 * redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the --- 7 unchanged lines hidden (view full) --- 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 6 * All Rights Reserved 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are 10 * met: redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer; 12 * redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the --- 7 unchanged lines hidden (view full) --- 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” | 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 * 30 ***************************************************************************/ 31 32 33 34#include <cmath> 35#include <iostream> 36 37#include "uca.h" 38 39UCA::UCA(const DynamicParameter & dyn_p) | 30 * 31 ***************************************************************************/ 32 33 34 35#include <cmath> 36#include <iostream> 37 38#include "uca.h" 39 40UCA::UCA(const DynamicParameter & dyn_p) |
40 :dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) 41{ 42 int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)/2 : (_log2(nbanks) - _log2(nbanks)/2)); 43 int num_banks_hor_dir = nbanks/num_banks_ver_dir; | 41 : dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) { 42 int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks) 43 / 2 : (_log2(nbanks) - _log2(nbanks) / 2)); 44 int num_banks_hor_dir = nbanks / num_banks_ver_dir; |
44 | 45 |
45 if (dp.use_inp_params) 46 { 47 RWP = dp.num_rw_ports; 48 ERP = dp.num_rd_ports; 49 EWP = dp.num_wr_ports; 50 SCHP = dp.num_search_ports; 51 } 52 else 53 { 54 RWP = g_ip->num_rw_ports; 55 ERP = g_ip->num_rd_ports; 56 EWP = g_ip->num_wr_ports; 57 SCHP = g_ip->num_search_ports; 58 } | 46 if (dp.use_inp_params) { 47 RWP = dp.num_rw_ports; 48 ERP = dp.num_rd_ports; 49 EWP = dp.num_wr_ports; 50 SCHP = dp.num_search_ports; 51 } else { 52 RWP = g_ip->num_rw_ports; 53 ERP = g_ip->num_rd_ports; 54 EWP = g_ip->num_wr_ports; 55 SCHP = g_ip->num_search_ports; 56 } |
59 | 57 |
60 num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP); 61 num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP); 62 num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP); 63 num_si_b_bank = dp.num_si_b_bank_per_port * SCHP; 64 num_so_b_bank = dp.num_so_b_bank_per_port * SCHP; | 58 num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode) * 59 (RWP + ERP + EWP); 60 num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP); 61 num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP); 62 num_si_b_bank = dp.num_si_b_bank_per_port * SCHP; 63 num_so_b_bank = dp.num_so_b_bank_per_port * SCHP; |
65 | 64 |
66 if (!dp.fully_assoc && !dp.pure_cam) 67 { | 65 if (!dp.fully_assoc && !dp.pure_cam) { |
68 | 66 |
69 if (g_ip->fast_access && dp.is_tag == false) 70 { 71 num_do_b_bank *= g_ip->data_assoc; 72 } | 67 if (g_ip->fast_access && dp.is_tag == false) { 68 num_do_b_bank *= g_ip->data_assoc; 69 } |
73 | 70 |
74 htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 75 num_addr_b_bank, num_di_b_bank,0, num_do_b_bank,0,num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true); 76 htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 77 num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); 78 htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 79 num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); 80 } | 71 htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 72 num_addr_b_bank, num_di_b_bank, 0, 73 num_do_b_bank, 0, num_banks_ver_dir * 2, 74 num_banks_hor_dir * 2, Add_htree, true); 75 htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 76 num_addr_b_bank, num_di_b_bank, 0, 77 num_do_b_bank, 0, num_banks_ver_dir * 2, 78 num_banks_hor_dir * 2, Data_in_htree, true); 79 htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 80 num_addr_b_bank, num_di_b_bank, 0, 81 num_do_b_bank, 0, num_banks_ver_dir * 2, 82 num_banks_hor_dir * 2, Data_out_htree, true); 83 } |
81 | 84 |
82 else 83 { | 85 else { |
84 | 86 |
85 htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 86 num_addr_b_bank, num_di_b_bank, num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true); 87 htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 88 num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); 89 htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 90 num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); 91 htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 92 num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); 93 htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 94 num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); 95 } | 87 htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 88 num_addr_b_bank, num_di_b_bank, 89 num_si_b_bank, num_do_b_bank, num_so_b_bank, 90 num_banks_ver_dir * 2, num_banks_hor_dir * 2, 91 Add_htree, true); 92 htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 93 num_addr_b_bank, num_di_b_bank, 94 num_si_b_bank, num_do_b_bank, num_so_b_bank, 95 num_banks_ver_dir * 2, num_banks_hor_dir * 2, 96 Data_in_htree, true); 97 htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 98 num_addr_b_bank, num_di_b_bank, 99 num_si_b_bank, num_do_b_bank, 100 num_so_b_bank, num_banks_ver_dir * 2, 101 num_banks_hor_dir * 2, Data_out_htree, true); 102 htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 103 num_addr_b_bank, num_di_b_bank, 104 num_si_b_bank, num_do_b_bank, 105 num_so_b_bank, num_banks_ver_dir * 2, 106 num_banks_hor_dir * 2, Data_in_htree, true); 107 htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, 108 num_addr_b_bank, num_di_b_bank, 109 num_si_b_bank, num_do_b_bank, 110 num_so_b_bank, num_banks_ver_dir * 2, 111 num_banks_hor_dir * 2, Data_out_htree, 112 true); 113 } |
96 | 114 |
97 area.w = htree_in_data->area.w; 98 area.h = htree_in_data->area.h; | 115 area.w = htree_in_data->area.w; 116 area.h = htree_in_data->area.h; |
99 | 117 |
100 area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks; | 118 area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks; |
101// cout<<"area cell"<<area_all_dataramcells<<endl; 102// cout<<area.get_area()<<endl; | 119// cout<<"area cell"<<area_all_dataramcells<<endl; 120// cout<<area.get_area()<<endl; |
103 // delay calculation 104 double inrisetime = 0.0; 105 compute_delays(inrisetime); 106 compute_power_energy(); | 121 // delay calculation 122 double inrisetime = 0.0; 123 compute_delays(inrisetime); 124 compute_power_energy(); |
107} 108 109 110 | 125} 126 127 128 |
111UCA::~UCA() 112{ 113 delete htree_in_add; 114 delete htree_in_data; 115 delete htree_out_data; | 129UCA::~UCA() { 130 delete htree_in_add; 131 delete htree_in_data; 132 delete htree_out_data; |
116} 117 118 119 | 133} 134 135 136 |
120double UCA::compute_delays(double inrisetime) 121{ 122 double outrisetime = bank.compute_delays(inrisetime); | 137double UCA::compute_delays(double inrisetime) { 138 double outrisetime = bank.compute_delays(inrisetime); |
123 | 139 |
124 double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay; 125 double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay; 126 delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat + 127 bank.mat.sa_mux_lev_1_predec->delay + 128 bank.mat.sa_mux_lev_1_dec->delay; 129 delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat + 130 bank.mat.sa_mux_lev_2_predec->delay + 131 bank.mat.sa_mux_lev_2_dec->delay; 132 double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa; | 140 double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay; 141 double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay; 142 delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat + 143 bank.mat.sa_mux_lev_1_predec->delay + 144 bank.mat.sa_mux_lev_1_dec->delay; 145 delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat + 146 bank.mat.sa_mux_lev_2_predec->delay + 147 bank.mat.sa_mux_lev_2_dec->delay; 148 double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa; |
133 | 149 |
134 delay_before_subarray_output_driver = 135 MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path 136 delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path 137 MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path 138 delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path 139 delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree + 140 bank.htree_out_data->delay + htree_out_data->delay; 141 access_time = bank.mat.delay_comparator; | 150 delay_before_subarray_output_driver = 151 MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path 152 delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path 153 MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path 154 delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path 155 delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree + 156 bank.htree_out_data->delay + htree_out_data->delay; 157 access_time = bank.mat.delay_comparator; |
142 | 158 |
143 double ram_delay_inside_mat; 144 if (dp.fully_assoc) 145 { 146 //delay of FA contains both CAM tag and RAM data 147 { //delay of CAM 148 ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; 149 access_time = htree_in_add->delay + bank.htree_in_add->delay; 150 //delay of fully-associative data array 151 access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out; | 159 double ram_delay_inside_mat; 160 if (dp.fully_assoc) { 161 //delay of FA contains both CAM tag and RAM data 162 { //delay of CAM 163 ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; 164 access_time = htree_in_add->delay + bank.htree_in_add->delay; 165 //delay of fully-associative data array 166 access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out; 167 } 168 } else { 169 access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path |
152 } | 170 } |
153 } 154 else 155 { 156 access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path 157 } | |
158 | 171 |
159 if (dp.is_main_mem) 160 { 161 double t_rcd = max_delay_before_row_decoder + delay_inside_mat; 162 double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) + 163 delay_from_subarray_out_drv_to_out; 164 access_time = t_rcd + cas_latency; 165 } | 172 if (dp.is_main_mem) { 173 double t_rcd = max_delay_before_row_decoder + delay_inside_mat; 174 double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) + 175 delay_from_subarray_out_drv_to_out; 176 access_time = t_rcd + cas_latency; 177 } |
166 | 178 |
167 double temp; | 179 double temp; |
168 | 180 |
169 if (!dp.fully_assoc) 170 { 171 temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit 172 if (dp.is_dram) 173 { 174 temp += bank.mat.delay_writeback; // temp stores random cycle time 175 } | 181 if (!dp.fully_assoc) { 182 temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit 183 if (dp.is_dram) { 184 temp += bank.mat.delay_writeback; // temp stores random cycle time 185 } |
176 177 | 186 187 |
178 temp = MAX(temp, bank.mat.r_predec->delay); 179 temp = MAX(temp, bank.mat.b_mux_predec->delay); 180 temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); 181 temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); 182 } 183 else 184 { 185 ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; 186 temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore 187 + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset; | 188 temp = MAX(temp, bank.mat.r_predec->delay); 189 temp = MAX(temp, bank.mat.b_mux_predec->delay); 190 temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); 191 temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); 192 } else { 193 ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; 194 temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore 195 + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset; |
188 | 196 |
189 temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc. 190 temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); 191 temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); 192 } | 197 temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc. 198 temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); 199 temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); 200 } |
193 | 201 |
194 // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav 195 if (g_ip->rpters_in_htree == false) 196 { 197 temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay); 198 } 199 cycle_time = temp; | 202 // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav 203 if (g_ip->rpters_in_htree == false) { 204 temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay); 205 } 206 cycle_time = temp; |
200 | 207 |
201 double delay_req_network = max_delay_before_row_decoder; 202 double delay_rep_network = delay_from_subarray_out_drv_to_out; 203 multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network); | 208 double delay_req_network = max_delay_before_row_decoder; 209 double delay_rep_network = delay_from_subarray_out_drv_to_out; 210 multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network); |
204 | 211 |
205 if (dp.is_main_mem) 206 { 207 multisubbank_interleave_cycle_time = htree_in_add->delay; 208 precharge_delay = htree_in_add->delay + 209 bank.htree_in_add->delay + bank.mat.delay_writeback + 210 bank.mat.delay_wl_reset + bank.mat.delay_bl_restore; 211 cycle_time = access_time + precharge_delay; 212 } 213 else 214 { 215 precharge_delay = 0; 216 } | 212 if (dp.is_main_mem) { 213 multisubbank_interleave_cycle_time = htree_in_add->delay; 214 precharge_delay = htree_in_add->delay + 215 bank.htree_in_add->delay + bank.mat.delay_writeback + 216 bank.mat.delay_wl_reset + bank.mat.delay_bl_restore; 217 cycle_time = access_time + precharge_delay; 218 } else { 219 precharge_delay = 0; 220 } |
217 | 221 |
218 double dram_array_availability = 0; 219 if (dp.is_dram) 220 { 221 dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100; 222 } | 222 double dram_array_availability = 0; 223 if (dp.is_dram) { 224 dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100; 225 } |
223 | 226 |
224 return outrisetime; | 227 return outrisetime; |
225} 226 227 228 229// note: currently, power numbers are for a bank of an array | 228} 229 230 231 232// note: currently, power numbers are for a bank of an array |
230void UCA::compute_power_energy() 231{ 232 bank.compute_power_energy(); 233 power = bank.power; | 233void UCA::compute_power_energy() { 234 bank.compute_power_energy(); 235 power = bank.power; |
234 | 236 |
235 power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic; 236 power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic; 237 if (dp.fully_assoc || dp.pure_cam) 238 power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic; | 237 power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic; 238 power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic; 239 if (dp.fully_assoc || dp.pure_cam) 240 power_routing_to_bank.searchOp.dynamic = 241 htree_in_search->power.searchOp.dynamic + 242 htree_out_search->power.searchOp.dynamic; |
239 | 243 |
240 power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage + 241 htree_in_data->power.readOp.leakage + 242 htree_out_data->power.readOp.leakage; | 244 power_routing_to_bank.readOp.leakage += 245 htree_in_add->power.readOp.leakage + 246 htree_in_data->power.readOp.leakage + 247 htree_out_data->power.readOp.leakage; |
243 | 248 |
244 power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage + 245 htree_in_data->power.readOp.gate_leakage + 246 htree_out_data->power.readOp.gate_leakage; 247 if (dp.fully_assoc || dp.pure_cam) 248 { | 249 power_routing_to_bank.readOp.gate_leakage += 250 htree_in_add->power.readOp.gate_leakage + 251 htree_in_data->power.readOp.gate_leakage + 252 htree_out_data->power.readOp.gate_leakage; 253 if (dp.fully_assoc || dp.pure_cam) { |
249 power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; 250 power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; | 254 power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; 255 power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; |
251 } | 256 } |
252 | 257 |
253 power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic; 254 power.readOp.dynamic += power_routing_to_bank.readOp.dynamic; 255 power.readOp.leakage += power_routing_to_bank.readOp.leakage; 256 power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage; | 258 power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic; 259 power.readOp.dynamic += power_routing_to_bank.readOp.dynamic; 260 power.readOp.leakage += power_routing_to_bank.readOp.leakage; 261 power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage; |
257 | 262 |
258 // calculate total write energy per access 259 power.writeOp.dynamic = power.readOp.dynamic 260 - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir 261 + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir 262 - power_routing_to_bank.readOp.dynamic 263 + power_routing_to_bank.writeOp.dynamic 264 + bank.htree_in_data->power.readOp.dynamic 265 - bank.htree_out_data->power.readOp.dynamic; | 263 // calculate total write energy per access 264 power.writeOp.dynamic = power.readOp.dynamic 265 - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir 266 + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir 267 - power_routing_to_bank.readOp.dynamic 268 + power_routing_to_bank.writeOp.dynamic 269 + bank.htree_in_data->power.readOp.dynamic 270 - bank.htree_out_data->power.readOp.dynamic; |
266 | 271 |
267 if (dp.is_dram == false) 268 { 269 power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; 270 } | 272 if (dp.is_dram == false) { 273 power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; 274 } |
271 | 275 |
272 dyn_read_energy_from_closed_page = power.readOp.dynamic; 273 dyn_read_energy_from_open_page = power.readOp.dynamic - 274 (bank.mat.r_predec->power.readOp.dynamic + 275 bank.mat.power_row_decoders.readOp.dynamic + 276 bank.mat.power_bl_precharge_eq_drv.readOp.dynamic + 277 bank.mat.power_sa.readOp.dynamic + 278 bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir; | 276 dyn_read_energy_from_closed_page = power.readOp.dynamic; 277 dyn_read_energy_from_open_page = power.readOp.dynamic - 278 (bank.mat.r_predec->power.readOp.dynamic + 279 bank.mat.power_row_decoders.readOp.dynamic + 280 bank.mat.power_bl_precharge_eq_drv.readOp.dynamic + 281 bank.mat.power_sa.readOp.dynamic + 282 bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir; |
279 | 283 |
280 dyn_read_energy_remaining_words_in_burst = 281 (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) * 282 ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + 283 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + 284 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + 285 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + 286 bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + 287 bank.htree_out_data->power.readOp.dynamic + 288 power_routing_to_bank.readOp.dynamic); 289 dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst; 290 dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst; | 284 dyn_read_energy_remaining_words_in_burst = 285 (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) * 286 ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + 287 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + 288 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + 289 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + 290 bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + 291 bank.htree_out_data->power.readOp.dynamic + 292 power_routing_to_bank.readOp.dynamic); 293 dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst; 294 dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst; |
291 | 295 |
292 activate_energy = htree_in_add->power.readOp.dynamic + 293 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act + 294 (bank.mat.r_predec->power.readOp.dynamic + 295 bank.mat.power_row_decoders.readOp.dynamic + 296 bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir; 297 read_energy = (htree_in_add->power.readOp.dynamic + 298 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + 299 (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + 300 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + 301 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + 302 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + 303 bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + 304 bank.htree_out_data->power.readOp.dynamic + 305 htree_in_data->power.readOp.dynamic) * g_ip->burst_len; 306 write_energy = (htree_in_add->power.readOp.dynamic + 307 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + 308 htree_in_data->power.readOp.dynamic + 309 bank.htree_in_data->power.readOp.dynamic + 310 (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + 311 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + 312 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + 313 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len; 314 precharge_energy = (bank.mat.power_bitline.readOp.dynamic + 315 bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir; | 296 activate_energy = htree_in_add->power.readOp.dynamic + 297 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act + 298 (bank.mat.r_predec->power.readOp.dynamic + 299 bank.mat.power_row_decoders.readOp.dynamic + 300 bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir; 301 read_energy = (htree_in_add->power.readOp.dynamic + 302 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + 303 (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + 304 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + 305 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + 306 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + 307 bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + 308 bank.htree_out_data->power.readOp.dynamic + 309 htree_in_data->power.readOp.dynamic) * g_ip->burst_len; 310 write_energy = (htree_in_add->power.readOp.dynamic + 311 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + 312 htree_in_data->power.readOp.dynamic + 313 bank.htree_in_data->power.readOp.dynamic + 314 (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + 315 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + 316 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + 317 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len; 318 precharge_energy = (bank.mat.power_bitline.readOp.dynamic + 319 bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir; |
316 | 320 |
317 leak_power_subbank_closed_page = 318 (bank.mat.r_predec->power.readOp.leakage + 319 bank.mat.b_mux_predec->power.readOp.leakage + 320 bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + 321 bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + 322 bank.mat.power_row_decoders.readOp.leakage + 323 bank.mat.power_bit_mux_decoders.readOp.leakage + 324 bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + 325 bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + 326 bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; | 321 leak_power_subbank_closed_page = 322 (bank.mat.r_predec->power.readOp.leakage + 323 bank.mat.b_mux_predec->power.readOp.leakage + 324 bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + 325 bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + 326 bank.mat.power_row_decoders.readOp.leakage + 327 bank.mat.power_bit_mux_decoders.readOp.leakage + 328 bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + 329 bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + 330 bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; |
327 | 331 |
328 leak_power_subbank_closed_page += 329 (bank.mat.r_predec->power.readOp.gate_leakage + 330 bank.mat.b_mux_predec->power.readOp.gate_leakage + 331 bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + 332 bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + 333 bank.mat.power_row_decoders.readOp.gate_leakage + 334 bank.mat.power_bit_mux_decoders.readOp.gate_leakage + 335 bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + 336 bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+ 337 //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; | 332 leak_power_subbank_closed_page += 333 (bank.mat.r_predec->power.readOp.gate_leakage + 334 bank.mat.b_mux_predec->power.readOp.gate_leakage + 335 bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + 336 bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + 337 bank.mat.power_row_decoders.readOp.gate_leakage + 338 bank.mat.power_bit_mux_decoders.readOp.gate_leakage + 339 bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + 340 bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+ 341 //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; |
338 | 342 |
339 leak_power_subbank_open_page = 340 (bank.mat.r_predec->power.readOp.leakage + 341 bank.mat.b_mux_predec->power.readOp.leakage + 342 bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + 343 bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + 344 bank.mat.power_row_decoders.readOp.leakage + 345 bank.mat.power_bit_mux_decoders.readOp.leakage + 346 bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + 347 bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + 348 bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; | 343 leak_power_subbank_open_page = 344 (bank.mat.r_predec->power.readOp.leakage + 345 bank.mat.b_mux_predec->power.readOp.leakage + 346 bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + 347 bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + 348 bank.mat.power_row_decoders.readOp.leakage + 349 bank.mat.power_bit_mux_decoders.readOp.leakage + 350 bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + 351 bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + 352 bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; |
349 | 353 |
350 leak_power_subbank_open_page += 351 (bank.mat.r_predec->power.readOp.gate_leakage + 352 bank.mat.b_mux_predec->power.readOp.gate_leakage + 353 bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + 354 bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + 355 bank.mat.power_row_decoders.readOp.gate_leakage + 356 bank.mat.power_bit_mux_decoders.readOp.gate_leakage + 357 bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + 358 bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir; 359 //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; | 354 leak_power_subbank_open_page += 355 (bank.mat.r_predec->power.readOp.gate_leakage + 356 bank.mat.b_mux_predec->power.readOp.gate_leakage + 357 bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + 358 bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + 359 bank.mat.power_row_decoders.readOp.gate_leakage + 360 bank.mat.power_bit_mux_decoders.readOp.gate_leakage + 361 bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + 362 bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir; 363 //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; |
360 | 364 |
361 leak_power_request_and_reply_networks = 362 power_routing_to_bank.readOp.leakage + 363 bank.htree_in_add->power.readOp.leakage + 364 bank.htree_in_data->power.readOp.leakage + 365 bank.htree_out_data->power.readOp.leakage; | 365 leak_power_request_and_reply_networks = 366 power_routing_to_bank.readOp.leakage + 367 bank.htree_in_add->power.readOp.leakage + 368 bank.htree_in_data->power.readOp.leakage + 369 bank.htree_out_data->power.readOp.leakage; |
366 | 370 |
367 leak_power_request_and_reply_networks += 368 power_routing_to_bank.readOp.gate_leakage + 369 bank.htree_in_add->power.readOp.gate_leakage + 370 bank.htree_in_data->power.readOp.gate_leakage + 371 bank.htree_out_data->power.readOp.gate_leakage; | 371 leak_power_request_and_reply_networks += 372 power_routing_to_bank.readOp.gate_leakage + 373 bank.htree_in_add->power.readOp.gate_leakage + 374 bank.htree_in_data->power.readOp.gate_leakage + 375 bank.htree_out_data->power.readOp.gate_leakage; |
372 | 376 |
373 if (dp.fully_assoc || dp.pure_cam) 374 { | 377 if (dp.fully_assoc || dp.pure_cam) { |
375 leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; 376 leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; | 378 leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; 379 leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; |
377 } | 380 } |
378 379 | 381 382 |
380 if (dp.is_dram) 381 { // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power 382 refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir + 383 bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays; 384 refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays; 385 refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir; 386 refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; 387 refresh_power /= dp.dram_refresh_period; 388 } | 383 // if DRAM, add contribution of power spent in row predecoder drivers, 384 // blocks and decoders to refresh power 385 if (dp.is_dram) { 386 refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir + 387 bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays; 388 refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays; 389 refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir; 390 refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; 391 refresh_power /= dp.dram_refresh_period; 392 } |
389 390 | 393 394 |
391 if (dp.is_tag == false) 392 { 393 power.readOp.dynamic = dyn_read_energy_from_closed_page; 394 power.writeOp.dynamic = dyn_read_energy_from_closed_page 395 - dyn_read_energy_remaining_words_in_burst 396 - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir 397 + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir 398 + (power_routing_to_bank.writeOp.dynamic - 399 power_routing_to_bank.readOp.dynamic - 400 bank.htree_out_data->power.readOp.dynamic + 401 bank.htree_in_data->power.readOp.dynamic) * 402 (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME | 395 if (dp.is_tag == false) { 396 power.readOp.dynamic = dyn_read_energy_from_closed_page; 397 power.writeOp.dynamic = dyn_read_energy_from_closed_page 398 - dyn_read_energy_remaining_words_in_burst 399 - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir 400 + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir 401 + (power_routing_to_bank.writeOp.dynamic - 402 power_routing_to_bank.readOp.dynamic - 403 bank.htree_out_data->power.readOp.dynamic + 404 bank.htree_in_data->power.readOp.dynamic) * 405 (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME |
403 | 406 |
404 if (dp.is_dram == false) 405 { 406 power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; | 407 if (dp.is_dram == false) { 408 power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; 409 } |
407 } | 410 } |
408 } | |
409 | 411 |
410 // if DRAM, add refresh power to total leakage 411 if (dp.is_dram) 412 { 413 power.readOp.leakage += refresh_power; 414 } | 412 // if DRAM, add refresh power to total leakage 413 if (dp.is_dram) { 414 power.readOp.leakage += refresh_power; 415 } |
415 | 416 |
416 // TODO: below should be avoided. 417 /*if (dp.is_main_mem) 418 { 419 power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks; 420 }*/ | 417 // TODO: below should be avoided. 418 /*if (dp.is_main_mem) 419 { 420 power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks; 421 }*/ |
421 | 422 |
422 assert(power.readOp.dynamic > 0); 423 assert(power.writeOp.dynamic > 0); 424 assert(power.readOp.leakage > 0); | 423 assert(power.readOp.dynamic > 0); 424 assert(power.writeOp.dynamic > 0); 425 assert(power.readOp.leakage > 0); |
425} 426 | 426} 427 |