Ucache.cc revision 10152
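// Overview (editor's summary, derived from the code below): Ucache.cc is the
// top level of the CACTI solver. calc_time_mt_wrapper() exhaustively sweeps
// the array partitioning parameters (Nspd, Ndwl, Ndbl, Ndcm, Ndsam levels) and
// wire types across NTHREADS worker threads; calculate_time() evaluates one
// candidate partition with the UCA model and fills a mem_array; solve()
// combines the surviving tag/data organizations and picks the lowest-cost one
// via find_optimal_uca().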
/*****************************************************************************
 *                              McPAT/CACTI
 *                      SOFTWARE LICENSE AGREEMENT
 *         Copyright 2012 Hewlett-Packard Development Company, L.P.
 *                         All Rights Reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
 *
 ***************************************************************************/

#include <pthread.h>

#include <algorithm>
#include <cmath>
#include <ctime>
#include <iostream>
#include <list>

#include "Ucache.h"
#include "area.h"
#include "bank.h"
#include "basic_circuit.h"
#include "component.h"
#include "const.h"
#include "decoder.h"
#include "parameter.h"
#include "subarray.h"
#include "uca.h"

using namespace std;

const uint32_t nthreads = NTHREADS;


void min_values_t::update_min_values(const min_values_t * val)
{
    min_delay   = (min_delay > val->min_delay) ? val->min_delay : min_delay;
    min_dyn     = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn;
    min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage;
    min_area    = (min_area > val->min_area) ? val->min_area : min_area;
    min_cyc     = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc;
}


void min_values_t::update_min_values(const uca_org_t & res)
{
    min_delay   = (min_delay > res.access_time) ? res.access_time : min_delay;
    min_dyn     = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn;
    min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage;
    min_area    = (min_area > res.area) ? res.area : min_area;
    min_cyc     = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc;
}

void min_values_t::update_min_values(const nuca_org_t * res)
{
    min_delay   = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay;
    min_dyn     = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn;
    min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage;
    min_area    = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area;
    min_cyc     = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc;
}

void min_values_t::update_min_values(const mem_array * res)
{
    min_delay   = (min_delay > res->access_time) ? res->access_time : min_delay;
    min_dyn     = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn;
    min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage;
    min_area    = (min_area > res->area) ? res->area : min_area;
    min_cyc     = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc;
}


void * calc_time_mt_wrapper(void * void_obj)
{
    calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj;
    uint32_t tid                 = calc_obj->tid;
    list<mem_array *> & data_arr = calc_obj->data_arr;
    list<mem_array *> & tag_arr  = calc_obj->tag_arr;
    bool is_tag                  = calc_obj->is_tag;
    bool pure_ram                = calc_obj->pure_ram;
    bool pure_cam                = calc_obj->pure_cam;
    bool is_main_mem             = calc_obj->is_main_mem;
    double Nspd_min              = calc_obj->Nspd_min;
    min_values_t * data_res      = calc_obj->data_res;
    min_values_t * tag_res       = calc_obj->tag_res;

    data_arr.clear();
    data_arr.push_back(new mem_array);
    tag_arr.clear();
    tag_arr.push_back(new mem_array);

    uint32_t Ndwl_niter = _log2(MAXDATAN) + 1;
    uint32_t Ndbl_niter = _log2(MAXDATAN) + 1;
    uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1;
    uint32_t niter      = Ndwl_niter * Ndbl_niter * Ndcm_niter;

    bool is_valid_partition;
    int wt_min, wt_max;

    if (g_ip->force_wiretype) {
        if (g_ip->wt == 0) {
            wt_min = Low_swing;
            wt_max = Low_swing;
        }
        else {
            wt_min = Global;
            wt_max = Low_swing - 1;
        }
    }
    else {
        wt_min = Global;
        wt_max = Low_swing;
    }

    for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2)
    {
        for (int wr = wt_min; wr <= wt_max; wr++)
        {
            for (uint32_t iter = tid; iter < niter; iter += nthreads)
            {
                // reconstruct Ndwl, Ndbl, Ndcm
                unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter));
                unsigned int Ndbl = 1 << ((iter / Ndcm_niter) % Ndbl_niter);
                unsigned int Ndcm = 1 << (iter % Ndcm_niter);
                for (unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 *= 2)
                {
                    for (unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2)
                    {
                        // for debugging
                        if (g_ip->force_cache_config && is_tag == false)
                        {
                            wr   = g_ip->wt;
                            Ndwl = g_ip->ndwl;
                            Ndbl = g_ip->ndbl;
                            Ndcm = g_ip->ndcm;
                            if (g_ip->nspd != 0) {
                                Nspd = g_ip->nspd;
                            }
                            if (g_ip->ndsam1 != 0) {
                                Ndsam_lev_1 = g_ip->ndsam1;
                                Ndsam_lev_2 = g_ip->ndsam2;
                            }
                        }

                        if (is_tag == true)
                        {
                            is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl,
                                                                Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
                                                                tag_arr.back(), 0, NULL, NULL,
                                                                is_main_mem);
                        }
                        // If it's a fully-associative cache, the data array partition parameters are
                        // identical to those of the tag array, so compute the data array partition
                        // properties here as well.
                        if (is_tag == false || g_ip->fully_assoc)
                        {
                            is_valid_partition = calculate_time(is_tag /*false*/, pure_ram, pure_cam, Nspd, Ndwl,
                                                                Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
                                                                data_arr.back(), 0, NULL, NULL,
                                                                is_main_mem);
                        }

                        if (is_valid_partition)
                        {
                            if (is_tag == true)
                            {
                                tag_arr.back()->wt = (enum Wire_type) wr;
                                tag_res->update_min_values(tag_arr.back());
                                tag_arr.push_back(new mem_array);
                            }
                            if (is_tag == false || g_ip->fully_assoc)
                            {
                                data_arr.back()->wt = (enum Wire_type) wr;
                                data_res->update_min_values(data_arr.back());
                                data_arr.push_back(new mem_array);
                            }
                        }

                        if (g_ip->force_cache_config && is_tag == false)
                        {
                            wr   = wt_max;
                            iter = niter;
                            if (g_ip->nspd != 0) {
                                Nspd = MAXDATASPD;
                            }
                            if (g_ip->ndsam1 != 0) {
                                Ndsam_lev_1 = MAX_COL_MUX + 1;
                                Ndsam_lev_2 = MAX_COL_MUX + 1;
                            }
                        }
                    }
                }
            }
        }
    }

    delete data_arr.back();
    delete tag_arr.back();
    data_arr.pop_back();
    tag_arr.pop_back();

    pthread_exit(NULL);
}


bool calculate_time(
    bool is_tag,
    int pure_ram,
    bool pure_cam,
    double Nspd,
    unsigned int Ndwl,
    unsigned int Ndbl,
    unsigned int Ndcm,
    unsigned int Ndsam_lev_1,
    unsigned int Ndsam_lev_2,
    mem_array *ptr_array,
    int flag_results_populate,
    results_mem_array *ptr_results,
    uca_org_t *ptr_fin_res,
    bool is_main_mem)
{
    DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm,
                           Ndsam_lev_1, Ndsam_lev_2, is_main_mem);

    if (dyn_p.is_valid == false)
    {
        return false;
    }

    UCA * uca = new UCA(dyn_p);

    if (flag_results_populate)
    {   // For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables
    }
    else
    {
        int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir;
        int num_mats             = uca->bank.dp.num_mats;
        bool is_fa               = uca->bank.dp.fully_assoc;
        bool pure_cam            = uca->bank.dp.pure_cam;
        ptr_array->Ndwl = Ndwl;
        ptr_array->Ndbl = Ndbl;
        ptr_array->Nspd = Nspd;
        ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing;
        ptr_array->Ndsam_lev_1 = Ndsam_lev_1;
        ptr_array->Ndsam_lev_2 = Ndsam_lev_2;
        ptr_array->access_time = uca->access_time;
        ptr_array->cycle_time = uca->cycle_time;
        ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time;
        ptr_array->area_ram_cells = uca->area_all_dataramcells;
        ptr_array->area = uca->area.get_area();
        ptr_array->height = uca->area.h;
        ptr_array->width = uca->area.w;
        ptr_array->mat_height = uca->bank.mat.area.h;
        ptr_array->mat_length = uca->bank.mat.area.w;
        ptr_array->subarray_height = uca->bank.mat.subarray.area.h;
        ptr_array->subarray_length = uca->bank.mat.subarray.area.w;
        ptr_array->power = uca->power;
        ptr_array->delay_senseamp_mux_decoder =
            MAX(uca->delay_array_to_sa_mux_lev_1_decoder,
                uca->delay_array_to_sa_mux_lev_2_decoder);
        ptr_array->delay_before_subarray_output_driver = uca->delay_before_subarray_output_driver;
        ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out;

        ptr_array->delay_route_to_bank = uca->htree_in_add->delay;
        ptr_array->delay_input_htree = uca->bank.htree_in_add->delay;
        ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay;
        ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay;
        ptr_array->delay_bitlines = uca->bank.mat.delay_bitline;
        ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline;
        ptr_array->delay_sense_amp = uca->bank.mat.delay_sa;
        ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree;
        ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay;
        ptr_array->delay_comparator = uca->bank.mat.delay_comparator;

        ptr_array->all_banks_height = uca->area.h;
        ptr_array->all_banks_width = uca->area.w;
        ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area());

        ptr_array->power_routing_to_bank = uca->power_routing_to_bank;
        ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power;
        ptr_array->power_data_input_htree = uca->bank.htree_in_data->power;
//      cout<<"power_data_input_htree"<<uca->bank.htree_in_data->power.readOp.leakage<<endl;
        ptr_array->power_data_output_htree = uca->bank.htree_out_data->power;
//      cout<<"power_data_output_htree"<<uca->bank.htree_out_data->power.readOp.leakage<<endl;
        ptr_array->power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power;
        ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power;
        ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders;
        ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power;
        ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_bit_mux_predecoder_blocks = uca->bank.mat.b_mux_predec->block_power;
        ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders;
        ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power;
        ptr_array->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_senseamp_mux_lev_1_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_senseamp_mux_lev_1_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power;
        ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders;
        ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = uca->bank.mat.sa_mux_lev_2_predec->driver_power;
        ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power;
        ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders;
        ptr_array->power_senseamp_mux_lev_2_decoders.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_senseamp_mux_lev_2_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_senseamp_mux_lev_2_decoders.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_bitlines = uca->bank.mat.power_bitline;
        ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_sense_amps = uca->bank.mat.power_sa;
        ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv;
        ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv;
        ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir;

        ptr_array->power_comparators = uca->bank.mat.power_comparator;
        ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir;
        ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir;

//      cout << " num of mats: " << dyn_p.num_mats << endl;
        if (is_fa || pure_cam)
        {
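            // Search-network, searchline and matchline components only exist for
            // fully-associative or pure-CAM arrays. Note that their search energy is
            // scaled by the total mat count (num_mats), not by num_act_mats_hor_dir
            // as the read/write terms above are.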
            ptr_array->power_htree_in_search = uca->bank.htree_in_search->power;
//          cout<<"power_htree_in_search"<<uca->bank.htree_in_search->power.readOp.leakage<<endl;
            ptr_array->power_htree_out_search = uca->bank.htree_out_search->power;
//          cout<<"power_htree_out_search"<<uca->bank.htree_out_search->power.readOp.leakage<<endl;
            ptr_array->power_searchline = uca->bank.mat.power_searchline;
//          cout<<"power_searchlineh"<<uca->bank.mat.power_searchline.readOp.leakage<<endl;
            ptr_array->power_searchline.searchOp.dynamic *= num_mats;
            ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge;
            ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats;
            ptr_array->power_matchlines = uca->bank.mat.power_matchline;
            ptr_array->power_matchlines.searchOp.dynamic *= num_mats;
            ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge;
            ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats;
            ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv;
//          cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.power_matchline.searchOp.leakage<<endl;
        }

        ptr_array->activate_energy = uca->activate_energy;
        ptr_array->read_energy = uca->read_energy;
        ptr_array->write_energy = uca->write_energy;
        ptr_array->precharge_energy = uca->precharge_energy;
        ptr_array->refresh_power = uca->refresh_power;
        ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page;
        ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page;
        ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks;

        ptr_array->precharge_delay = uca->precharge_delay;

//      cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.<<endl;
//
//      if (!(is_fa || pure_cam))
//      {
//          cout << " num of cols: " << dyn_p.num_c_subarray << endl;
//      }
//      else if (is_fa)
//      {
//          cout << " num of cols: " << dyn_p.tag_num_c_subarray + dyn_p.data_num_c_subarray << endl;
//      }
//      else
//          cout << " num of cols: " << dyn_p.tag_num_c_subarray << endl;
//      cout << uca->bank.mat.subarray.get_total_cell_area() << endl;
    }

    delete uca;
    return true;
}


bool check_uca_org(uca_org_t & u, min_values_t *minval)
{
    if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) > g_ip->delay_dev) {
        return false;
    }
    if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn) * 100 >
        g_ip->dynamic_power_dev) {
        return false;
    }
    if (((u.power.readOp.leakage - minval->min_leakage) / minval->min_leakage) * 100 >
        g_ip->leakage_power_dev) {
        return false;
    }
    if (((u.cycle_time - minval->min_cyc) / minval->min_cyc) * 100 >
        g_ip->cycle_time_dev) {
        return false;
    }
    if (((u.area - minval->min_area) / minval->min_area) * 100 >
        g_ip->area_dev) {
        return false;
    }
    return true;
}

bool check_mem_org(mem_array & u, const min_values_t *minval)
{
    if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) > g_ip->delay_dev) {
        return false;
    }
    if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn) * 100 >
        g_ip->dynamic_power_dev) {
        return false;
    }
    if (((u.power.readOp.leakage - minval->min_leakage) / minval->min_leakage) * 100 >
        g_ip->leakage_power_dev) {
        return false;
    }
    if (((u.cycle_time - minval->min_cyc) / minval->min_cyc) * 100 >
        g_ip->cycle_time_dev) {
        return false;
    }
    if (((u.area - minval->min_area) / minval->min_area) * 100 >
        g_ip->area_dev) {
        return false;
    }
    return true;
}


void find_optimal_uca(uca_org_t *res, min_values_t * minval, list<uca_org_t> & ulist)
{
    double cost = 0;
    double min_cost = BIGNUM;
    float d, a, dp, lp, c;

    dp = g_ip->dynamic_power_wt;
    lp = g_ip->leakage_power_wt;
    a  = g_ip->area_wt;
    d  = g_ip->delay_wt;
    c  = g_ip->cycle_time_wt;

    if (ulist.empty() == true)
    {
        cout << "ERROR: no valid cache organizations found" << endl;
        exit(0);
    }

    for (list<uca_org_t>::iterator niter = ulist.begin(); niter != ulist.end(); niter++)
    {
        if (g_ip->ed == 1)
        {
            cost = (niter->access_time / minval->min_delay) *
                   (niter->power.readOp.dynamic / minval->min_dyn);
            if (min_cost > cost)
            {
                min_cost = cost;
                *res = (*niter);
            }
        }
        else if (g_ip->ed == 2)
        {
            cost = (niter->access_time / minval->min_delay) *
                   (niter->access_time / minval->min_delay) *
                   (niter->power.readOp.dynamic / minval->min_dyn);
            if (min_cost > cost)
            {
                min_cost = cost;
                *res = (*niter);
            }
        }
        else
        {
            /*
             * check whether the current organization
             * meets the input deviation constraints
             */
            bool v = check_uca_org(*niter, minval);
            //if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling

            if (v)
            {
                cost = (d  * (niter->access_time / minval->min_delay) +
                        c  * (niter->cycle_time / minval->min_cyc) +
                        dp * (niter->power.readOp.dynamic / minval->min_dyn) +
                        lp * (niter->power.readOp.leakage / minval->min_leakage) +
                        a  * (niter->area / minval->min_area));
                //fprintf(stderr, "cost = %g\n", cost);

                if (min_cost > cost) {
                    min_cost = cost;
                    *res = (*niter);
                    niter = ulist.erase(niter);
                    if (niter != ulist.begin())
                        niter--;
                }
            }
            else {
                niter = ulist.erase(niter);
                if (niter != ulist.begin())
                    niter--;
            }
        }
    }

    if (min_cost == BIGNUM)
    {
        cout << "ERROR: no cache organizations met optimization criteria" << endl;
        exit(0);
    }
}


void filter_tag_arr(const min_values_t * min, list<mem_array *> & list)
{
    double cost = BIGNUM;
    double cur_cost;
    double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt,
           wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt,
           wt_area = g_ip->area_wt;
    mem_array * res = NULL;

    if (list.empty() == true)
    {
        cout << "ERROR: no valid tag organizations found" << endl;
        exit(1);
    }

    while (list.empty() != true)
    {
        bool v = check_mem_org(*list.back(), min);
        if (v)
        {
            cur_cost = wt_delay * (list.back()->access_time / min->min_delay) +
                       wt_dyn * (list.back()->power.readOp.dynamic / min->min_dyn) +
                       wt_leakage * (list.back()->power.readOp.leakage / min->min_leakage) +
                       wt_area * (list.back()->area / min->min_area) +
                       wt_cyc * (list.back()->cycle_time / min->min_cyc);
        }
        else
        {
            cur_cost = BIGNUM;
        }
        if (cur_cost < cost)
        {
            if (res != NULL)
            {
                delete res;
            }
            cost = cur_cost;
            res = list.back();
        }
        else
        {
            delete list.back();
        }
        list.pop_back();
    }
    if (!res)
    {
        cout << "ERROR: no valid tag organizations found" << endl;
        exit(0);
    }

    list.push_back(res);
}


void filter_data_arr(list<mem_array *> & curr_list)
{
    if (curr_list.empty() == true)
    {
        cout << "ERROR: no valid data array organizations found" << endl;
        exit(1);
    }

    list<mem_array *>::iterator iter;

    for (iter = curr_list.begin(); iter != curr_list.end(); ++iter)
    {
        mem_array * m = *iter;

        if (m == NULL) exit(1);

        if (((m->access_time - m->arr_min->min_delay) / m->arr_min->min_delay > 0.5) &&
            ((m->power.readOp.dynamic - m->arr_min->min_dyn) / m->arr_min->min_dyn > 0.5))
        {
            delete m;
            iter = curr_list.erase(iter);
            iter--;
        }
    }
}


/*
 * Performs an exhaustive search across different sub-array sizes,
 * wire types and aspect ratios to find an optimal UCA organization:
 * 1. First, the different valid tag array organizations are calculated
 *    and stored in the tag_arr list.
 * 2. The exhaustive search is repeated to find valid data array
 *    organizations, which are stored in the data_arr list.
 * 3. Cache area, delay, power, and cycle time for the different
 *    cache organizations are calculated based on the above results.
 * 4. The cache model with the least cost is picked from sol_list.
 */
void solve(uca_org_t *fin_res)
{
    bool is_dram = false;
    int pure_ram = g_ip->pure_ram;
    bool pure_cam = g_ip->pure_cam;

    init_tech_params(g_ip->F_sz_um, false);

    list<mem_array *> tag_arr(0);
    list<mem_array *> data_arr(0);
    list<mem_array *>::iterator miter;
    list<uca_org_t> sol_list(1, uca_org_t());

    fin_res->tag_array.access_time = 0;
    fin_res->tag_array.Ndwl = 0;
    fin_res->tag_array.Ndbl = 0;
    fin_res->tag_array.Nspd = 0;
    fin_res->tag_array.deg_bl_muxing = 0;
    fin_res->tag_array.Ndsam_lev_1 = 0;
    fin_res->tag_array.Ndsam_lev_2 = 0;

    // distribute calculate_time() execution to multiple threads
    calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads];
    pthread_t threads[nthreads];

    for (uint32_t t = 0; t < nthreads; t++)
    {
        calc_array[t].tid = t;
        calc_array[t].pure_ram = pure_ram;
        calc_array[t].pure_cam = pure_cam;
        calc_array[t].data_res = new min_values_t();
        calc_array[t].tag_res = new min_values_t();
    }

    bool is_tag;
    uint32_t ram_cell_tech_type;

    // If it's a cache, first calculate the area, delay and power for all
    // tag array partitions.
    if (!(pure_ram || pure_cam || g_ip->fully_assoc))
    {   // cache
        is_tag = true;
        ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type;
        is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
        init_tech_params(g_ip->F_sz_um, is_tag);

        for (uint32_t t = 0; t < nthreads; t++)
        {
            calc_array[t].is_tag = is_tag;
            calc_array[t].is_main_mem = false;
            calc_array[t].Nspd_min = 0.125;
            pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
        }

        for (uint32_t t = 0; t < nthreads; t++)
        {
            pthread_join(threads[t], NULL);
        }

        for (uint32_t t = 0; t < nthreads; t++)
        {
            calc_array[t].data_arr.sort(mem_array::lt);
            data_arr.merge(calc_array[t].data_arr, mem_array::lt);
            calc_array[t].tag_arr.sort(mem_array::lt);
            tag_arr.merge(calc_array[t].tag_arr, mem_array::lt);
        }
    }

    // calculate the area, delay and power for all data array partitions
    // (for cache or plain RAM).
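    // (For this data-array pass, the minimum Nspd explored is
    // out_w / (block_sz * 8) for an ordinary cache/RAM data array and 1 for a
    // pure-CAM or fully-associative array, and is_main_mem is taken from g_ip
    // instead of being forced to false as in the tag-array pass above.)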
//  if (!g_ip->fully_assoc)
//  {   // in the new CACTI, CAM and fully associative caches are processed as a single array in the data portion
    is_tag = false;
    ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type;
    is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
    init_tech_params(g_ip->F_sz_um, is_tag);

    for (uint32_t t = 0; t < nthreads; t++)
    {
        calc_array[t].is_tag = is_tag;
        calc_array[t].is_main_mem = g_ip->is_main_mem;
        if (!(pure_cam || g_ip->fully_assoc))
        {
            calc_array[t].Nspd_min = (double)(g_ip->out_w) / (double)(g_ip->block_sz * 8);
        }
        else
        {
            calc_array[t].Nspd_min = 1;
        }

        pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
    }

    for (uint32_t t = 0; t < nthreads; t++)
    {
        pthread_join(threads[t], NULL);
    }

    data_arr.clear();
    for (uint32_t t = 0; t < nthreads; t++)
    {
        calc_array[t].data_arr.sort(mem_array::lt);
        data_arr.merge(calc_array[t].data_arr, mem_array::lt);
    }
//  }

    min_values_t * d_min = new min_values_t();
    min_values_t * t_min = new min_values_t();
    min_values_t * cache_min = new min_values_t();

    for (uint32_t t = 0; t < nthreads; t++)
    {
        d_min->update_min_values(calc_array[t].data_res);
        t_min->update_min_values(calc_array[t].tag_res);
    }

    for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
    {
        (*miter)->arr_min = d_min;
    }

    //cout << data_arr.size() << "\t" << tag_arr.size() << " before\n";
    filter_data_arr(data_arr);
    if (!(pure_ram || pure_cam || g_ip->fully_assoc))
    {
        filter_tag_arr(t_min, tag_arr);
    }
    //cout << data_arr.size() << "\t" << tag_arr.size() << " after\n";

    if (pure_ram || pure_cam || g_ip->fully_assoc)
    {
        for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
        {
            uca_org_t & curr_org = sol_list.back();
            curr_org.tag_array2 = NULL;
            curr_org.data_array2 = (*miter);

            curr_org.find_delay();
            curr_org.find_energy();
            curr_org.find_area();
            curr_org.find_cyc();

            // update min values for the entire cache
            cache_min->update_min_values(curr_org);

            sol_list.push_back(uca_org_t());
        }
    }
    else
    {
        while (tag_arr.empty() != true)
        {
            mem_array * arr_temp = (tag_arr.back());
            //delete tag_arr.back();
            tag_arr.pop_back();

            for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
            {
                uca_org_t & curr_org = sol_list.back();
                curr_org.tag_array2 = arr_temp;
                curr_org.data_array2 = (*miter);

                curr_org.find_delay();
                curr_org.find_energy();
                curr_org.find_area();
                curr_org.find_cyc();

                // update min values for the entire cache
                cache_min->update_min_values(curr_org);

                sol_list.push_back(uca_org_t());
            }
        }
    }

    sol_list.pop_back();

    find_optimal_uca(fin_res, cache_min, sol_list);

    sol_list.clear();

    for (miter = data_arr.begin(); miter != data_arr.end(); ++miter)
    {
        if (*miter != fin_res->data_array2)
        {
            delete *miter;
        }
    }
    data_arr.clear();

    for (uint32_t t = 0; t < nthreads; t++)
    {
        delete calc_array[t].data_res;
        delete calc_array[t].tag_res;
    }

    delete[] calc_array;
    delete cache_min;
    delete d_min;
    delete t_min;
}

void update(uca_org_t *fin_res)
{
    if (fin_res->tag_array2)
    {
        init_tech_params(g_ip->F_sz_um, true);
        DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam,
                                       fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl,
                                       fin_res->tag_array2->Ndbl, fin_res->tag_array2->Ndcm,
                                       fin_res->tag_array2->Ndsam_lev_1,
                                       fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem);
        if (tag_arr_dyn_p.is_valid)
        {
            UCA * tag_arr = new UCA(tag_arr_dyn_p);
            fin_res->tag_array2->power = tag_arr->power;
        }
        else
        {
            cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
            exit(1);
        }
    }
    init_tech_params(g_ip->F_sz_um, false);
    DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam,
                                    fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl,
                                    fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm,
                                    fin_res->data_array2->Ndsam_lev_1,
                                    fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem);
    if (data_arr_dyn_p.is_valid)
    {
        UCA * data_arr = new UCA(data_arr_dyn_p);
        fin_res->data_array2->power = data_arr->power;
    }
    else
    {
        cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
        exit(1);
    }

    fin_res->find_energy();
}
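// Minimal driver sketch (editor's note, not part of the original file): it
// assumes the global input descriptor g_ip has already been allocated and
// populated by the caller before solve() is invoked; this file does not do
// that itself.
//
//     uca_org_t fin_res;
//     solve(&fin_res);     // exhaustive search; fills fin_res with the best organization
//     update(&fin_res);    // optional: recompute array power for leakage feedback
//     cout << fin_res.access_time << " " << fin_res.power.readOp.dynamic << endl;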