// cacti_interface.h revision 10234
1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. 5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 6 * All Rights Reserved 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are 10 * met: redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer; 12 * redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution; 15 * neither the name of the copyright holders nor the names of its 16 * contributors may be used to endorse or promote products derived from 17 * this software without specific prior written permission. 18 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 * 31 ***************************************************************************/ 32 33 34 35#ifndef __CACTI_INTERFACE_H__ 36#define __CACTI_INTERFACE_H__ 37 38#include <iostream> 39#include <list> 40#include <map> 41#include <string> 42#include <vector> 43 44#include "const.h" 45 46using namespace std; 47 48 49class min_values_t; 50class mem_array; 51class uca_org_t; 52 53 54class powerComponents { 55public: 56 double dynamic; 57 double leakage; 58 double gate_leakage; 59 double short_circuit; 60 double longer_channel_leakage; 61 62 powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), longer_channel_leakage(0) { } 63 powerComponents(const powerComponents & obj) { 64 *this = obj; 65 } 66 powerComponents & operator=(const powerComponents & rhs) { 67 dynamic = rhs.dynamic; 68 leakage = rhs.leakage; 69 gate_leakage = rhs.gate_leakage; 70 short_circuit = rhs.short_circuit; 71 longer_channel_leakage = rhs.longer_channel_leakage; 72 return *this; 73 } 74 void reset() { 75 dynamic = 0; 76 leakage = 0; 77 gate_leakage = 0; 78 short_circuit = 0; 79 longer_channel_leakage = 0; 80 } 81 82 friend powerComponents operator+(const powerComponents & x, const powerComponents & y); 83 friend powerComponents operator*(const powerComponents & x, double const * const y); 84}; 85 86 87 88class powerDef { 89public: 90 powerComponents readOp; 91 powerComponents writeOp; 92 powerComponents searchOp;//Sheng: for CAM and FA 93 94 powerDef() : readOp(), writeOp(), searchOp() { } 95 void reset() { 96 readOp.reset(); 97 writeOp.reset(); 98 searchOp.reset(); 99 } 100 101 friend powerDef operator+(const powerDef & x, const powerDef & y); 102 friend powerDef operator*(const powerDef & x, double const * const y); 103}; 104 105enum Wire_type { 106 Global /* gloabl wires with repeaters */, 107 Global_5 /* 5% delay penalty */, 108 Global_10 /* 10% delay penalty */, 109 Global_20 /* 20% delay penalty */, 110 Global_30 /* 30% delay penalty */, 111 Low_swing /* differential 
low power wires with high area overhead */, 112 Semi_global /* mid-level wires with repeaters*/, 113 Transmission /* tranmission lines with high area overhead */, 114 Optical /* optical wires */, 115 Invalid_wtype 116}; 117 118 119 120class InputParameter { 121public: 122 void parse_cfg(const string & infile); 123 124 // return false if the input parameters are problematic 125 bool error_checking(string name = "CACTI"); 126 void display_ip(); 127 128 unsigned int cache_sz; // in bytes 129 unsigned int line_sz; 130 unsigned int assoc; 131 unsigned int nbanks; 132 unsigned int out_w;// == nr_bits_out 133 bool specific_tag; 134 unsigned int tag_w; 135 unsigned int access_mode; 136 unsigned int obj_func_dyn_energy; 137 unsigned int obj_func_dyn_power; 138 unsigned int obj_func_leak_power; 139 unsigned int obj_func_cycle_t; 140 141 double F_sz_nm; // feature size in nm 142 double F_sz_um; // feature size in um 143 unsigned int num_rw_ports; 144 unsigned int num_rd_ports; 145 unsigned int num_wr_ports; 146 unsigned int num_se_rd_ports; // number of single ended read ports 147 unsigned int num_search_ports; // Sheng: number of search ports for CAM 148 bool is_main_mem; 149 bool is_cache; 150 bool pure_ram; 151 bool pure_cam; 152 bool rpters_in_htree; // if there are repeaters in htree segment 153 unsigned int ver_htree_wires_over_array; 154 unsigned int broadcast_addr_din_over_ver_htrees; 155 unsigned int temp; 156 157 unsigned int ram_cell_tech_type; 158 unsigned int peri_global_tech_type; 159 unsigned int data_arr_ram_cell_tech_type; 160 unsigned int data_arr_peri_global_tech_type; 161 unsigned int tag_arr_ram_cell_tech_type; 162 unsigned int tag_arr_peri_global_tech_type; 163 164 unsigned int burst_len; 165 unsigned int int_prefetch_w; 166 unsigned int page_sz_bits; 167 168 unsigned int ic_proj_type; // interconnect_projection_type 169 unsigned int wire_is_mat_type; // wire_inside_mat_type 170 unsigned int wire_os_mat_type; // wire_outside_mat_type 171 enum Wire_type 
wt; 172 int force_wiretype; 173 bool print_input_args; 174 unsigned int nuca_cache_sz; // TODO 175 int ndbl, ndwl, nspd, ndsam1, ndsam2, ndcm; 176 bool force_cache_config; 177 178 int cache_level; 179 int cores; 180 int nuca_bank_count; 181 int force_nuca_bank; 182 183 int delay_wt, dynamic_power_wt, leakage_power_wt, 184 cycle_time_wt, area_wt; 185 int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca, 186 cycle_time_wt_nuca, area_wt_nuca; 187 188 int delay_dev, dynamic_power_dev, leakage_power_dev, 189 cycle_time_dev, area_dev; 190 int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca, 191 cycle_time_dev_nuca, area_dev_nuca; 192 int ed; //ED or ED2 optimization 193 int nuca; 194 195 bool fast_access; 196 unsigned int block_sz; // bytes 197 unsigned int tag_assoc; 198 unsigned int data_assoc; 199 bool is_seq_acc; 200 bool fully_assoc; 201 unsigned int nsets; // == number_of_sets 202 int print_detail; 203 204 205 bool add_ecc_b_; 206 //parameters for design constraint 207 double throughput; 208 double latency; 209 bool pipelinable; 210 int pipeline_stages; 211 int per_stage_vector; 212 bool with_clock_grid; 213}; 214 215 216typedef struct { 217 int Ndwl; 218 int Ndbl; 219 double Nspd; 220 int deg_bl_muxing; 221 int Ndsam_lev_1; 222 int Ndsam_lev_2; 223 int number_activated_mats_horizontal_direction; 224 int number_subbanks; 225 int page_size_in_bits; 226 double delay_route_to_bank; 227 double delay_crossbar; 228 double delay_addr_din_horizontal_htree; 229 double delay_addr_din_vertical_htree; 230 double delay_row_predecode_driver_and_block; 231 double delay_row_decoder; 232 double delay_bitlines; 233 double delay_sense_amp; 234 double delay_subarray_output_driver; 235 double delay_bit_mux_predecode_driver_and_block; 236 double delay_bit_mux_decoder; 237 double delay_senseamp_mux_lev_1_predecode_driver_and_block; 238 double delay_senseamp_mux_lev_1_decoder; 239 double delay_senseamp_mux_lev_2_predecode_driver_and_block; 240 double 
delay_senseamp_mux_lev_2_decoder; 241 double delay_input_htree; 242 double delay_output_htree; 243 double delay_dout_vertical_htree; 244 double delay_dout_horizontal_htree; 245 double delay_comparator; 246 double access_time; 247 double cycle_time; 248 double multisubbank_interleave_cycle_time; 249 double delay_request_network; 250 double delay_inside_mat; 251 double delay_reply_network; 252 double trcd; 253 double cas_latency; 254 double precharge_delay; 255 powerDef power_routing_to_bank; 256 powerDef power_addr_input_htree; 257 powerDef power_data_input_htree; 258 powerDef power_data_output_htree; 259 powerDef power_addr_horizontal_htree; 260 powerDef power_datain_horizontal_htree; 261 powerDef power_dataout_horizontal_htree; 262 powerDef power_addr_vertical_htree; 263 powerDef power_datain_vertical_htree; 264 powerDef power_row_predecoder_drivers; 265 powerDef power_row_predecoder_blocks; 266 powerDef power_row_decoders; 267 powerDef power_bit_mux_predecoder_drivers; 268 powerDef power_bit_mux_predecoder_blocks; 269 powerDef power_bit_mux_decoders; 270 powerDef power_senseamp_mux_lev_1_predecoder_drivers; 271 powerDef power_senseamp_mux_lev_1_predecoder_blocks; 272 powerDef power_senseamp_mux_lev_1_decoders; 273 powerDef power_senseamp_mux_lev_2_predecoder_drivers; 274 powerDef power_senseamp_mux_lev_2_predecoder_blocks; 275 powerDef power_senseamp_mux_lev_2_decoders; 276 powerDef power_bitlines; 277 powerDef power_sense_amps; 278 powerDef power_prechg_eq_drivers; 279 powerDef power_output_drivers_at_subarray; 280 powerDef power_dataout_vertical_htree; 281 powerDef power_comparators; 282 powerDef power_crossbar; 283 powerDef total_power; 284 double area; 285 double all_banks_height; 286 double all_banks_width; 287 double bank_height; 288 double bank_width; 289 double subarray_memory_cell_area_height; 290 double subarray_memory_cell_area_width; 291 double mat_height; 292 double mat_width; 293 double routing_area_height_within_bank; 294 double 
routing_area_width_within_bank; 295 double area_efficiency; 296 double refresh_power; 297 double dram_refresh_period; 298 double dram_array_availability; 299 double dyn_read_energy_from_closed_page; 300 double dyn_read_energy_from_open_page; 301 double leak_power_subbank_closed_page; 302 double leak_power_subbank_open_page; 303 double leak_power_request_and_reply_networks; 304 double activate_energy; 305 double read_energy; 306 double write_energy; 307 double precharge_energy; 308} results_mem_array; 309 310 311class uca_org_t { 312public: 313 mem_array * tag_array2; 314 mem_array * data_array2; 315 double access_time; 316 double cycle_time; 317 double area; 318 double area_efficiency; 319 powerDef power; 320 double leak_power_with_sleep_transistors_in_mats; 321 double cache_ht; 322 double cache_len; 323 char file_n[100]; 324 double vdd_periph_global; 325 bool valid; 326 results_mem_array tag_array; 327 results_mem_array data_array; 328 329 uca_org_t(); 330 void find_delay(); 331 void find_energy(); 332 void find_area(); 333 void find_cyc(); 334 void adjust_area();//for McPAT only to adjust routing overhead 335 void cleanup(); 336 ~uca_org_t() {}; 337}; 338 339void reconfigure(InputParameter *local_interface, uca_org_t *fin_res); 340 341uca_org_t cacti_interface(const string & infile_name); 342//McPAT's plain interface, please keep !!! 343uca_org_t cacti_interface(InputParameter * const local_interface); 344//McPAT's plain interface, please keep !!! 345uca_org_t init_interface(InputParameter * const local_interface, 346 const string &name); 347//McPAT's plain interface, please keep !!! 
348uca_org_t cacti_interface( 349 int cache_size, 350 int line_size, 351 int associativity, 352 int rw_ports, 353 int excl_read_ports, 354 int excl_write_ports, 355 int single_ended_read_ports, 356 int search_ports, 357 int banks, 358 double tech_node, 359 int output_width, 360 int specific_tag, 361 int tag_width, 362 int access_mode, 363 int cache, 364 int main_mem, 365 int obj_func_delay, 366 int obj_func_dynamic_power, 367 int obj_func_leakage_power, 368 int obj_func_cycle_time, 369 int obj_func_area, 370 int dev_func_delay, 371 int dev_func_dynamic_power, 372 int dev_func_leakage_power, 373 int dev_func_area, 374 int dev_func_cycle_time, 375 int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate 376 int temp, 377 int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing 378 int data_arr_ram_cell_tech_flavor_in, 379 int data_arr_peri_global_tech_flavor_in, 380 int tag_arr_ram_cell_tech_flavor_in, 381 int tag_arr_peri_global_tech_flavor_in, 382 int interconnect_projection_type_in, 383 int wire_inside_mat_type_in, 384 int wire_outside_mat_type_in, 385 int REPEATERS_IN_HTREE_SEGMENTS_in, 386 int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, 387 int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, 388 int PAGE_SIZE_BITS_in, 389 int BURST_LENGTH_in, 390 int INTERNAL_PREFETCH_WIDTH_in, 391 int force_wiretype, 392 int wiretype, 393 int force_config, 394 int ndwl, 395 int ndbl, 396 int nspd, 397 int ndcm, 398 int ndsam1, 399 int ndsam2, 400 int ecc); 401 402//Naveen's interface 403uca_org_t cacti_interface( 404 int cache_size, 405 int line_size, 406 int associativity, 407 int rw_ports, 408 int excl_read_ports, 409 int excl_write_ports, 410 int single_ended_read_ports, 411 int banks, 412 double tech_node, 413 int page_sz, 414 int burst_length, 415 int pre_width, 416 int output_width, 417 int specific_tag, 418 int tag_width, 419 int access_mode, //0 normal, 1 seq, 2 fast 420 int cache, //scratch ram or 
cache 421 int main_mem, 422 int obj_func_delay, 423 int obj_func_dynamic_power, 424 int obj_func_leakage_power, 425 int obj_func_area, 426 int obj_func_cycle_time, 427 int dev_func_delay, 428 int dev_func_dynamic_power, 429 int dev_func_leakage_power, 430 int dev_func_area, 431 int dev_func_cycle_time, 432 int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate 433 int temp, 434 int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing 435 int data_arr_ram_cell_tech_flavor_in, 436 int data_arr_peri_global_tech_flavor_in, 437 int tag_arr_ram_cell_tech_flavor_in, 438 int tag_arr_peri_global_tech_flavor_in, 439 int interconnect_projection_type_in, // 0 - aggressive, 1 - normal 440 int wire_inside_mat_type_in, 441 int wire_outside_mat_type_in, 442 int is_nuca, // 0 - UCA, 1 - NUCA 443 int core_count, 444 int cache_level, // 0 - L2, 1 - L3 445 int nuca_bank_count, 446 int nuca_obj_func_delay, 447 int nuca_obj_func_dynamic_power, 448 int nuca_obj_func_leakage_power, 449 int nuca_obj_func_area, 450 int nuca_obj_func_cycle_time, 451 int nuca_dev_func_delay, 452 int nuca_dev_func_dynamic_power, 453 int nuca_dev_func_leakage_power, 454 int nuca_dev_func_area, 455 int nuca_dev_func_cycle_time, 456 int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported 457 int p_input); 458 459class mem_array { 460public: 461 int Ndcm; 462 int Ndwl; 463 int Ndbl; 464 double Nspd; 465 int deg_bl_muxing; 466 int Ndsam_lev_1; 467 int Ndsam_lev_2; 468 double access_time; 469 double cycle_time; 470 double multisubbank_interleave_cycle_time; 471 double area_ram_cells; 472 double area; 473 powerDef power; 474 double delay_senseamp_mux_decoder; 475 double delay_before_subarray_output_driver; 476 double delay_from_subarray_output_driver_to_output; 477 double height; 478 double width; 479 480 double mat_height; 481 double mat_length; 482 double subarray_length; 483 double subarray_height; 484 
485 double delay_route_to_bank, 486 delay_input_htree, 487 delay_row_predecode_driver_and_block, 488 delay_row_decoder, 489 delay_bitlines, 490 delay_sense_amp, 491 delay_subarray_output_driver, 492 delay_dout_htree, 493 delay_comparator, 494 delay_matchlines; 495 496 double all_banks_height, 497 all_banks_width, 498 area_efficiency; 499 500 powerDef power_routing_to_bank; 501 powerDef power_addr_input_htree; 502 powerDef power_data_input_htree; 503 powerDef power_data_output_htree; 504 powerDef power_htree_in_search; 505 powerDef power_htree_out_search; 506 powerDef power_row_predecoder_drivers; 507 powerDef power_row_predecoder_blocks; 508 powerDef power_row_decoders; 509 powerDef power_bit_mux_predecoder_drivers; 510 powerDef power_bit_mux_predecoder_blocks; 511 powerDef power_bit_mux_decoders; 512 powerDef power_senseamp_mux_lev_1_predecoder_drivers; 513 powerDef power_senseamp_mux_lev_1_predecoder_blocks; 514 powerDef power_senseamp_mux_lev_1_decoders; 515 powerDef power_senseamp_mux_lev_2_predecoder_drivers; 516 powerDef power_senseamp_mux_lev_2_predecoder_blocks; 517 powerDef power_senseamp_mux_lev_2_decoders; 518 powerDef power_bitlines; 519 powerDef power_sense_amps; 520 powerDef power_prechg_eq_drivers; 521 powerDef power_output_drivers_at_subarray; 522 powerDef power_dataout_vertical_htree; 523 powerDef power_comparators; 524 525 powerDef power_cam_bitline_precharge_eq_drv; 526 powerDef power_searchline; 527 powerDef power_searchline_precharge; 528 powerDef power_matchlines; 529 powerDef power_matchline_precharge; 530 powerDef power_matchline_to_wordline_drv; 531 532 min_values_t *arr_min; 533 enum Wire_type wt; 534 535 // dram stats 536 double activate_energy, read_energy, write_energy, precharge_energy, 537 refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page, 538 leak_power_request_and_reply_networks; 539 540 double precharge_delay; 541 542 static bool lt(const mem_array * m1, const mem_array * m2); 543}; 544 545 546#endif 547