1/*****************************************************************************
2 *                                McPAT/CACTI
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
6 *                          All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 ***************************************************************************/
32
33
34
35#ifndef __CACTI_INTERFACE_H__
36#define __CACTI_INTERFACE_H__
37
38#include <iostream>
39#include <list>
40#include <map>
41#include <string>
42#include <vector>
43
44#include "const.h"
45
46using namespace std;
47
48
49class min_values_t;
50class mem_array;
51class uca_org_t;
52
53
54class powerComponents {
55public:
56    double dynamic;
57    double leakage;
58    double gate_leakage;
59    double short_circuit;
60    double longer_channel_leakage;
61
62    powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), longer_channel_leakage(0)  { }
63    powerComponents(const powerComponents & obj) {
64        *this = obj;
65    }
66    powerComponents & operator=(const powerComponents & rhs) {
67        dynamic = rhs.dynamic;
68        leakage = rhs.leakage;
69        gate_leakage  = rhs.gate_leakage;
70        short_circuit = rhs.short_circuit;
71        longer_channel_leakage = rhs.longer_channel_leakage;
72        return *this;
73    }
74    void reset() {
75        dynamic = 0;
76        leakage = 0;
77        gate_leakage = 0;
78        short_circuit = 0;
79        longer_channel_leakage = 0;
80    }
81
82    friend powerComponents operator+(const powerComponents & x, const powerComponents & y);
83    friend powerComponents operator*(const powerComponents & x, double const * const y);
84};
85
86
87
88class powerDef {
89public:
90    powerComponents readOp;
91    powerComponents writeOp;
92    powerComponents searchOp;//Sheng: for CAM and FA
93
94    powerDef() : readOp(), writeOp(), searchOp() { }
95    void reset() {
96        readOp.reset();
97        writeOp.reset();
98        searchOp.reset();
99    }
100
101    friend powerDef operator+(const powerDef & x, const powerDef & y);
102    friend powerDef operator*(const powerDef & x, double const * const y);
103};
104
105enum Wire_type {
106    Global /* gloabl wires with repeaters */,
107    Global_5 /* 5% delay penalty */,
108    Global_10 /* 10% delay penalty */,
109    Global_20 /* 20% delay penalty */,
110    Global_30 /* 30% delay penalty */,
111    Low_swing /* differential low power wires with high area overhead */,
112    Semi_global /* mid-level wires with repeaters*/,
113    Transmission /* tranmission lines with high area overhead */,
114    Optical /* optical wires */,
115    Invalid_wtype
116};
117
118
119
120class InputParameter {
121public:
122    void parse_cfg(const string & infile);
123
124    // return false if the input parameters are problematic
125    bool error_checking(string name = "CACTI");
126    void display_ip();
127
128    unsigned int cache_sz;  // in bytes
129    unsigned int line_sz;
130    unsigned int assoc;
131    unsigned int nbanks;
132    unsigned int out_w;// == nr_bits_out
133    bool     specific_tag;
134    unsigned int tag_w;
135    unsigned int access_mode;
136    unsigned int obj_func_dyn_energy;
137    unsigned int obj_func_dyn_power;
138    unsigned int obj_func_leak_power;
139    unsigned int obj_func_cycle_t;
140
141    double   F_sz_nm;          // feature size in nm
142    double   F_sz_um;          // feature size in um
143    unsigned int num_rw_ports;
144    unsigned int num_rd_ports;
145    unsigned int num_wr_ports;
146    unsigned int num_se_rd_ports;  // number of single ended read ports
147    unsigned int num_search_ports;  // Sheng: number of search ports for CAM
148    bool     is_main_mem;
149    bool     is_cache;
150    bool     pure_ram;
151    bool     pure_cam;
152    bool     rpters_in_htree;  // if there are repeaters in htree segment
153    unsigned int ver_htree_wires_over_array;
154    unsigned int broadcast_addr_din_over_ver_htrees;
155    unsigned int temp;
156
157    unsigned int ram_cell_tech_type;
158    unsigned int peri_global_tech_type;
159    unsigned int data_arr_ram_cell_tech_type;
160    unsigned int data_arr_peri_global_tech_type;
161    unsigned int tag_arr_ram_cell_tech_type;
162    unsigned int tag_arr_peri_global_tech_type;
163
164    unsigned int burst_len;
165    unsigned int int_prefetch_w;
166    unsigned int page_sz_bits;
167
168    unsigned int ic_proj_type;      // interconnect_projection_type
169    unsigned int wire_is_mat_type;  // wire_inside_mat_type
170    unsigned int wire_os_mat_type; // wire_outside_mat_type
171    enum Wire_type wt;
172    int force_wiretype;
173    bool print_input_args;
174    unsigned int nuca_cache_sz; // TODO
175    int ndbl, ndwl, nspd, ndsam1, ndsam2, ndcm;
176    bool force_cache_config;
177
178    int cache_level;
179    int cores;
180    int nuca_bank_count;
181    int force_nuca_bank;
182
183    int delay_wt, dynamic_power_wt, leakage_power_wt,
184    cycle_time_wt, area_wt;
185    int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca,
186    cycle_time_wt_nuca, area_wt_nuca;
187
188    int delay_dev, dynamic_power_dev, leakage_power_dev,
189    cycle_time_dev, area_dev;
190    int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca,
191    cycle_time_dev_nuca, area_dev_nuca;
192    int ed; //ED or ED2 optimization
193    int nuca;
194
195    bool     fast_access;
196    unsigned int block_sz;  // bytes
197    unsigned int tag_assoc;
198    unsigned int data_assoc;
199    bool     is_seq_acc;
200    bool     fully_assoc;
201    unsigned int nsets;  // == number_of_sets
202    int print_detail;
203
204
205    bool     add_ecc_b_;
206    //parameters for design constraint
207    double throughput;
208    double latency;
209    bool pipelinable;
210    int pipeline_stages;
211    int per_stage_vector;
212    bool with_clock_grid;
213};
214
215
216typedef struct {
217    int Ndwl;
218    int Ndbl;
219    double Nspd;
220    int deg_bl_muxing;
221    int Ndsam_lev_1;
222    int Ndsam_lev_2;
223    int number_activated_mats_horizontal_direction;
224    int number_subbanks;
225    int page_size_in_bits;
226    double delay_route_to_bank;
227    double delay_crossbar;
228    double delay_addr_din_horizontal_htree;
229    double delay_addr_din_vertical_htree;
230    double delay_row_predecode_driver_and_block;
231    double delay_row_decoder;
232    double delay_bitlines;
233    double delay_sense_amp;
234    double delay_subarray_output_driver;
235    double delay_bit_mux_predecode_driver_and_block;
236    double delay_bit_mux_decoder;
237    double delay_senseamp_mux_lev_1_predecode_driver_and_block;
238    double delay_senseamp_mux_lev_1_decoder;
239    double delay_senseamp_mux_lev_2_predecode_driver_and_block;
240    double delay_senseamp_mux_lev_2_decoder;
241    double delay_input_htree;
242    double delay_output_htree;
243    double delay_dout_vertical_htree;
244    double delay_dout_horizontal_htree;
245    double delay_comparator;
246    double access_time;
247    double cycle_time;
248    double multisubbank_interleave_cycle_time;
249    double delay_request_network;
250    double delay_inside_mat;
251    double delay_reply_network;
252    double trcd;
253    double cas_latency;
254    double precharge_delay;
255    powerDef power_routing_to_bank;
256    powerDef power_addr_input_htree;
257    powerDef power_data_input_htree;
258    powerDef power_data_output_htree;
259    powerDef power_addr_horizontal_htree;
260    powerDef power_datain_horizontal_htree;
261    powerDef power_dataout_horizontal_htree;
262    powerDef power_addr_vertical_htree;
263    powerDef power_datain_vertical_htree;
264    powerDef power_row_predecoder_drivers;
265    powerDef power_row_predecoder_blocks;
266    powerDef power_row_decoders;
267    powerDef power_bit_mux_predecoder_drivers;
268    powerDef power_bit_mux_predecoder_blocks;
269    powerDef power_bit_mux_decoders;
270    powerDef power_senseamp_mux_lev_1_predecoder_drivers;
271    powerDef power_senseamp_mux_lev_1_predecoder_blocks;
272    powerDef power_senseamp_mux_lev_1_decoders;
273    powerDef power_senseamp_mux_lev_2_predecoder_drivers;
274    powerDef power_senseamp_mux_lev_2_predecoder_blocks;
275    powerDef power_senseamp_mux_lev_2_decoders;
276    powerDef power_bitlines;
277    powerDef power_sense_amps;
278    powerDef power_prechg_eq_drivers;
279    powerDef power_output_drivers_at_subarray;
280    powerDef power_dataout_vertical_htree;
281    powerDef power_comparators;
282    powerDef power_crossbar;
283    powerDef total_power;
284    double area;
285    double all_banks_height;
286    double all_banks_width;
287    double bank_height;
288    double bank_width;
289    double subarray_memory_cell_area_height;
290    double subarray_memory_cell_area_width;
291    double mat_height;
292    double mat_width;
293    double routing_area_height_within_bank;
294    double routing_area_width_within_bank;
295    double area_efficiency;
296    double refresh_power;
297    double dram_refresh_period;
298    double dram_array_availability;
299    double dyn_read_energy_from_closed_page;
300    double dyn_read_energy_from_open_page;
301    double leak_power_subbank_closed_page;
302    double leak_power_subbank_open_page;
303    double leak_power_request_and_reply_networks;
304    double activate_energy;
305    double read_energy;
306    double write_energy;
307    double precharge_energy;
308} results_mem_array;
309
310
311class uca_org_t {
312public:
313    mem_array * tag_array2;
314    mem_array * data_array2;
315    double access_time;
316    double cycle_time;
317    double area;
318    double area_efficiency;
319    powerDef power;
320    double leak_power_with_sleep_transistors_in_mats;
321    double cache_ht;
322    double cache_len;
323    char file_n[100];
324    double vdd_periph_global;
325    bool valid;
326    results_mem_array tag_array;
327    results_mem_array data_array;
328
329    uca_org_t();
330    void find_delay();
331    void find_energy();
332    void find_area();
333    void find_cyc();
334    void adjust_area();//for McPAT only to adjust routing overhead
335    void cleanup();
336    ~uca_org_t() {};
337};
338
339void reconfigure(InputParameter *local_interface, uca_org_t *fin_res);
340
341uca_org_t cacti_interface(const string & infile_name);
342//McPAT's plain interface, please keep !!!
343uca_org_t cacti_interface(InputParameter * const local_interface);
344//McPAT's plain interface, please keep !!!
345uca_org_t init_interface(InputParameter * const local_interface,
346                         const string &name);
347//McPAT's plain interface, please keep !!!
348uca_org_t cacti_interface(
349    int cache_size,
350    int line_size,
351    int associativity,
352    int rw_ports,
353    int excl_read_ports,
354    int excl_write_ports,
355    int single_ended_read_ports,
356    int search_ports,
357    int banks,
358    double tech_node,
359    int output_width,
360    int specific_tag,
361    int tag_width,
362    int access_mode,
363    int cache,
364    int main_mem,
365    int obj_func_delay,
366    int obj_func_dynamic_power,
367    int obj_func_leakage_power,
368    int obj_func_cycle_time,
369    int obj_func_area,
370    int dev_func_delay,
371    int dev_func_dynamic_power,
372    int dev_func_leakage_power,
373    int dev_func_area,
374    int dev_func_cycle_time,
375    int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
376    int temp,
377    int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
378    int data_arr_ram_cell_tech_flavor_in,
379    int data_arr_peri_global_tech_flavor_in,
380    int tag_arr_ram_cell_tech_flavor_in,
381    int tag_arr_peri_global_tech_flavor_in,
382    int interconnect_projection_type_in,
383    int wire_inside_mat_type_in,
384    int wire_outside_mat_type_in,
385    int REPEATERS_IN_HTREE_SEGMENTS_in,
386    int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
387    int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
388    int PAGE_SIZE_BITS_in,
389    int BURST_LENGTH_in,
390    int INTERNAL_PREFETCH_WIDTH_in,
391    int force_wiretype,
392    int wiretype,
393    int force_config,
394    int ndwl,
395    int ndbl,
396    int nspd,
397    int ndcm,
398    int ndsam1,
399    int ndsam2,
400    int ecc);
401
402//Naveen's interface
403uca_org_t cacti_interface(
404    int cache_size,
405    int line_size,
406    int associativity,
407    int rw_ports,
408    int excl_read_ports,
409    int excl_write_ports,
410    int single_ended_read_ports,
411    int banks,
412    double tech_node,
413    int page_sz,
414    int burst_length,
415    int pre_width,
416    int output_width,
417    int specific_tag,
418    int tag_width,
419    int access_mode, //0 normal, 1 seq, 2 fast
420    int cache, //scratch ram or cache
421    int main_mem,
422    int obj_func_delay,
423    int obj_func_dynamic_power,
424    int obj_func_leakage_power,
425    int obj_func_area,
426    int obj_func_cycle_time,
427    int dev_func_delay,
428    int dev_func_dynamic_power,
429    int dev_func_leakage_power,
430    int dev_func_area,
431    int dev_func_cycle_time,
432    int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
433    int temp,
434    int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
435    int data_arr_ram_cell_tech_flavor_in,
436    int data_arr_peri_global_tech_flavor_in,
437    int tag_arr_ram_cell_tech_flavor_in,
438    int tag_arr_peri_global_tech_flavor_in,
439    int interconnect_projection_type_in, // 0 - aggressive, 1 - normal
440    int wire_inside_mat_type_in,
441    int wire_outside_mat_type_in,
442    int is_nuca, // 0 - UCA, 1 - NUCA
443    int core_count,
444    int cache_level, // 0 - L2, 1 - L3
445    int nuca_bank_count,
446    int nuca_obj_func_delay,
447    int nuca_obj_func_dynamic_power,
448    int nuca_obj_func_leakage_power,
449    int nuca_obj_func_area,
450    int nuca_obj_func_cycle_time,
451    int nuca_dev_func_delay,
452    int nuca_dev_func_dynamic_power,
453    int nuca_dev_func_leakage_power,
454    int nuca_dev_func_area,
455    int nuca_dev_func_cycle_time,
456    int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
457    int p_input);
458
459class mem_array {
460public:
461    int    Ndcm;
462    int    Ndwl;
463    int    Ndbl;
464    double Nspd;
465    int    deg_bl_muxing;
466    int    Ndsam_lev_1;
467    int    Ndsam_lev_2;
468    double access_time;
469    double cycle_time;
470    double multisubbank_interleave_cycle_time;
471    double area_ram_cells;
472    double area;
473    powerDef power;
474    double delay_senseamp_mux_decoder;
475    double delay_before_subarray_output_driver;
476    double delay_from_subarray_output_driver_to_output;
477    double height;
478    double width;
479
480    double mat_height;
481    double mat_length;
482    double subarray_length;
483    double subarray_height;
484
485    double delay_route_to_bank,
486    delay_input_htree,
487    delay_row_predecode_driver_and_block,
488    delay_row_decoder,
489    delay_bitlines,
490    delay_sense_amp,
491    delay_subarray_output_driver,
492    delay_dout_htree,
493    delay_comparator,
494    delay_matchlines;
495
496    double all_banks_height,
497    all_banks_width,
498    area_efficiency;
499
500    powerDef power_routing_to_bank;
501    powerDef power_addr_input_htree;
502    powerDef power_data_input_htree;
503    powerDef power_data_output_htree;
504    powerDef power_htree_in_search;
505    powerDef power_htree_out_search;
506    powerDef power_row_predecoder_drivers;
507    powerDef power_row_predecoder_blocks;
508    powerDef power_row_decoders;
509    powerDef power_bit_mux_predecoder_drivers;
510    powerDef power_bit_mux_predecoder_blocks;
511    powerDef power_bit_mux_decoders;
512    powerDef power_senseamp_mux_lev_1_predecoder_drivers;
513    powerDef power_senseamp_mux_lev_1_predecoder_blocks;
514    powerDef power_senseamp_mux_lev_1_decoders;
515    powerDef power_senseamp_mux_lev_2_predecoder_drivers;
516    powerDef power_senseamp_mux_lev_2_predecoder_blocks;
517    powerDef power_senseamp_mux_lev_2_decoders;
518    powerDef power_bitlines;
519    powerDef power_sense_amps;
520    powerDef power_prechg_eq_drivers;
521    powerDef power_output_drivers_at_subarray;
522    powerDef power_dataout_vertical_htree;
523    powerDef power_comparators;
524
525    powerDef power_cam_bitline_precharge_eq_drv;
526    powerDef power_searchline;
527    powerDef power_searchline_precharge;
528    powerDef power_matchlines;
529    powerDef power_matchline_precharge;
530    powerDef power_matchline_to_wordline_drv;
531
532    min_values_t *arr_min;
533    enum Wire_type wt;
534
535    // dram stats
536    double activate_energy, read_energy, write_energy, precharge_energy,
537    refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
538    leak_power_request_and_reply_networks;
539
540    double precharge_delay;
541
542    static bool lt(const mem_array * m1, const mem_array * m2);
543};
544
545
546#endif
547