1/*****************************************************************************
2 *                                McPAT/CACTI
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
6 *                          All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 ***************************************************************************/
32
33
34
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>

#include "Ucache.h"
#include "arbiter.h"
#include "area.h"
#include "basic_circuit.h"
#include "crossbar.h"
#include "io.h"
#include "nuca.h"
#include "parameter.h"
//#include "highradix.h"
48
49using namespace std;
50
51
52/* Parses "cache.cfg" file */
53void
54InputParameter::parse_cfg(const string & in_file) {
55    FILE *fp = fopen(in_file.c_str(), "r");
56    char line[5000];
57    char jk[5000];
58    char temp_var[5000];
59
60    if (!fp) {
61        cout << in_file << " is missing!\n";
62        exit(-1);
63    }
64
65    while (fscanf(fp, "%[^\n]\n", line) != EOF) {
66
67        if (!strncmp("-size", line, strlen("-size"))) {
68            sscanf(line, "-size %[(:-~)*]%u", jk, &(cache_sz));
69            continue;
70        }
71
72        if (!strncmp("-page size", line, strlen("-page size"))) {
73            sscanf(line, "-page size %[(:-~)*]%u", jk, &(page_sz_bits));
74            continue;
75        }
76
77        if (!strncmp("-burst length", line, strlen("-burst length"))) {
78            sscanf(line, "-burst %[(:-~)*]%u", jk, &(burst_len));
79            continue;
80        }
81
82        if (!strncmp("-internal prefetch width", line, strlen("-internal prefetch width"))) {
83            sscanf(line, "-internal prefetch %[(:-~)*]%u", jk, &(int_prefetch_w));
84            continue;
85        }
86
87        if (!strncmp("-block", line, strlen("-block"))) {
88            sscanf(line, "-block size (bytes) %d", &(line_sz));
89            continue;
90        }
91
92        if (!strncmp("-associativity", line, strlen("-associativity"))) {
93            sscanf(line, "-associativity %d", &(assoc));
94            continue;
95        }
96
97        if (!strncmp("-read-write", line, strlen("-read-write"))) {
98            sscanf(line, "-read-write port %d", &(num_rw_ports));
99            continue;
100        }
101
102        if (!strncmp("-exclusive read", line, strlen("exclusive read"))) {
103            sscanf(line, "-exclusive read port %d", &(num_rd_ports));
104            continue;
105        }
106
107        if (!strncmp("-exclusive write", line, strlen("-exclusive write"))) {
108            sscanf(line, "-exclusive write port %d", &(num_wr_ports));
109            continue;
110        }
111
112        if (!strncmp("-single ended", line, strlen("-single ended"))) {
113            sscanf(line, "-single %[(:-~)*]%d", jk,
114                   &(num_se_rd_ports));
115            continue;
116        }
117
118        if (!strncmp("-search", line, strlen("-search"))) {
119            sscanf(line, "-search port %d", &(num_search_ports));
120            continue;
121        }
122
123        if (!strncmp("-UCA bank", line, strlen("-UCA bank"))) {
124            sscanf(line, "-UCA bank%[((:-~)| )*]%d", jk, &(nbanks));
125            continue;
126        }
127
128        if (!strncmp("-technology", line, strlen("-technology"))) {
129            sscanf(line, "-technology (u) %lf", &(F_sz_um));
130            F_sz_nm = F_sz_um * 1000;
131            continue;
132        }
133
134        if (!strncmp("-output/input", line, strlen("-output/input"))) {
135            sscanf(line, "-output/input bus %[(:-~)*]%d", jk, &(out_w));
136            continue;
137        }
138
139        if (!strncmp("-operating temperature", line, strlen("-operating temperature"))) {
140            sscanf(line, "-operating temperature %[(:-~)*]%d", jk, &(temp));
141            continue;
142        }
143
144        if (!strncmp("-cache type", line, strlen("-cache type"))) {
145            sscanf(line, "-cache type%[^\"]\"%[^\"]\"", jk, temp_var);
146
147            if (!strncmp("cache", temp_var, sizeof("cache"))) {
148                is_cache = true;
149            } else {
150                is_cache = false;
151            }
152
153            if (!strncmp("main memory", temp_var, sizeof("main memory"))) {
154                is_main_mem = true;
155            } else {
156                is_main_mem = false;
157            }
158
159            if (!strncmp("cam", temp_var, sizeof("cam"))) {
160                pure_cam = true;
161            } else {
162                pure_cam = false;
163            }
164
165            if (!strncmp("ram", temp_var, sizeof("ram"))) {
166                pure_ram = true;
167            } else {
168                if (!is_main_mem)
169                    pure_ram = false;
170                else
171                    pure_ram = true;
172            }
173
174            continue;
175        }
176
177
178        if (!strncmp("-tag size", line, strlen("-tag size"))) {
179            sscanf(line, "-tag size%[^\"]\"%[^\"]\"", jk, temp_var);
180            if (!strncmp("default", temp_var, sizeof("default"))) {
181                specific_tag = false;
182                tag_w = 42; /* the acutal value is calculated
183                     * later based on the cache size, bank count, and associativity
184                     */
185            } else {
186                specific_tag = true;
187                sscanf(line, "-tag size (b) %d", &(tag_w));
188            }
189            continue;
190        }
191
192        if (!strncmp("-access mode", line, strlen("-access mode"))) {
193            sscanf(line, "-access %[^\"]\"%[^\"]\"", jk, temp_var);
194            if (!strncmp("fast", temp_var, strlen("fast"))) {
195                access_mode = 2;
196            } else if (!strncmp("sequential", temp_var, strlen("sequential"))) {
197                access_mode = 1;
198            } else if (!strncmp("normal", temp_var, strlen("normal"))) {
199                access_mode = 0;
200            } else {
201                cout << "ERROR: Invalid access mode!\n";
202                exit(0);
203            }
204            continue;
205        }
206
207        if (!strncmp("-Data array cell type", line,
208                     strlen("-Data array cell type"))) {
209            sscanf(line, "-Data array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
210
211            if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
212                data_arr_ram_cell_tech_type = 0;
213            } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
214                data_arr_ram_cell_tech_type = 1;
215            } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
216                data_arr_ram_cell_tech_type = 2;
217            } else if (!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
218                data_arr_ram_cell_tech_type = 3;
219            } else if (!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
220                data_arr_ram_cell_tech_type = 4;
221            } else {
222                cout << "ERROR: Invalid type!\n";
223                exit(0);
224            }
225            continue;
226        }
227
228        if (!strncmp("-Data array peripheral type", line, strlen("-Data array peripheral type"))) {
229            sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
230
231            if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
232                data_arr_peri_global_tech_type = 0;
233            } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
234                data_arr_peri_global_tech_type = 1;
235            } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
236                data_arr_peri_global_tech_type = 2;
237            } else {
238                cout << "ERROR: Invalid type!\n";
239                exit(0);
240            }
241            continue;
242        }
243
244        if (!strncmp("-Tag array cell type", line, strlen("-Tag array cell type"))) {
245            sscanf(line, "-Tag array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
246
247            if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
248                tag_arr_ram_cell_tech_type = 0;
249            } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
250                tag_arr_ram_cell_tech_type = 1;
251            } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
252                tag_arr_ram_cell_tech_type = 2;
253            } else if (!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
254                tag_arr_ram_cell_tech_type = 3;
255            } else if (!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
256                tag_arr_ram_cell_tech_type = 4;
257            } else {
258                cout << "ERROR: Invalid type!\n";
259                exit(0);
260            }
261            continue;
262        }
263
264        if (!strncmp("-Tag array peripheral type", line, strlen("-Tag array peripheral type"))) {
265            sscanf(line, "-Tag array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
266
267            if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
268                tag_arr_peri_global_tech_type = 0;
269            } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
270                tag_arr_peri_global_tech_type = 1;
271            } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
272                tag_arr_peri_global_tech_type = 2;
273            } else {
274                cout << "ERROR: Invalid type!\n";
275                exit(0);
276            }
277            continue;
278        }
279        if (!strncmp("-design", line, strlen("-design"))) {
280            sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
281                   &(delay_wt), &(dynamic_power_wt),
282                   &(leakage_power_wt),
283                   &(cycle_time_wt), &(area_wt));
284            continue;
285        }
286
287        if (!strncmp("-deviate", line, strlen("-deviate"))) {
288            sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
289                   &(delay_dev), &(dynamic_power_dev),
290                   &(leakage_power_dev),
291                   &(cycle_time_dev), &(area_dev));
292            continue;
293        }
294
295        if (!strncmp("-Optimize", line, strlen("-Optimize"))) {
296            sscanf(line, "-Optimize  %[^\"]\"%[^\"]\"", jk, temp_var);
297
298            if (!strncmp("ED^2", temp_var, strlen("ED^2"))) {
299                ed = 2;
300            } else if (!strncmp("ED", temp_var, strlen("ED"))) {
301                ed = 1;
302            } else {
303                ed = 0;
304            }
305        }
306
307        if (!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) {
308            sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
309                   &(delay_wt_nuca), &(dynamic_power_wt_nuca),
310                   &(leakage_power_wt_nuca),
311                   &(cycle_time_wt_nuca), &(area_wt_nuca));
312            continue;
313        }
314
315        if (!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) {
316            sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
317                   &(delay_dev_nuca), &(dynamic_power_dev_nuca),
318                   &(leakage_power_dev_nuca),
319                   &(cycle_time_dev_nuca), &(area_dev_nuca));
320            continue;
321        }
322
323        if (!strncmp("-Cache model", line, strlen("-cache model"))) {
324            sscanf(line, "-Cache model %[^\"]\"%[^\"]\"", jk, temp_var);
325
326            if (!strncmp("UCA", temp_var, strlen("UCA"))) {
327                nuca = 0;
328            } else {
329                nuca = 1;
330            }
331            continue;
332        }
333
334        if (!strncmp("-NUCA bank", line, strlen("-NUCA bank"))) {
335            sscanf(line, "-NUCA bank count %d", &(nuca_bank_count));
336
337            if (nuca_bank_count != 0) {
338                force_nuca_bank = 1;
339            }
340            continue;
341        }
342
343        if (!strncmp("-Wire inside mat", line, strlen("-Wire inside mat"))) {
344            sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
345
346            if (!strncmp("global", temp_var, strlen("global"))) {
347                wire_is_mat_type = 2;
348                continue;
349            } else if (!strncmp("local", temp_var, strlen("local"))) {
350                wire_is_mat_type = 0;
351                continue;
352            } else {
353                wire_is_mat_type = 1;
354                continue;
355            }
356        }
357
358        if (!strncmp("-Wire outside mat", line, strlen("-Wire outside mat"))) {
359            sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
360
361            if (!strncmp("global", temp_var, strlen("global"))) {
362                wire_os_mat_type = 2;
363            } else {
364                wire_os_mat_type = 1;
365            }
366            continue;
367        }
368
369        if (!strncmp("-Interconnect projection", line, strlen("-Interconnect projection"))) {
370            sscanf(line, "-Interconnect projection%[^\"]\"%[^\"]\"", jk, temp_var);
371
372            if (!strncmp("aggressive", temp_var, strlen("aggressive"))) {
373                ic_proj_type = 0;
374            } else {
375                ic_proj_type = 1;
376            }
377            continue;
378        }
379
380        if (!strncmp("-Wire signalling", line, strlen("-wire signalling"))) {
381            sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
382
383            if (!strncmp("default", temp_var, strlen("default"))) {
384                force_wiretype = 0;
385                wt = Global;
386            } else if (!(strncmp("Global_10", temp_var, strlen("Global_10")))) {
387                force_wiretype = 1;
388                wt = Global_10;
389            } else if (!(strncmp("Global_20", temp_var, strlen("Global_20")))) {
390                force_wiretype = 1;
391                wt = Global_20;
392            } else if (!(strncmp("Global_30", temp_var, strlen("Global_30")))) {
393                force_wiretype = 1;
394                wt = Global_30;
395            } else if (!(strncmp("Global_5", temp_var, strlen("Global_5")))) {
396                force_wiretype = 1;
397                wt = Global_5;
398            } else if (!(strncmp("Global", temp_var, strlen("Global")))) {
399                force_wiretype = 1;
400                wt = Global;
401            } else {
402                wt = Low_swing;
403                force_wiretype = 1;
404            }
405            continue;
406        }
407
408
409
410        if (!strncmp("-Core", line, strlen("-Core"))) {
411            sscanf(line, "-Core count %d\n", &(cores));
412            if (cores > 16) {
413                printf("No. of cores should be less than 16!\n");
414            }
415            continue;
416        }
417
418        if (!strncmp("-Cache level", line, strlen("-Cache level"))) {
419            sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var);
420            if (!strncmp("L2", temp_var, strlen("L2"))) {
421                cache_level = 0;
422            } else {
423                cache_level = 1;
424            }
425        }
426
427        if (!strncmp("-Print level", line, strlen("-Print level"))) {
428            sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var);
429            if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) {
430                print_detail = 1;
431            } else {
432                print_detail = 0;
433            }
434
435        }
436        if (!strncmp("-Add ECC", line, strlen("-Add ECC"))) {
437            sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var);
438            if (!strncmp("true", temp_var, strlen("true"))) {
439                add_ecc_b_ = true;
440            } else {
441                add_ecc_b_ = false;
442            }
443        }
444
445        if (!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) {
446            sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var);
447            if (!strncmp("true", temp_var, strlen("true"))) {
448                print_input_args = true;
449            } else {
450                print_input_args = false;
451            }
452        }
453
454        if (!strncmp("-Force cache config", line, strlen("-Force cache config"))) {
455            sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var);
456            if (!strncmp("true", temp_var, strlen("true"))) {
457                force_cache_config = true;
458            } else {
459                force_cache_config = false;
460            }
461        }
462
463        if (!strncmp("-Ndbl", line, strlen("-Ndbl"))) {
464            sscanf(line, "-Ndbl %d\n", &(ndbl));
465            continue;
466        }
467        if (!strncmp("-Ndwl", line, strlen("-Ndwl"))) {
468            sscanf(line, "-Ndwl %d\n", &(ndwl));
469            continue;
470        }
471        if (!strncmp("-Nspd", line, strlen("-Nspd"))) {
472            sscanf(line, "-Nspd %d\n", &(nspd));
473            continue;
474        }
475        if (!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) {
476            sscanf(line, "-Ndsam1 %d\n", &(ndsam1));
477            continue;
478        }
479        if (!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) {
480            sscanf(line, "-Ndsam2 %d\n", &(ndsam2));
481            continue;
482        }
483        if (!strncmp("-Ndcm", line, strlen("-Ndcm"))) {
484            sscanf(line, "-Ndcm %d\n", &(ndcm));
485            continue;
486        }
487
488    }
489    rpters_in_htree = true;
490    fclose(fp);
491}
492
493void
494InputParameter::display_ip() {
495    cout << "Cache size                    : " << cache_sz << endl;
496    cout << "Block size                    : " << line_sz << endl;
497    cout << "Associativity                 : " << assoc << endl;
498    cout << "Read only ports               : " << num_rd_ports << endl;
499    cout << "Write only ports              : " << num_wr_ports << endl;
500    cout << "Read write ports              : " << num_rw_ports << endl;
501    cout << "Single ended read ports       : " << num_se_rd_ports << endl;
502    if (fully_assoc || pure_cam) {
503        cout << "Search ports                  : " << num_search_ports << endl;
504    }
505    cout << "Cache banks (UCA)             : " << nbanks << endl;
506    cout << "Technology                    : " << F_sz_um << endl;
507    cout << "Temperature                   : " << temp << endl;
508    cout << "Tag size                      : " << tag_w << endl;
509    if (is_cache) {
510        cout << "array type                    : " << "Cache" << endl;
511    }
512    if (pure_ram) {
513        cout << "array type                    : " << "Scratch RAM" << endl;
514    }
515    if (pure_cam) {
516        cout << "array type                    : " << "CAM" << endl;
517    }
518    cout << "Model as memory               : " << is_main_mem << endl;
519    cout << "Access mode                   : " << access_mode << endl;
520    cout << "Data array cell type          : " << data_arr_ram_cell_tech_type << endl;
521    cout << "Data array peripheral type    : " << data_arr_peri_global_tech_type << endl;
522    cout << "Tag array cell type           : " << tag_arr_ram_cell_tech_type << endl;
523    cout << "Tag array peripheral type     : " << tag_arr_peri_global_tech_type << endl;
524    cout << "Optimization target           : " << ed << endl;
525    cout << "Design objective (UCA wt)     : " << delay_wt << " "
526         << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt
527         << " " << area_wt << endl;
528    cout << "Design objective (UCA dev)    : " << delay_dev << " "
529         << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev
530         << " " << area_dev << endl;
531    if (nuca) {
532        cout << "Cores                         : " << cores << endl;
533
534
535        cout << "Design objective (NUCA wt)    : " << delay_wt_nuca << " "
536             << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca
537             << " " << area_wt_nuca << endl;
538        cout << "Design objective (NUCA dev)   : " << delay_dev_nuca << " "
539             << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca
540             << " " << area_dev_nuca << endl;
541    }
542    cout << "Cache model                   : " << nuca << endl;
543    cout << "Nuca bank                     : " << nuca_bank_count << endl;
544    cout << "Wire inside mat               : " << wire_is_mat_type << endl;
545    cout << "Wire outside mat              : " << wire_os_mat_type << endl;
546    cout << "Interconnect projection       : " << ic_proj_type << endl;
547    cout << "Wire signalling               : " << force_wiretype << endl;
548    cout << "Print level                   : " << print_detail << endl;
549    cout << "ECC overhead                  : " << add_ecc_b_ << endl;
550    cout << "Page size                     : " << page_sz_bits << endl;
551    cout << "Burst length                  : " << burst_len << endl;
552    cout << "Internal prefetch width       : " << int_prefetch_w << endl;
553    cout << "Force cache config            : " << g_ip->force_cache_config << endl;
554    if (g_ip->force_cache_config) {
555        cout << "Ndwl                          : " << g_ip->ndwl << endl;
556        cout << "Ndbl                          : " << g_ip->ndbl << endl;
557        cout << "Nspd                          : " << g_ip->nspd << endl;
558        cout << "Ndcm                          : " << g_ip->ndcm << endl;
559        cout << "Ndsam1                        : " << g_ip->ndsam1 << endl;
560        cout << "Ndsam2                        : " << g_ip->ndsam2 << endl;
561    }
562}
563
564
565
566powerComponents operator+(const powerComponents & x, const powerComponents & y) {
567    powerComponents z;
568
569    z.dynamic = x.dynamic + y.dynamic;
570    z.leakage = x.leakage + y.leakage;
571    z.gate_leakage  = x.gate_leakage  + y.gate_leakage;
572    z.short_circuit = x.short_circuit + y.short_circuit;
573    z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage;
574
575    return z;
576}
577
578powerComponents operator*(const powerComponents & x, double const * const y) {
579    powerComponents z;
580
581    z.dynamic = x.dynamic * y[0];
582    z.leakage = x.leakage * y[1];
583    z.gate_leakage  = x.gate_leakage * y[2];
584    z.short_circuit = x.short_circuit * y[3];
585    //longer channel leakage has the same behavior as normal leakage
586    z.longer_channel_leakage = x.longer_channel_leakage * y[1];
587
588    return z;
589}
590
591
592powerDef operator+(const powerDef & x, const powerDef & y) {
593    powerDef z;
594
595    z.readOp   = x.readOp  + y.readOp;
596    z.writeOp  = x.writeOp + y.writeOp;
597    z.searchOp = x.searchOp + y.searchOp;
598    return z;
599}
600
601powerDef operator*(const powerDef & x, double const * const y) {
602    powerDef z;
603
604    z.readOp   = x.readOp * y;
605    z.writeOp  = x.writeOp * y;
606    z.searchOp = x.searchOp * y;
607    return z;
608}
609
610uca_org_t cacti_interface(const string & infile_name) {
611
612    uca_org_t fin_res;
613    //uca_org_t result;
614    fin_res.valid = false;
615
616    g_ip = new InputParameter();
617    g_ip->parse_cfg(infile_name);
618    if (!g_ip->error_checking(infile_name))
619        exit(0);
620    if (g_ip->print_input_args)
621        g_ip->display_ip();
622
623    init_tech_params(g_ip->F_sz_um, false);
624    Wire winit; // Do not delete this line. It initializes wires.
625
626
627//  For HighRadix Only
628//  ////  Wire wirea(g_ip->wt, 1000);
629//  ////  wirea.print_wire();
630//  ////  cout << "Wire Area " << wirea.area.get_area() << " sq. u" << endl;
631//  //  winit.print_wire();
632//  //
633//    HighRadix *hr;
634//      hr = new HighRadix();
635//      hr->compute_power();
636//      hr->print_router();
637//    exit(0);
638//
639//    double sub_switch_sz = 2;
640//    double rows = 32;
641//    for (int i=0; i<6; i++) {
642//      sub_switch_sz = pow(2, i);
643//      rows = 64/sub_switch_sz;
644//      hr = new HighRadix(sub_switch_sz, rows, .8/* freq */, 64, 2, 64, 0.7);
645//      hr->compute_power();
646//      hr->print_router();
647//      delete hr;
648//    }
649//  //  HighRadix yarc;
650//  //  yarc.compute_power();
651//  //  yarc.print_router();
652//    winit.print_wire();
653//    exit(0);
654//  For HighRadix Only End
655
656    if (g_ip->nuca == 1) {
657        Nuca n(&g_tp.peri_global);
658        n.sim_nuca();
659    }
660    g_ip->display_ip();
661    solve(&fin_res);
662
663    output_UCA(&fin_res);
664    output_data_csv(fin_res);
665
666    delete (g_ip);
667    return fin_res;
668}
669
670//cacti6.5's plain interface, please keep !!!
671uca_org_t cacti_interface(
672    int cache_size,
673    int line_size,
674    int associativity,
675    int rw_ports,
676    int excl_read_ports,
677    int excl_write_ports,
678    int single_ended_read_ports,
679    int banks,
680    double tech_node, // in nm
681    int page_sz,
682    int burst_length,
683    int pre_width,
684    int output_width,
685    int specific_tag,
686    int tag_width,
687    int access_mode, //0 normal, 1 seq, 2 fast
688    int cache, //scratch ram or cache
689    int main_mem,
690    int obj_func_delay,
691    int obj_func_dynamic_power,
692    int obj_func_leakage_power,
693    int obj_func_area,
694    int obj_func_cycle_time,
695    int dev_func_delay,
696    int dev_func_dynamic_power,
697    int dev_func_leakage_power,
698    int dev_func_area,
699    int dev_func_cycle_time,
700    int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
701    int temp,
702    int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
703    int data_arr_ram_cell_tech_flavor_in, // 0-4
704    int data_arr_peri_global_tech_flavor_in,
705    int tag_arr_ram_cell_tech_flavor_in,
706    int tag_arr_peri_global_tech_flavor_in,
707    int interconnect_projection_type_in, // 0 - aggressive, 1 - normal
708    int wire_inside_mat_type_in,
709    int wire_outside_mat_type_in,
710    int is_nuca, // 0 - UCA, 1 - NUCA
711    int core_count,
712    int cache_level, // 0 - L2, 1 - L3
713    int nuca_bank_count,
714    int nuca_obj_func_delay,
715    int nuca_obj_func_dynamic_power,
716    int nuca_obj_func_leakage_power,
717    int nuca_obj_func_area,
718    int nuca_obj_func_cycle_time,
719    int nuca_dev_func_delay,
720    int nuca_dev_func_dynamic_power,
721    int nuca_dev_func_leakage_power,
722    int nuca_dev_func_area,
723    int nuca_dev_func_cycle_time,
724    int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
725    int p_input) {
726    g_ip = new InputParameter();
727    g_ip->add_ecc_b_ = true;
728
729    g_ip->data_arr_ram_cell_tech_type    = data_arr_ram_cell_tech_flavor_in;
730    g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
731    g_ip->tag_arr_ram_cell_tech_type     = tag_arr_ram_cell_tech_flavor_in;
732    g_ip->tag_arr_peri_global_tech_type  = tag_arr_peri_global_tech_flavor_in;
733
734    g_ip->ic_proj_type     = interconnect_projection_type_in;
735    g_ip->wire_is_mat_type = wire_inside_mat_type_in;
736    g_ip->wire_os_mat_type = wire_outside_mat_type_in;
737    g_ip->burst_len        = burst_length;
738    g_ip->int_prefetch_w   = pre_width;
739    g_ip->page_sz_bits     = page_sz;
740
741    g_ip->cache_sz            = cache_size;
742    g_ip->line_sz             = line_size;
743    g_ip->assoc               = associativity;
744    g_ip->nbanks              = banks;
745    g_ip->out_w               = output_width;
746    g_ip->specific_tag        = specific_tag;
747    if (tag_width == 0) {
748        g_ip->tag_w = 42;
749    } else {
750        g_ip->tag_w               = tag_width;
751    }
752
753    g_ip->access_mode         = access_mode;
754    g_ip->delay_wt = obj_func_delay;
755    g_ip->dynamic_power_wt = obj_func_dynamic_power;
756    g_ip->leakage_power_wt = obj_func_leakage_power;
757    g_ip->area_wt = obj_func_area;
758    g_ip->cycle_time_wt    = obj_func_cycle_time;
759    g_ip->delay_dev = dev_func_delay;
760    g_ip->dynamic_power_dev = dev_func_dynamic_power;
761    g_ip->leakage_power_dev = dev_func_leakage_power;
762    g_ip->area_dev = dev_func_area;
763    g_ip->cycle_time_dev    = dev_func_cycle_time;
764    g_ip->ed = ed_ed2_none;
765
766    switch (wt) {
767    case (0):
768        g_ip->force_wiretype = 0;
769        g_ip->wt = Global;
770        break;
771    case (1):
772        g_ip->force_wiretype = 1;
773        g_ip->wt = Global;
774        break;
775    case (2):
776        g_ip->force_wiretype = 1;
777        g_ip->wt = Global_5;
778        break;
779    case (3):
780        g_ip->force_wiretype = 1;
781        g_ip->wt = Global_10;
782        break;
783    case (4):
784        g_ip->force_wiretype = 1;
785        g_ip->wt = Global_20;
786        break;
787    case (5):
788        g_ip->force_wiretype = 1;
789        g_ip->wt = Global_30;
790        break;
791    case (6):
792        g_ip->force_wiretype = 1;
793        g_ip->wt = Low_swing;
794        break;
795    default:
796        cout << "Unknown wire type!\n";
797        exit(0);
798    }
799
800    g_ip->delay_wt_nuca = nuca_obj_func_delay;
801    g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power;
802    g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power;
803    g_ip->area_wt_nuca = nuca_obj_func_area;
804    g_ip->cycle_time_wt_nuca    = nuca_obj_func_cycle_time;
805    g_ip->delay_dev_nuca = dev_func_delay;
806    g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power;
807    g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power;
808    g_ip->area_dev_nuca = nuca_dev_func_area;
809    g_ip->cycle_time_dev_nuca    = nuca_dev_func_cycle_time;
810    g_ip->nuca = is_nuca;
811    g_ip->nuca_bank_count = nuca_bank_count;
812    if (nuca_bank_count > 0) {
813        g_ip->force_nuca_bank = 1;
814    }
815    g_ip->cores = core_count;
816    g_ip->cache_level = cache_level;
817
818    g_ip->temp = temp;
819
820    g_ip->F_sz_nm         = tech_node;
821    g_ip->F_sz_um         = tech_node / 1000;
822    g_ip->is_main_mem     = (main_mem != 0) ? true : false;
823    g_ip->is_cache        = (cache != 0) ? true : false;
824    g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
825
826    g_ip->num_rw_ports    = rw_ports;
827    g_ip->num_rd_ports    = excl_read_ports;
828    g_ip->num_wr_ports    = excl_write_ports;
829    g_ip->num_se_rd_ports = single_ended_read_ports;
830    g_ip->print_detail = 1;
831    g_ip->nuca = 0;
832
833    g_ip->wt = Global_5;
834    g_ip->force_cache_config = false;
835    g_ip->force_wiretype = false;
836    g_ip->print_input_args = p_input;
837
838
839    uca_org_t fin_res;
840    fin_res.valid = false;
841
842    if (g_ip->error_checking() == false) exit(0);
843    if (g_ip->print_input_args)
844        g_ip->display_ip();
845    init_tech_params(g_ip->F_sz_um, false);
846    Wire winit; // Do not delete this line. It initializes wires.
847
848    if (g_ip->nuca == 1) {
849        Nuca n(&g_tp.peri_global);
850        n.sim_nuca();
851    }
852    solve(&fin_res);
853
854    output_UCA(&fin_res);
855
856    delete (g_ip);
857    return fin_res;
858}
859
860//McPAT's plain interface, please keep !!!
861uca_org_t cacti_interface(
862    int cache_size,
863    int line_size,
864    int associativity,
865    int rw_ports,
866    int excl_read_ports,// para5
867    int excl_write_ports,
868    int single_ended_read_ports,
869    int search_ports,
870    int banks,
871    double tech_node,//para10
872    int output_width,
873    int specific_tag,
874    int tag_width,
875    int access_mode,
876    int cache,      //para15
877    int main_mem,
878    int obj_func_delay,
879    int obj_func_dynamic_power,
880    int obj_func_leakage_power,
881    int obj_func_cycle_time, //para20
882    int obj_func_area,
883    int dev_func_delay,
884    int dev_func_dynamic_power,
885    int dev_func_leakage_power,
886    int dev_func_area, //para25
887    int dev_func_cycle_time,
888    int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
889    int temp,
890    int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
891    int data_arr_ram_cell_tech_flavor_in,//para30
892    int data_arr_peri_global_tech_flavor_in,
893    int tag_arr_ram_cell_tech_flavor_in,
894    int tag_arr_peri_global_tech_flavor_in,
895    int interconnect_projection_type_in,
896    int wire_inside_mat_type_in,//para35
897    int wire_outside_mat_type_in,
898    int REPEATERS_IN_HTREE_SEGMENTS_in,
899    int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
900    int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
901    int PAGE_SIZE_BITS_in,//para40
902    int BURST_LENGTH_in,
903    int INTERNAL_PREFETCH_WIDTH_in,
904    int force_wiretype,
905    int wiretype,
906    int force_config,//para45
907    int ndwl,
908    int ndbl,
909    int nspd,
910    int ndcm,
911    int ndsam1,//para50
912    int ndsam2,
913    int ecc) {
914    g_ip = new InputParameter();
915
916    uca_org_t fin_res;
917    fin_res.valid = false;
918
919    g_ip->data_arr_ram_cell_tech_type    = data_arr_ram_cell_tech_flavor_in;
920    g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
921    g_ip->tag_arr_ram_cell_tech_type     = tag_arr_ram_cell_tech_flavor_in;
922    g_ip->tag_arr_peri_global_tech_type  = tag_arr_peri_global_tech_flavor_in;
923
924    g_ip->ic_proj_type     = interconnect_projection_type_in;
925    g_ip->wire_is_mat_type = wire_inside_mat_type_in;
926    g_ip->wire_os_mat_type = wire_outside_mat_type_in;
927    g_ip->burst_len        = BURST_LENGTH_in;
928    g_ip->int_prefetch_w   = INTERNAL_PREFETCH_WIDTH_in;
929    g_ip->page_sz_bits     = PAGE_SIZE_BITS_in;
930
931    g_ip->cache_sz            = cache_size;
932    g_ip->line_sz             = line_size;
933    g_ip->assoc               = associativity;
934    g_ip->nbanks              = banks;
935    g_ip->out_w               = output_width;
936    g_ip->specific_tag        = specific_tag;
937    if (specific_tag == 0) {
938        g_ip->tag_w = 42;
939    } else {
940        g_ip->tag_w               = tag_width;
941    }
942
943    g_ip->access_mode         = access_mode;
944    g_ip->delay_wt = obj_func_delay;
945    g_ip->dynamic_power_wt = obj_func_dynamic_power;
946    g_ip->leakage_power_wt = obj_func_leakage_power;
947    g_ip->area_wt = obj_func_area;
948    g_ip->cycle_time_wt    = obj_func_cycle_time;
949    g_ip->delay_dev = dev_func_delay;
950    g_ip->dynamic_power_dev = dev_func_dynamic_power;
951    g_ip->leakage_power_dev = dev_func_leakage_power;
952    g_ip->area_dev = dev_func_area;
953    g_ip->cycle_time_dev    = dev_func_cycle_time;
954    g_ip->temp = temp;
955    g_ip->ed = ed_ed2_none;
956
957    g_ip->F_sz_nm         = tech_node;
958    g_ip->F_sz_um         = tech_node / 1000;
959    g_ip->is_main_mem     = (main_mem != 0) ? true : false;
960    g_ip->is_cache        = (cache == 1) ? true : false;
961    g_ip->pure_ram        = (cache == 0) ? true : false;
962    g_ip->pure_cam        = (cache == 2) ? true : false;
963    g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
964    g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
965    g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
966
967    g_ip->num_rw_ports    = rw_ports;
968    g_ip->num_rd_ports    = excl_read_ports;
969    g_ip->num_wr_ports    = excl_write_ports;
970    g_ip->num_se_rd_ports = single_ended_read_ports;
971    g_ip->num_search_ports = search_ports;
972
973    g_ip->print_detail = 1;
974    g_ip->nuca = 0;
975
976    if (force_wiretype == 0) {
977        g_ip->wt = Global;
978        g_ip->force_wiretype = false;
979    } else {
980        g_ip->force_wiretype = true;
981        if (wiretype == 10) {
982            g_ip->wt = Global_10;
983        }
984        if (wiretype == 20) {
985            g_ip->wt = Global_20;
986        }
987        if (wiretype == 30) {
988            g_ip->wt = Global_30;
989        }
990        if (wiretype == 5) {
991            g_ip->wt = Global_5;
992        }
993        if (wiretype == 0) {
994            g_ip->wt = Low_swing;
995        }
996    }
997    //g_ip->wt = Global_5;
998    if (force_config == 0) {
999        g_ip->force_cache_config = false;
1000    } else {
1001        g_ip->force_cache_config = true;
1002        g_ip->ndbl = ndbl;
1003        g_ip->ndwl = ndwl;
1004        g_ip->nspd = nspd;
1005        g_ip->ndcm = ndcm;
1006        g_ip->ndsam1 = ndsam1;
1007        g_ip->ndsam2 = ndsam2;
1008
1009
1010    }
1011
1012    if (ecc == 0) {
1013        g_ip->add_ecc_b_ = false;
1014    } else {
1015        g_ip->add_ecc_b_ = true;
1016    }
1017
1018
1019    if (!g_ip->error_checking())
1020        exit(0);
1021
1022    init_tech_params(g_ip->F_sz_um, false);
1023    Wire winit; // Do not delete this line. It initializes wires.
1024
1025    g_ip->display_ip();
1026    solve(&fin_res);
1027    output_UCA(&fin_res);
1028    output_data_csv(fin_res);
1029    delete (g_ip);
1030
1031    return fin_res;
1032}
1033
1034
1035
1036bool InputParameter::error_checking(string name) {
1037    int  A;
1038    bool seq_access  = false;
1039    fast_access = true;
1040
1041    switch (access_mode) {
1042    case 0:
1043        seq_access  = false;
1044        fast_access = false;
1045        break;
1046    case 1:
1047        seq_access  = true;
1048        fast_access = false;
1049        break;
1050    case 2:
1051        seq_access  = false;
1052        fast_access = true;
1053        break;
1054    }
1055
1056    if (is_main_mem) {
1057        if (ic_proj_type == 0) {
1058            cerr << name
1059                 << ": DRAM model supports only conservative interconnect "
1060                 << "projection but is set to aggressive!\n\n";
1061            return false;
1062        }
1063    }
1064
1065
1066    uint32_t B = line_sz;
1067
1068    if (B < 1) {
1069        cerr << name << ": Block size must be >= 1, but is set to " << B
1070             << endl;
1071        return false;
1072    } else if (B*8 < out_w) {
1073        cerr << name << ": Block size must be at least " << out_w / 8
1074             << ", but is set to " << B << endl;
1075        return false;
1076    }
1077
1078    if (F_sz_um <= 0) {
1079        cerr << name << ": Feature size must be > 0, but is set to "
1080             << F_sz_um << endl;
1081        return false;
1082    } else if (F_sz_um > 0.091) {
1083        cerr << name << ": Feature size must be <= 90 nm, but is set to "
1084             << F_sz_um << endl;
1085        return false;
1086    }
1087
1088
1089    uint32_t RWP  = num_rw_ports;
1090    uint32_t ERP  = num_rd_ports;
1091    uint32_t EWP  = num_wr_ports;
1092    uint32_t NSER = num_se_rd_ports;
1093    uint32_t SCHP = num_search_ports;
1094
1095//TODO: revisit this. This is an important feature. Sheng thought this should be used
1096//  // If multiple banks and multiple ports are specified, then if number of ports is less than or equal to
1097//  // the number of banks, we assume that the multiple ports are implemented via the multiple banks.
1098//  // In such a case we assume that each bank has 1 RWP port.
1099//  if ((RWP + ERP + EWP) <= nbanks && nbanks>1)
1100//  {
1101//    RWP  = 1;
1102//    ERP  = 0;
1103//    EWP  = 0;
1104//    NSER = 0;
1105//  }
1106//  else if ((RWP < 0) || (EWP < 0) || (ERP < 0))
1107//  {
1108//    cerr << "Ports must >=0" << endl;
1109//    return false;
1110//  }
1111//  else if (RWP > 2)
1112//  {
1113//    cerr << "Maximum of 2 read/write ports" << endl;
1114//    return false;
1115//  }
1116//  else if ((RWP+ERP+EWP) < 1)
1117    // Changed to new implementation:
1118    // The number of ports specified at input is per bank
1119    if ((RWP + ERP + EWP) < 1) {
1120        cerr << name << ": Must have at least one port" << endl;
1121        return false;
1122    }
1123
1124    if (is_pow2(nbanks) == false) {
1125        cerr << name << ": Number of subbanks should be greater than or "
1126             << "equal to 1 and should be a power of 2, but is set to "
1127             << nbanks << endl;
1128        return false;
1129    }
1130
1131    int C = cache_sz / nbanks;
1132    if (C < 64) {
1133        cerr << name << ": Cache size must be >=64, but is set to " << C
1134             << endl;
1135        return false;
1136    }
1137
1138//TODO: revisit this
1139//   if (pure_ram==true && assoc!=1)
1140//    {
1141//  	  cerr << "Pure RAM must have assoc as 1" << endl;
1142//  	  return false;
1143//    }
1144
1145    //fully assoc and cam check
1146    if (is_cache && assoc == 0)
1147        fully_assoc = true;
1148    else
1149        fully_assoc = false;
1150
1151    if (pure_cam == true && assoc != 0) {
1152        cerr << name
1153             << ": Pure CAM must have associativity as 0, but is set to"
1154             << assoc << endl;
1155        return false;
1156    }
1157
1158    if (assoc == 0 && (pure_cam == false && is_cache == false)) {
1159        cerr << name
1160             << ": Only CAM or Fully associative cache can have associativity "
1161             << "as 0" << endl;
1162        return false;
1163    }
1164
1165    if ((fully_assoc == true || pure_cam == true)
1166        && (data_arr_ram_cell_tech_type != tag_arr_ram_cell_tech_type
1167            || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type)) {
1168        cerr << name
1169             << ": CAM and fully associative cache must have same device type "
1170             << "for both data and tag array" << endl;
1171        cerr << "\tData array RAM cell = " << data_arr_ram_cell_tech_type
1172             << ", Tag array RAM cell = " << tag_arr_ram_cell_tech_type << endl
1173             << "\tData array peripheral = " << data_arr_peri_global_tech_type
1174             << ", Tag array peripheral = " << tag_arr_peri_global_tech_type
1175             << endl;
1176        return false;
1177    }
1178
1179    if ((fully_assoc == true || pure_cam == true)
1180            &&  (data_arr_ram_cell_tech_type == lp_dram ||
1181                 data_arr_ram_cell_tech_type == comm_dram)) {
1182        cerr << name << ": DRAM based CAM and fully associative cache are not "
1183             << "supported" << endl;
1184        return false;
1185    }
1186
1187    if ((fully_assoc == true || pure_cam == true)
1188            &&  (is_main_mem == true)) {
1189        cerr << name
1190             << ": CAM and fully associative cache cannot be as main memory"
1191             << endl;
1192        return false;
1193    }
1194
1195    if ((fully_assoc || pure_cam) && SCHP < 1) {
1196        cerr << name
1197             << ": CAM and fully associative must have at least 1 search port,"
1198             << " but are set to " << SCHP << endl;
1199        return false;
1200    }
1201
1202    if (RWP == 0 && ERP == 0 && SCHP > 0 && ((fully_assoc || pure_cam))) {
1203        ERP = SCHP;
1204    }
1205
1206//    if ((!(fully_assoc || pure_cam)) && SCHP>=1)
1207//    {
1208//	  cerr << "None CAM and fully associative cannot have search ports" << endl;
1209//	  return false;
1210//    }
1211
1212    if (assoc == 0) {
1213        A = C / B;
1214        //fully_assoc = true;
1215    } else {
1216        if (assoc == 1) {
1217            A = 1;
1218            //fully_assoc = false;
1219        } else {
1220            //fully_assoc = false;
1221            A = assoc;
1222            if (is_pow2(A) == false) {
1223                cerr << name
1224                     << ": Associativity must be a power of 2, but is set to "
1225                     << A << endl;
1226                return false;
1227            }
1228        }
1229    }
1230
1231    if (C / (B*A) <= 1 && assoc != 0) {
1232        cerr << name << ": Number of sets (" << (C / (B * A))
1233             << ") is too small: " << endl;
1234        cerr << " Need to either increase cache size, or decrease "
1235             << "associativity or block size" << endl;
1236        cerr << " (or use fully associative cache)" << endl;
1237        return false;
1238    }
1239
1240    block_sz = B;
1241
1242    /*dt: testing sequential access mode*/
1243    if (seq_access) {
1244        tag_assoc  = A;
1245        data_assoc = 1;
1246        is_seq_acc = true;
1247    } else {
1248        tag_assoc  = A;
1249        data_assoc = A;
1250        is_seq_acc = false;
1251    }
1252
1253    if (assoc == 0) {
1254        data_assoc = 1;
1255    }
1256    num_rw_ports     = RWP;
1257    num_rd_ports     = ERP;
1258    num_wr_ports     = EWP;
1259    num_se_rd_ports  = NSER;
1260    if (!(fully_assoc || pure_cam))
1261        num_search_ports = 0;
1262    nsets            = C / (B * A);
1263
1264    if (temp < 300 || temp > 400 || temp % 10 != 0) {
1265        cerr << name << ": " << temp
1266             << " Temperature must be between 300 and 400 Kelvin and multiple "
1267             << "of 10." << endl;
1268        return false;
1269    }
1270
1271    if (nsets < 1) {
1272        cerr << name << ": Less than one set..." << endl;
1273        return false;
1274    }
1275
1276    return true;
1277}
1278
1279
1280
1281void output_data_csv(const uca_org_t & fin_res) {
1282    //TODO: the csv output should remain
1283    fstream file("out.csv", ios::in);
1284    bool    print_index = file.fail();
1285    file.close();
1286
1287    file.open("out.csv", ios::out | ios::app);
1288    if (file.fail() == true) {
1289        cerr << "File out.csv could not be opened successfully" << endl;
1290    } else {
1291        if (print_index == true) {
1292            file << "Tech node (nm), ";
1293            file << "Capacity (bytes), ";
1294            file << "Number of banks, ";
1295            file << "Associativity, ";
1296            file << "Output width (bits), ";
1297            file << "Access time (ns), ";
1298            file << "Random cycle time (ns), ";
1299            file << "Dynamic search energy (nJ), ";
1300            file << "Dynamic read energy (nJ), ";
1301            file << "Dynamic write energy (nJ), ";
1302            file << "Standby leakage per bank(mW), ";
1303            file << "Area (mm2), ";
1304            file << "Ndwl, ";
1305            file << "Ndbl, ";
1306            file << "Nspd, ";
1307            file << "Ndcm, ";
1308            file << "Ndsam_level_1, ";
1309            file << "Ndsam_level_2, ";
1310            file << "Data arrary area efficiency %, ";
1311            file << "Ntwl, ";
1312            file << "Ntbl, ";
1313            file << "Ntspd, ";
1314            file << "Ntcm, ";
1315            file << "Ntsam_level_1, ";
1316            file << "Ntsam_level_2, ";
1317            file << "Tag arrary area efficiency %, ";
1318
1319//      file << "Resistance per unit micron (ohm-micron), ";
1320//      file << "Capacitance per unit micron (fF per micron), ";
1321//      file << "Unit-length wire delay (ps), ";
1322//      file << "FO4 delay (ps), ";
1323//      file << "delay route to bank (including crossb delay) (ps), ";
1324//      file << "Crossbar delay (ps), ";
1325//      file << "Dyn read energy per access from closed page (nJ), ";
1326//      file << "Dyn read energy per access from open page (nJ), ";
1327//      file << "Leak power of an subbank with page closed (mW), ";
1328//      file << "Leak power of a subbank with page  open (mW), ";
1329//      file << "Leak power of request and reply networks (mW), ";
1330//      file << "Number of subbanks, ";
1331//      file << "Page size in bits, ";
1332//      file << "Activate power, ";
1333//      file << "Read power, ";
1334//      file << "Write power, ";
1335//      file << "Precharge power, ";
1336//      file << "tRCD, ";
1337//      file << "CAS latency, ";
1338//      file << "Precharge delay, ";
1339//      file << "Perc dyn energy bitlines, ";
1340//      file << "perc dyn energy wordlines, ";
1341//      file << "perc dyn energy outside mat, ";
1342//      file << "Area opt (perc), ";
1343//      file << "Delay opt (perc), ";
1344//      file << "Repeater opt (perc), ";
1345//      file << "Aspect ratio";
1346            file << endl;
1347        }
1348        file << g_ip->F_sz_nm << ", ";
1349        file << g_ip->cache_sz << ", ";
1350        file << g_ip->nbanks << ", ";
1351        file << g_ip->tag_assoc << ", ";
1352        file << g_ip->out_w << ", ";
1353        file << fin_res.access_time*1e+9 << ", ";
1354        file << fin_res.cycle_time*1e+9 << ", ";
1355//    file << fin_res.data_array2->multisubbank_interleave_cycle_time*1e+9 << ", ";
1356//    file << fin_res.data_array2->delay_request_network*1e+9 << ", ";
1357//    file << fin_res.data_array2->delay_inside_mat*1e+9 <<  ", ";
1358//    file << fin_res.data_array2.delay_reply_network*1e+9 << ", ";
1359
1360//    if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
1361//        {
1362//    	  file << fin_res.tag_array2->access_time*1e+9 << ", ";
1363//        }
1364//    else
1365//    {
1366//    	file << 0 << ", ";
1367//    }
1368//    file << fin_res.data_array2->access_time*1e+9 << ", ";
1369//    file << fin_res.data_array2->dram_refresh_period*1e+6 << ", ";
1370//    file << fin_res.data_array2->dram_array_availability <<  ", ";
1371        if (g_ip->fully_assoc || g_ip->pure_cam) {
1372            file << fin_res.power.searchOp.dynamic*1e+9 << ", ";
1373        } else {
1374            file << "N/A" << ", ";
1375        }
1376        file << fin_res.power.readOp.dynamic*1e+9 << ", ";
1377        file << fin_res.power.writeOp.dynamic*1e+9 << ", ";
1378//    if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
1379//        {
1380//        	file << fin_res.tag_array2->power.readOp.dynamic*1e+9 << ", ";
1381//        }
1382//        	else
1383//        {
1384//        		file << "NA" << ", ";
1385//        }
1386//    file << fin_res.data_array2->power.readOp.dynamic*1e+9 << ", ";
1387//    if (g_ip->fully_assoc || g_ip->pure_cam)
1388//        {
1389//    	    file << fin_res.power.searchOp.dynamic*1000/fin_res.cycle_time << ", ";
1390//        }
1391//        	else
1392//        {
1393//        	file << fin_res.power.readOp.dynamic*1000/fin_res.cycle_time << ", ";
1394//        }
1395
1396    file <<( fin_res.power.readOp.leakage + fin_res.power.readOp.gate_leakage )*1000 << ", ";
1397//    file << fin_res.leak_power_with_sleep_transistors_in_mats*1000 << ", ";
1398//    file << fin_res.data_array.refresh_power / fin_res.data_array.total_power.readOp.leakage << ", ";
1399        file << fin_res.area*1e-6 << ", ";
1400
1401        file << fin_res.data_array2->Ndwl << ", ";
1402        file << fin_res.data_array2->Ndbl << ", ";
1403        file << fin_res.data_array2->Nspd << ", ";
1404        file << fin_res.data_array2->deg_bl_muxing << ", ";
1405        file << fin_res.data_array2->Ndsam_lev_1 << ", ";
1406        file << fin_res.data_array2->Ndsam_lev_2 << ", ";
1407        file << fin_res.data_array2->area_efficiency << ", ";
1408        if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) {
1409            file << fin_res.tag_array2->Ndwl << ", ";
1410            file << fin_res.tag_array2->Ndbl << ", ";
1411            file << fin_res.tag_array2->Nspd << ", ";
1412            file << fin_res.tag_array2->deg_bl_muxing << ", ";
1413            file << fin_res.tag_array2->Ndsam_lev_1 << ", ";
1414            file << fin_res.tag_array2->Ndsam_lev_2 << ", ";
1415            file << fin_res.tag_array2->area_efficiency << ", ";
1416        } else {
1417        file << "N/A" << ", ";
1418        file << "N/A"<< ", ";
1419        file << "N/A" << ", ";
1420        file << "N/A" << ", ";
1421        file << "N/A" << ", ";
1422        file << "N/A" << ", ";
1423        file << "N/A" << ", ";
1424    }
1425
1426//    file << g_tp.wire_inside_mat.R_per_um << ", ";
1427//    file << g_tp.wire_inside_mat.C_per_um / 1e-15 << ", ";
1428//    file << g_tp.unit_len_wire_del / 1e-12 << ", ";
1429//    file << g_tp.FO4 / 1e-12 << ", ";
1430//    file << fin_res.data_array.delay_route_to_bank / 1e-9 << ", ";
1431//    file << fin_res.data_array.delay_crossbar / 1e-9 << ", ";
1432//    file << fin_res.data_array.dyn_read_energy_from_closed_page / 1e-9 << ", ";
1433//    file << fin_res.data_array.dyn_read_energy_from_open_page / 1e-9 << ", ";
1434//    file << fin_res.data_array.leak_power_subbank_closed_page / 1e-3 << ", ";
1435//    file << fin_res.data_array.leak_power_subbank_open_page / 1e-3 << ", ";
1436//    file << fin_res.data_array.leak_power_request_and_reply_networks / 1e-3 << ", ";
1437//    file << fin_res.data_array.number_subbanks << ", " ;
1438//    file << fin_res.data_array.page_size_in_bits << ", " ;
1439//    file << fin_res.data_array.activate_energy * 1e9 << ", " ;
1440//    file << fin_res.data_array.read_energy * 1e9 << ", " ;
1441//    file << fin_res.data_array.write_energy * 1e9 << ", " ;
1442//    file << fin_res.data_array.precharge_energy * 1e9 << ", " ;
1443//    file << fin_res.data_array.trcd * 1e9 << ", " ;
1444//    file << fin_res.data_array.cas_latency * 1e9 << ", " ;
1445//    file << fin_res.data_array.precharge_delay * 1e9 << ", " ;
1446//    file << fin_res.data_array.all_banks_height / fin_res.data_array.all_banks_width;
1447        file<<endl;
1448    }
1449    file.close();
1450}
1451
1452
1453
1454void output_UCA(uca_org_t *fr) {
1455    //    if (NUCA)
1456    if (0) {
1457        cout << "\n\n Detailed Bank Stats:\n";
1458        cout << "    Bank Size (bytes): %d\n" <<
1459             (int) (g_ip->cache_sz);
1460    } else {
1461        if (g_ip->data_arr_ram_cell_tech_type == 3) {
1462            cout << "\n---------- CACTI version 6.5, Uniform Cache Access " <<
1463                 "Logic Process Based DRAM Model ----------\n";
1464        } else if (g_ip->data_arr_ram_cell_tech_type == 4) {
1465            cout << "\n---------- CACTI version 6.5, Uniform" <<
1466                 "Cache Access Commodity DRAM Model ----------\n";
1467        } else {
1468            cout << "\n---------- CACTI version 6.5, Uniform Cache Access "
1469                 "SRAM Model ----------\n";
1470        }
1471        cout << "\nCache Parameters:\n";
1472        cout << "    Total cache size (bytes): " <<
1473             (int) (g_ip->cache_sz) << endl;
1474    }
1475
1476    cout << "    Number of banks: " << (int) g_ip->nbanks << endl;
1477    if (g_ip->fully_assoc || g_ip->pure_cam)
1478        cout << "    Associativity: fully associative\n";
1479    else {
1480        if (g_ip->tag_assoc == 1)
1481            cout << "    Associativity: direct mapped\n";
1482        else
1483            cout << "    Associativity: " <<
1484                 g_ip->tag_assoc << endl;
1485    }
1486
1487
1488    cout << "    Block size (bytes): " << g_ip->line_sz << endl;
1489    cout << "    Read/write Ports: " <<
1490         g_ip->num_rw_ports << endl;
1491    cout << "    Read ports: " <<
1492         g_ip->num_rd_ports << endl;
1493    cout << "    Write ports: " <<
1494         g_ip->num_wr_ports << endl;
1495    if (g_ip->fully_assoc || g_ip->pure_cam)
1496        cout << "    search ports: " <<
1497             g_ip->num_search_ports << endl;
1498    cout << "    Technology size (nm): " <<
1499         g_ip->F_sz_nm << endl << endl;
1500
1501    cout << "    Access time (ns): " << fr->access_time*1e9 << endl;
1502    cout << "    Cycle time (ns):  " << fr->cycle_time*1e9 << endl;
1503    if (g_ip->data_arr_ram_cell_tech_type >= 4) {
1504        cout << "    Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl;
1505        cout << "    Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl;
1506        cout << "    Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl;
1507        cout << "    Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl;
1508        cout << "    Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl;
1509        cout << "    Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl;
1510        cout << "    Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl;
1511        cout << "    Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl;
1512        cout << "    Refresh power (mW): " <<
1513             fr->data_array2->refresh_power*1e3 << endl;
1514    } else {
1515        if ((g_ip->fully_assoc || g_ip->pure_cam)) {
1516            cout << "    Total dynamic associative search energy per access (nJ): " <<
1517                 fr->power.searchOp.dynamic*1e9 << endl;
1518//		  cout << "    Total dynamic read energy per access (nJ): " <<
1519//		  fr->power.readOp.dynamic*1e9 << endl;
1520//		  cout << "    Total dynamic write energy per access (nJ): " <<
1521//		  fr->power.writeOp.dynamic*1e9 << endl;
1522        }
1523//	  else
1524//	  {
1525        cout << "    Total dynamic read energy per access (nJ): " <<
1526             fr->power.readOp.dynamic*1e9 << endl;
1527        cout << "    Total dynamic write energy per access (nJ): " <<
1528             fr->power.writeOp.dynamic*1e9 << endl;
1529//	  }
1530        cout << "    Total leakage power of a bank"
1531             " (mW): " << fr->power.readOp.leakage*1e3 << endl;
1532        cout << "    Total gate leakage power of a bank"
1533             " (mW): " << fr->power.readOp.gate_leakage*1e3 << endl;
1534    }
1535
1536    if (g_ip->data_arr_ram_cell_tech_type == 3 || g_ip->data_arr_ram_cell_tech_type == 4) {
1537    }
1538    cout <<  "    Cache height x width (mm): " <<
1539         fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl << endl;
1540
1541
1542    cout << "    Best Ndwl : " << fr->data_array2->Ndwl << endl;
1543    cout << "    Best Ndbl : " << fr->data_array2->Ndbl << endl;
1544    cout << "    Best Nspd : " << fr->data_array2->Nspd << endl;
1545    cout << "    Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl;
1546    cout << "    Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl;
1547    cout << "    Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl;
1548
1549    if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
1550        !g_ip->is_main_mem) {
1551        cout << "    Best Ntwl : " << fr->tag_array2->Ndwl << endl;
1552        cout << "    Best Ntbl : " << fr->tag_array2->Ndbl << endl;
1553        cout << "    Best Ntspd : " << fr->tag_array2->Nspd << endl;
1554        cout << "    Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl;
1555        cout << "    Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl;
1556        cout << "    Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl;
1557    }
1558
1559    switch (fr->data_array2->wt) {
1560    case (0):
1561        cout <<  "    Data array, H-tree wire type: Delay optimized global wires\n";
1562        break;
1563    case (1):
1564        cout <<  "    Data array, H-tree wire type: Global wires with 5\% delay penalty\n";
1565        break;
1566    case (2):
1567        cout <<  "    Data array, H-tree wire type: Global wires with 10\% delay penalty\n";
1568        break;
1569    case (3):
1570        cout <<  "    Data array, H-tree wire type: Global wires with 20\% delay penalty\n";
1571        break;
1572    case (4):
1573        cout <<  "    Data array, H-tree wire type: Global wires with 30\% delay penalty\n";
1574        break;
1575    case (5):
1576        cout <<  "    Data array, wire type: Low swing wires\n";
1577        break;
1578    default:
1579        cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt << endl;
1580        exit(0);
1581    }
1582
1583    if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) {
1584        switch (fr->tag_array2->wt) {
1585        case (0):
1586            cout <<  "    Tag array, H-tree wire type: Delay optimized global wires\n";
1587            break;
1588        case (1):
1589            cout <<  "    Tag array, H-tree wire type: Global wires with 5\% delay penalty\n";
1590            break;
1591        case (2):
1592            cout <<  "    Tag array, H-tree wire type: Global wires with 10\% delay penalty\n";
1593            break;
1594        case (3):
1595            cout <<  "    Tag array, H-tree wire type: Global wires with 20\% delay penalty\n";
1596            break;
1597        case (4):
1598            cout <<  "    Tag array, H-tree wire type: Global wires with 30\% delay penalty\n";
1599            break;
1600        case (5):
1601            cout <<  "    Tag array, wire type: Low swing wires\n";
1602            break;
1603        default:
1604            cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt << endl;
1605            exit(-1);
1606        }
1607    }
1608
1609    if (g_ip->print_detail) {
1610        /* Delay stats */
1611        /* data array stats */
1612        cout << endl << "Time Components:" << endl << endl;
1613
1614        cout << "  Data side (with Output driver) (ns): " <<
1615             fr->data_array2->access_time / 1e-9 << endl;
1616
1617        cout <<  "\tH-tree input delay (ns): " <<
1618             fr->data_array2->delay_route_to_bank * 1e9 +
1619             fr->data_array2->delay_input_htree * 1e9 << endl;
1620
1621        if (!(g_ip->pure_cam || g_ip->fully_assoc)) {
1622            cout <<  "\tDecoder + wordline delay (ns): " <<
1623                 fr->data_array2->delay_row_predecode_driver_and_block * 1e9 +
1624                 fr->data_array2->delay_row_decoder * 1e9 << endl;
1625        } else {
1626            cout <<  "\tCAM search delay (ns): " <<
1627                 fr->data_array2->delay_matchlines * 1e9 << endl;
1628        }
1629
1630        cout <<  "\tBitline delay (ns): " <<
1631             fr->data_array2->delay_bitlines / 1e-9 << endl;
1632
1633        cout <<  "\tSense Amplifier delay (ns): " <<
1634             fr->data_array2->delay_sense_amp * 1e9 << endl;
1635
1636
1637        cout <<  "\tH-tree output delay (ns): " <<
1638             fr->data_array2->delay_subarray_output_driver * 1e9 +
1639             fr->data_array2->delay_dout_htree * 1e9 << endl;
1640
1641        if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
1642            !g_ip->is_main_mem) {
1643            /* tag array stats */
1644            cout << endl << "  Tag side (with Output driver) (ns): " <<
1645                 fr->tag_array2->access_time / 1e-9 << endl;
1646
1647            cout <<  "\tH-tree input delay (ns): " <<
1648                 fr->tag_array2->delay_route_to_bank * 1e9 +
1649                 fr->tag_array2->delay_input_htree * 1e9 << endl;
1650
1651            cout <<  "\tDecoder + wordline delay (ns): " <<
1652                 fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 +
1653                 fr->tag_array2->delay_row_decoder * 1e9 << endl;
1654
1655            cout <<  "\tBitline delay (ns): " <<
1656                 fr->tag_array2->delay_bitlines / 1e-9 << endl;
1657
1658            cout <<  "\tSense Amplifier delay (ns): " <<
1659                 fr->tag_array2->delay_sense_amp * 1e9 << endl;
1660
1661            cout <<  "\tComparator delay (ns): " <<
1662                 fr->tag_array2->delay_comparator * 1e9 << endl;
1663
1664            cout <<  "\tH-tree output delay (ns): " <<
1665                 fr->tag_array2->delay_subarray_output_driver * 1e9 +
1666                 fr->tag_array2->delay_dout_htree * 1e9 << endl;
1667        }
1668
1669
1670
1671        /* Energy/Power stats */
1672        cout << endl << endl << "Power Components:" << endl << endl;
1673
1674        if (!(g_ip->pure_cam || g_ip->fully_assoc)) {
1675            cout << "  Data array: Total dynamic read energy/access  (nJ): " <<
1676                 fr->data_array2->power.readOp.dynamic * 1e9 << endl;
1677            cout << "\tTotal leakage read/write power of a bank (mW): " <<
1678                 fr->data_array2->power.readOp.leakage * 1e3 << endl;
1679
1680            cout << "\tTotal energy in H-tree (that includes both "
1681                 "address and data transfer) (nJ): " <<
1682                 (fr->data_array2->power_addr_input_htree.readOp.dynamic +
1683                  fr->data_array2->power_data_output_htree.readOp.dynamic +
1684                  fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
1685
1686            cout << "\tTotal leakage power in H-tree (that includes both "
1687                "address and data network) ((mW)): " <<
1688                (fr->data_array2->power_addr_input_htree.readOp.leakage +
1689                 fr->data_array2->power_data_output_htree.readOp.leakage +
1690                 fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3
1691                 << endl;
1692
1693            cout << "\tTotal gate leakage power in H-tree (that includes both "
1694                "address and data network) ((mW)): " <<
1695                (fr->data_array2->power_addr_input_htree.readOp.gate_leakage +
1696                 fr->data_array2->power_data_output_htree.readOp.gate_leakage +
1697                 fr->data_array2->power_routing_to_bank.readOp.gate_leakage) *
1698                1e3 << endl;
1699
1700            cout << "\tOutput Htree inside bank Energy (nJ): " <<
1701                 fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
1702            cout <<  "\tDecoder (nJ): " <<
1703                 fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
1704                 fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
1705            cout <<  "\tWordline (nJ): " <<
1706                 fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
1707            cout <<  "\tBitline mux & associated drivers (nJ): " <<
1708                 fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
1709                 fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
1710                 fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
1711            cout <<  "\tSense amp mux & associated drivers (nJ): " <<
1712                 fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
1713                 fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
1714                 fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9  +
1715                 fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
1716                 fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
1717                 fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
1718
1719            cout <<  "\tBitlines precharge and equalization circuit (nJ): " <<
1720                 fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
1721            cout <<  "\tBitlines (nJ): " <<
1722                 fr->data_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
1723            cout <<  "\tSense amplifier energy (nJ): " <<
1724                 fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
1725            cout <<  "\tSub-array output driver (nJ): " <<
1726                 fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
1727        }
1728
1729        else if (g_ip->pure_cam) {
1730
1731            cout << "  CAM array:" << endl;
1732            cout << "  Total dynamic associative search energy/access  (nJ): " <<
1733                 fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
1734            cout << "\tTotal energy in H-tree (that includes both "
1735                 "match key and data transfer) (nJ): " <<
1736                 (fr->data_array2->power_htree_in_search.searchOp.dynamic +
1737                  fr->data_array2->power_htree_out_search.searchOp.dynamic +
1738                  fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
1739            cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
1740                 (fr->data_array2->power_htree_in_search.searchOp.dynamic +
1741                  fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
1742            cout <<  "\tSearchlines (nJ): " <<
1743                 fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
1744                 fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
1745            cout <<  "\tMatchlines  (nJ): " <<
1746                 fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
1747                 fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
1748            cout <<  "\tSub-array output driver (nJ): " <<
1749                 fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
1750
1751
1752            cout << endl << "  Total dynamic read energy/access  (nJ): " <<
1753                 fr->data_array2->power.readOp.dynamic * 1e9 << endl;
1754            cout << "\tTotal energy in H-tree (that includes both "
1755                 "address and data transfer) (nJ): " <<
1756                 (fr->data_array2->power_addr_input_htree.readOp.dynamic +
1757                  fr->data_array2->power_data_output_htree.readOp.dynamic +
1758                  fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
1759            cout << "\tOutput Htree inside bank Energy (nJ): " <<
1760                 fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
1761            cout <<  "\tDecoder (nJ): " <<
1762                 fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
1763                 fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
1764            cout <<  "\tWordline (nJ): " <<
1765                 fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
1766            cout <<  "\tBitline mux & associated drivers (nJ): " <<
1767                 fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
1768                 fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
1769                 fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
1770            cout <<  "\tSense amp mux & associated drivers (nJ): " <<
1771                 fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
1772                 fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
1773                 fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9  +
1774                 fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
1775                 fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
1776                 fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
1777            cout <<  "\tBitlines (nJ): " <<
1778                 fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
1779                 fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
1780            cout <<  "\tSense amplifier energy (nJ): " <<
1781                 fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
1782            cout <<  "\tSub-array output driver (nJ): " <<
1783                 fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
1784
1785            cout << endl << "  Total leakage power of a bank (mW): " <<
1786                 fr->data_array2->power.readOp.leakage * 1e3 << endl;
1787        } else {
1788            cout << "  Fully associative array:" << endl;
1789            cout << "  Total dynamic associative search energy/access  (nJ): " <<
1790                 fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
1791            cout << "\tTotal energy in H-tree (that includes both "
1792                 "match key and data transfer) (nJ): " <<
1793                 (fr->data_array2->power_htree_in_search.searchOp.dynamic +
1794                  fr->data_array2->power_htree_out_search.searchOp.dynamic +
1795                  fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
1796            cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
1797                 (fr->data_array2->power_htree_in_search.searchOp.dynamic +
1798                  fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
1799            cout <<  "\tSearchlines (nJ): " <<
1800                 fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
1801                 fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
1802            cout <<  "\tMatchlines  (nJ): " <<
1803                 fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
1804                 fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
1805            cout <<  "\tData portion wordline (nJ): " <<
1806                 fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 << endl;
1807            cout <<  "\tData Bitlines (nJ): " <<
1808                 fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 +
1809                 fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9 << endl;
1810            cout <<  "\tSense amplifier energy (nJ): " <<
1811                 fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 << endl;
1812            cout <<  "\tSub-array output driver (nJ): " <<
1813                 fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
1814
1815
1816            cout << endl << "  Total dynamic read energy/access  (nJ): " <<
1817                 fr->data_array2->power.readOp.dynamic * 1e9 << endl;
1818            cout << "\tTotal energy in H-tree (that includes both "
1819                 "address and data transfer) (nJ): " <<
1820                 (fr->data_array2->power_addr_input_htree.readOp.dynamic +
1821                  fr->data_array2->power_data_output_htree.readOp.dynamic +
1822                  fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
1823            cout << "\tOutput Htree inside bank Energy (nJ): " <<
1824                 fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
1825            cout <<  "\tDecoder (nJ): " <<
1826                 fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
1827                 fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
1828            cout <<  "\tWordline (nJ): " <<
1829                 fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
1830            cout <<  "\tBitline mux & associated drivers (nJ): " <<
1831                 fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
1832                 fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
1833                 fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
1834            cout <<  "\tSense amp mux & associated drivers (nJ): " <<
1835                 fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
1836                 fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
1837                 fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9  +
1838                 fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
1839                 fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
1840                 fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
1841            cout <<  "\tBitlines (nJ): " <<
1842                 fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
1843                 fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
1844            cout <<  "\tSense amplifier energy (nJ): " <<
1845                 fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
1846            cout <<  "\tSub-array output driver (nJ): " <<
1847                 fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
1848
1849            cout << endl << "  Total leakage power of a bank (mW): " <<
1850                 fr->data_array2->power.readOp.leakage * 1e3 << endl;
1851        }
1852
1853
1854        if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
1855            !g_ip->is_main_mem) {
1856            cout << endl << "  Tag array:  Total dynamic read energy/access (nJ): " <<
1857                 fr->tag_array2->power.readOp.dynamic * 1e9 << endl;
1858            cout << "\tTotal leakage read/write power of a bank (mW): " <<
1859                 fr->tag_array2->power.readOp.leakage * 1e3 << endl;
1860            cout << "\tTotal energy in H-tree (that includes both "
1861                 "address and data transfer) (nJ): " <<
1862                 (fr->tag_array2->power_addr_input_htree.readOp.dynamic +
1863                  fr->tag_array2->power_data_output_htree.readOp.dynamic +
1864                  fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
1865
1866            cout << "\tTotal leakage power in H-tree (that includes both "
1867                "address and data network) ((mW)): " <<
1868                (fr->tag_array2->power_addr_input_htree.readOp.leakage +
1869                 fr->tag_array2->power_data_output_htree.readOp.leakage +
1870                 fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3
1871                 << endl;
1872
1873            cout << "\tTotal gate leakage power in H-tree (that includes both "
1874                "address and data network) ((mW)): " <<
1875                (fr->tag_array2->power_addr_input_htree.readOp.gate_leakage +
1876                 fr->tag_array2->power_data_output_htree.readOp.gate_leakage +
1877                 fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) *
1878                1e3 << endl;
1879
1880            cout << "\tOutput Htree inside a bank Energy (nJ): " <<
1881                 fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
1882            cout <<  "\tDecoder (nJ): " <<
1883                 fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
1884                 fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
1885            cout <<  "\tWordline (nJ): " <<
1886                 fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
1887            cout <<  "\tBitline mux & associated drivers (nJ): " <<
1888                 fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
1889                 fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
1890                 fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
1891            cout <<  "\tSense amp mux & associated drivers (nJ): " <<
1892                 fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
1893                 fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
1894                 fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9  +
1895                 fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
1896                 fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
1897                 fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
1898            cout <<  "\tBitlines precharge and equalization circuit (nJ): " <<
1899                 fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
1900            cout <<  "\tBitlines (nJ): " <<
1901                 fr->tag_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
1902            cout <<  "\tSense amplifier energy (nJ): " <<
1903                 fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
1904            cout <<  "\tSub-array output driver (nJ): " <<
1905                 fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
1906        }
1907
1908        cout << endl << endl <<  "Area Components:" << endl << endl;
1909        /* Data array area stats */
1910        if (!(g_ip->pure_cam || g_ip->fully_assoc))
1911            cout <<  "  Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
1912        else if (g_ip->pure_cam)
1913            cout <<  "  CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
1914        else
1915            cout <<  "  Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
1916        cout <<  "\tHeight (mm): " <<
1917             fr->data_array2->all_banks_height*1e-3 << endl;
1918        cout <<  "\tWidth (mm): " <<
1919             fr->data_array2->all_banks_width*1e-3 << endl;
1920        if (g_ip->print_detail) {
1921            cout <<  "\tArea efficiency (Memory cell area/Total area) - " <<
1922                 fr->data_array2->area_efficiency << " %" << endl;
1923            cout << "\t\tMAT Height (mm): " <<
1924                 fr->data_array2->mat_height*1e-3 << endl;
1925            cout << "\t\tMAT Length (mm): " <<
1926                 fr->data_array2->mat_length*1e-3 << endl;
1927            cout << "\t\tSubarray Height (mm): " <<
1928                 fr->data_array2->subarray_height*1e-3 << endl;
1929            cout << "\t\tSubarray Length (mm): " <<
1930                 fr->data_array2->subarray_length*1e-3 << endl;
1931        }
1932
1933        /* Tag array area stats */
1934        if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
1935            !g_ip->is_main_mem) {
1936            cout << endl << "  Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl;
1937            cout <<  "\tHeight (mm): " <<
1938                 fr->tag_array2->all_banks_height*1e-3 << endl;
1939            cout <<  "\tWidth (mm): " <<
1940                 fr->tag_array2->all_banks_width*1e-3 << endl;
1941            if (g_ip->print_detail) {
1942                cout <<  "\tArea efficiency (Memory cell area/Total area) - " <<
1943                     fr->tag_array2->area_efficiency << " %" << endl;
1944                cout << "\t\tMAT Height (mm): " <<
1945                     fr->tag_array2->mat_height*1e-3 << endl;
1946                cout << "\t\tMAT Length (mm): " <<
1947                     fr->tag_array2->mat_length*1e-3 << endl;
1948                cout << "\t\tSubarray Height (mm): " <<
1949                     fr->tag_array2->subarray_height*1e-3 << endl;
1950                cout << "\t\tSubarray Length (mm): " <<
1951                     fr->tag_array2->subarray_length*1e-3 << endl;
1952            }
1953        }
1954        Wire wpr;
1955        wpr.print_wire();
1956    }
1957}
1958
1959//McPAT's plain interface, please keep !!!
1960uca_org_t cacti_interface(InputParameter * const local_interface) {
1961    uca_org_t fin_res;
1962    fin_res.valid = false;
1963
1964    g_ip = local_interface;
1965
1966    if (!g_ip->error_checking()) {
1967        exit(0);
1968    }
1969
1970    init_tech_params(g_ip->F_sz_um, false);
1971    Wire winit; // Do not delete this line. It initializes wires.
1972
1973    solve(&fin_res);
1974
1975    return fin_res;
1976}
1977
1978//McPAT's plain interface, please keep !!!
1979uca_org_t init_interface(InputParameter* const local_interface,
1980                         const string &name) {
1981    uca_org_t fin_res;
1982    fin_res.valid = false;
1983
1984    g_ip = local_interface;
1985
1986    if (!g_ip->error_checking(name)) {
1987        exit(0);
1988    }
1989
1990    init_tech_params(g_ip->F_sz_um, false);
1991    Wire winit; // Do not delete this line. It initializes wires.
1992    return fin_res;
1993}
1994
1995void reconfigure(InputParameter *local_interface, uca_org_t *fin_res)
1996{
1997  // Copy the InputParameter to global interface (g_ip) and do error checking.
1998  g_ip = local_interface;
1999  g_ip->error_checking();
2000
2001  // Initialize technology parameters
2002  init_tech_params(g_ip->F_sz_um,false);
2003
2004  Wire winit; // Do not delete this line. It initializes wires.
2005
2006  // This corresponds to solve() in the initialization process.
2007  update(fin_res);
2008}
2009