1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. 5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 6 * All Rights Reserved 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are 10 * met: redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer; 12 * redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution; 15 * neither the name of the copyright holders nor the names of its 16 * contributors may be used to endorse or promote products derived from 17 * this software without specific prior written permission. 18 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 * 31 ***************************************************************************/ 32 33 34 35#include <fstream> 36#include <iostream> 37#include <sstream> 38 39#include "Ucache.h" 40#include "arbiter.h" 41#include "area.h" 42#include "basic_circuit.h" 43#include "crossbar.h" 44#include "io.h" 45#include "nuca.h" 46#include "parameter.h" 47//#include "highradix.h" 48 49using namespace std; 50 51 52/* Parses "cache.cfg" file */ 53void 54InputParameter::parse_cfg(const string & in_file) { 55 FILE *fp = fopen(in_file.c_str(), "r"); 56 char line[5000]; 57 char jk[5000]; 58 char temp_var[5000]; 59 60 if (!fp) { 61 cout << in_file << " is missing!\n"; 62 exit(-1); 63 } 64 65 while (fscanf(fp, "%[^\n]\n", line) != EOF) { 66 67 if (!strncmp("-size", line, strlen("-size"))) { 68 sscanf(line, "-size %[(:-~)*]%u", jk, &(cache_sz)); 69 continue; 70 } 71 72 if (!strncmp("-page size", line, strlen("-page size"))) { 73 sscanf(line, "-page size %[(:-~)*]%u", jk, &(page_sz_bits)); 74 continue; 75 } 76 77 if (!strncmp("-burst length", line, strlen("-burst length"))) { 78 sscanf(line, "-burst %[(:-~)*]%u", jk, &(burst_len)); 79 continue; 80 } 81 82 if (!strncmp("-internal prefetch width", line, strlen("-internal prefetch width"))) { 83 sscanf(line, "-internal prefetch %[(:-~)*]%u", jk, &(int_prefetch_w)); 84 continue; 85 } 86 87 if (!strncmp("-block", line, strlen("-block"))) { 88 sscanf(line, "-block size (bytes) %d", &(line_sz)); 89 continue; 90 } 91 92 if (!strncmp("-associativity", line, strlen("-associativity"))) { 93 sscanf(line, "-associativity %d", &(assoc)); 94 continue; 95 } 96 97 if (!strncmp("-read-write", line, strlen("-read-write"))) { 98 sscanf(line, "-read-write port %d", &(num_rw_ports)); 99 continue; 100 } 101 102 if (!strncmp("-exclusive read", line, strlen("exclusive read"))) { 103 sscanf(line, "-exclusive read port %d", &(num_rd_ports)); 104 continue; 105 } 106 107 if (!strncmp("-exclusive write", line, strlen("-exclusive write"))) { 108 sscanf(line, "-exclusive write port %d", &(num_wr_ports)); 109 continue; 110 } 111 112 if (!strncmp("-single ended", line, strlen("-single ended"))) { 113 sscanf(line, "-single %[(:-~)*]%d", jk, 114 &(num_se_rd_ports)); 115 continue; 116 } 117 118 if (!strncmp("-search", line, strlen("-search"))) { 119 sscanf(line, "-search port %d", &(num_search_ports)); 120 continue; 121 } 122 123 if (!strncmp("-UCA bank", line, strlen("-UCA bank"))) { 124 sscanf(line, "-UCA bank%[((:-~)| )*]%d", jk, &(nbanks)); 125 continue; 126 } 127 128 if (!strncmp("-technology", line, strlen("-technology"))) { 129 sscanf(line, "-technology (u) %lf", &(F_sz_um)); 130 F_sz_nm = F_sz_um * 1000; 131 continue; 132 } 133 134 if (!strncmp("-output/input", line, strlen("-output/input"))) { 135 sscanf(line, "-output/input bus %[(:-~)*]%d", jk, &(out_w)); 136 continue; 137 } 138 139 if (!strncmp("-operating temperature", line, strlen("-operating temperature"))) { 140 sscanf(line, "-operating temperature %[(:-~)*]%d", jk, &(temp)); 141 continue; 142 } 143 144 if (!strncmp("-cache type", line, strlen("-cache type"))) { 145 sscanf(line, "-cache type%[^\"]\"%[^\"]\"", jk, temp_var); 146 147 if (!strncmp("cache", temp_var, sizeof("cache"))) { 148 is_cache = true; 149 } else { 150 is_cache = false; 151 } 152 153 if (!strncmp("main memory", temp_var, sizeof("main memory"))) { 154 is_main_mem = true; 155 } else { 156 is_main_mem = false; 157 } 158 159 if (!strncmp("cam", temp_var, sizeof("cam"))) { 160 pure_cam = true; 161 } else { 162 pure_cam = false; 163 } 164 165 if (!strncmp("ram", temp_var, sizeof("ram"))) { 166 pure_ram = true; 167 } else { 168 if (!is_main_mem) 169 pure_ram = false; 170 else 171 pure_ram = true; 172 } 173 174 continue; 175 } 176 177 178 if (!strncmp("-tag size", line, strlen("-tag size"))) { 179 sscanf(line, "-tag size%[^\"]\"%[^\"]\"", jk, temp_var); 180 if (!strncmp("default", temp_var, sizeof("default"))) { 181 specific_tag = false; 182 tag_w = 42; /* the acutal value is calculated 183 * later based on the cache size, bank count, and associativity 184 */ 185 } else { 186 specific_tag = true; 187 sscanf(line, "-tag size (b) %d", &(tag_w)); 188 } 189 continue; 190 } 191 192 if (!strncmp("-access mode", line, strlen("-access mode"))) { 193 sscanf(line, "-access %[^\"]\"%[^\"]\"", jk, temp_var); 194 if (!strncmp("fast", temp_var, strlen("fast"))) { 195 access_mode = 2; 196 } else if (!strncmp("sequential", temp_var, strlen("sequential"))) { 197 access_mode = 1; 198 } else if (!strncmp("normal", temp_var, strlen("normal"))) { 199 access_mode = 0; 200 } else { 201 cout << "ERROR: Invalid access mode!\n"; 202 exit(0); 203 } 204 continue; 205 } 206 207 if (!strncmp("-Data array cell type", line, 208 strlen("-Data array cell type"))) { 209 sscanf(line, "-Data array cell type %[^\"]\"%[^\"]\"", jk, temp_var); 210 211 if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { 212 data_arr_ram_cell_tech_type = 0; 213 } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { 214 data_arr_ram_cell_tech_type = 1; 215 } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { 216 data_arr_ram_cell_tech_type = 2; 217 } else if (!strncmp("lp-dram", temp_var, strlen("lp-dram"))) { 218 data_arr_ram_cell_tech_type = 3; 219 } else if (!strncmp("comm-dram", temp_var, strlen("comm-dram"))) { 220 data_arr_ram_cell_tech_type = 4; 221 } else { 222 cout << "ERROR: Invalid type!\n"; 223 exit(0); 224 } 225 continue; 226 } 227 228 if (!strncmp("-Data array peripheral type", line, strlen("-Data array peripheral type"))) { 229 sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var); 230 231 if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { 232 data_arr_peri_global_tech_type = 0; 233 } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { 234 data_arr_peri_global_tech_type = 1; 235 } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { 236 data_arr_peri_global_tech_type = 2; 237 } else { 238 cout << "ERROR: Invalid type!\n"; 239 exit(0); 240 } 241 continue; 242 } 243 244 if (!strncmp("-Tag array cell type", line, strlen("-Tag array cell type"))) { 245 sscanf(line, "-Tag array cell type %[^\"]\"%[^\"]\"", jk, temp_var); 246 247 if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { 248 tag_arr_ram_cell_tech_type = 0; 249 } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { 250 tag_arr_ram_cell_tech_type = 1; 251 } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { 252 tag_arr_ram_cell_tech_type = 2; 253 } else if (!strncmp("lp-dram", temp_var, strlen("lp-dram"))) { 254 tag_arr_ram_cell_tech_type = 3; 255 } else if (!strncmp("comm-dram", temp_var, strlen("comm-dram"))) { 256 tag_arr_ram_cell_tech_type = 4; 257 } else { 258 cout << "ERROR: Invalid type!\n"; 259 exit(0); 260 } 261 continue; 262 } 263 264 if (!strncmp("-Tag array peripheral type", line, strlen("-Tag array peripheral type"))) { 265 sscanf(line, "-Tag array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var); 266 267 if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { 268 tag_arr_peri_global_tech_type = 0; 269 } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { 270 tag_arr_peri_global_tech_type = 1; 271 } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { 272 tag_arr_peri_global_tech_type = 2; 273 } else { 274 cout << "ERROR: Invalid type!\n"; 275 exit(0); 276 } 277 continue; 278 } 279 if (!strncmp("-design", line, strlen("-design"))) { 280 sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, 281 &(delay_wt), &(dynamic_power_wt), 282 &(leakage_power_wt), 283 &(cycle_time_wt), &(area_wt)); 284 continue; 285 } 286 287 if (!strncmp("-deviate", line, strlen("-deviate"))) { 288 sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, 289 &(delay_dev), &(dynamic_power_dev), 290 &(leakage_power_dev), 291 &(cycle_time_dev), &(area_dev)); 292 continue; 293 } 294 295 if (!strncmp("-Optimize", line, strlen("-Optimize"))) { 296 sscanf(line, "-Optimize %[^\"]\"%[^\"]\"", jk, temp_var); 297 298 if (!strncmp("ED^2", temp_var, strlen("ED^2"))) { 299 ed = 2; 300 } else if (!strncmp("ED", temp_var, strlen("ED"))) { 301 ed = 1; 302 } else { 303 ed = 0; 304 } 305 } 306 307 if (!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) { 308 sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, 309 &(delay_wt_nuca), &(dynamic_power_wt_nuca), 310 &(leakage_power_wt_nuca), 311 &(cycle_time_wt_nuca), &(area_wt_nuca)); 312 continue; 313 } 314 315 if (!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) { 316 sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, 317 &(delay_dev_nuca), &(dynamic_power_dev_nuca), 318 &(leakage_power_dev_nuca), 319 &(cycle_time_dev_nuca), &(area_dev_nuca)); 320 continue; 321 } 322 323 if (!strncmp("-Cache model", line, strlen("-cache model"))) { 324 sscanf(line, "-Cache model %[^\"]\"%[^\"]\"", jk, temp_var); 325 326 if (!strncmp("UCA", temp_var, strlen("UCA"))) { 327 nuca = 0; 328 } else { 329 nuca = 1; 330 } 331 continue; 332 } 333 334 if (!strncmp("-NUCA bank", line, strlen("-NUCA bank"))) { 335 sscanf(line, "-NUCA bank count %d", &(nuca_bank_count)); 336 337 if (nuca_bank_count != 0) { 338 force_nuca_bank = 1; 339 } 340 continue; 341 } 342 343 if (!strncmp("-Wire inside mat", line, strlen("-Wire inside mat"))) { 344 sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var); 345 346 if (!strncmp("global", temp_var, strlen("global"))) { 347 wire_is_mat_type = 2; 348 continue; 349 } else if (!strncmp("local", temp_var, strlen("local"))) { 350 wire_is_mat_type = 0; 351 continue; 352 } else { 353 wire_is_mat_type = 1; 354 continue; 355 } 356 } 357 358 if (!strncmp("-Wire outside mat", line, strlen("-Wire outside mat"))) { 359 sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var); 360 361 if (!strncmp("global", temp_var, strlen("global"))) { 362 wire_os_mat_type = 2; 363 } else { 364 wire_os_mat_type = 1; 365 } 366 continue; 367 } 368 369 if (!strncmp("-Interconnect projection", line, strlen("-Interconnect projection"))) { 370 sscanf(line, "-Interconnect projection%[^\"]\"%[^\"]\"", jk, temp_var); 371 372 if (!strncmp("aggressive", temp_var, strlen("aggressive"))) { 373 ic_proj_type = 0; 374 } else { 375 ic_proj_type = 1; 376 } 377 continue; 378 } 379 380 if (!strncmp("-Wire signalling", line, strlen("-wire signalling"))) { 381 sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var); 382 383 if (!strncmp("default", temp_var, strlen("default"))) { 384 force_wiretype = 0; 385 wt = Global; 386 } else if (!(strncmp("Global_10", temp_var, strlen("Global_10")))) { 387 force_wiretype = 1; 388 wt = Global_10; 389 } else if (!(strncmp("Global_20", temp_var, strlen("Global_20")))) { 390 force_wiretype = 1; 391 wt = Global_20; 392 } else if (!(strncmp("Global_30", temp_var, strlen("Global_30")))) { 393 force_wiretype = 1; 394 wt = Global_30; 395 } else if (!(strncmp("Global_5", temp_var, strlen("Global_5")))) { 396 force_wiretype = 1; 397 wt = Global_5; 398 } else if (!(strncmp("Global", temp_var, strlen("Global")))) { 399 force_wiretype = 1; 400 wt = Global; 401 } else { 402 wt = Low_swing; 403 force_wiretype = 1; 404 } 405 continue; 406 } 407 408 409 410 if (!strncmp("-Core", line, strlen("-Core"))) { 411 sscanf(line, "-Core count %d\n", &(cores)); 412 if (cores > 16) { 413 printf("No. of cores should be less than 16!\n"); 414 } 415 continue; 416 } 417 418 if (!strncmp("-Cache level", line, strlen("-Cache level"))) { 419 sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var); 420 if (!strncmp("L2", temp_var, strlen("L2"))) { 421 cache_level = 0; 422 } else { 423 cache_level = 1; 424 } 425 } 426 427 if (!strncmp("-Print level", line, strlen("-Print level"))) { 428 sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var); 429 if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) { 430 print_detail = 1; 431 } else { 432 print_detail = 0; 433 } 434 435 } 436 if (!strncmp("-Add ECC", line, strlen("-Add ECC"))) { 437 sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var); 438 if (!strncmp("true", temp_var, strlen("true"))) { 439 add_ecc_b_ = true; 440 } else { 441 add_ecc_b_ = false; 442 } 443 } 444 445 if (!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) { 446 sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var); 447 if (!strncmp("true", temp_var, strlen("true"))) { 448 print_input_args = true; 449 } else { 450 print_input_args = false; 451 } 452 } 453 454 if (!strncmp("-Force cache config", line, strlen("-Force cache config"))) { 455 sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var); 456 if (!strncmp("true", temp_var, strlen("true"))) { 457 force_cache_config = true; 458 } else { 459 force_cache_config = false; 460 } 461 } 462 463 if (!strncmp("-Ndbl", line, strlen("-Ndbl"))) { 464 sscanf(line, "-Ndbl %d\n", &(ndbl)); 465 continue; 466 } 467 if (!strncmp("-Ndwl", line, strlen("-Ndwl"))) { 468 sscanf(line, "-Ndwl %d\n", &(ndwl)); 469 continue; 470 } 471 if (!strncmp("-Nspd", line, strlen("-Nspd"))) { 472 sscanf(line, "-Nspd %d\n", &(nspd)); 473 continue; 474 } 475 if (!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) { 476 sscanf(line, "-Ndsam1 %d\n", &(ndsam1)); 477 continue; 478 } 479 if (!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) { 480 sscanf(line, "-Ndsam2 %d\n", &(ndsam2)); 481 continue; 482 } 483 if (!strncmp("-Ndcm", line, strlen("-Ndcm"))) { 484 sscanf(line, "-Ndcm %d\n", &(ndcm)); 485 continue; 486 } 487 488 } 489 rpters_in_htree = true; 490 fclose(fp); 491} 492 493void 494InputParameter::display_ip() { 495 cout << "Cache size : " << cache_sz << endl; 496 cout << "Block size : " << line_sz << endl; 497 cout << "Associativity : " << assoc << endl; 498 cout << "Read only ports : " << num_rd_ports << endl; 499 cout << "Write only ports : " << num_wr_ports << endl; 500 cout << "Read write ports : " << num_rw_ports << endl; 501 cout << "Single ended read ports : " << num_se_rd_ports << endl; 502 if (fully_assoc || pure_cam) { 503 cout << "Search ports : " << num_search_ports << endl; 504 } 505 cout << "Cache banks (UCA) : " << nbanks << endl; 506 cout << "Technology : " << F_sz_um << endl; 507 cout << "Temperature : " << temp << endl; 508 cout << "Tag size : " << tag_w << endl; 509 if (is_cache) { 510 cout << "array type : " << "Cache" << endl; 511 } 512 if (pure_ram) { 513 cout << "array type : " << "Scratch RAM" << endl; 514 } 515 if (pure_cam) { 516 cout << "array type : " << "CAM" << endl; 517 } 518 cout << "Model as memory : " << is_main_mem << endl; 519 cout << "Access mode : " << access_mode << endl; 520 cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl; 521 cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl; 522 cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl; 523 cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type << endl; 524 cout << "Optimization target : " << ed << endl; 525 cout << "Design objective (UCA wt) : " << delay_wt << " " 526 << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt 527 << " " << area_wt << endl; 528 cout << "Design objective (UCA dev) : " << delay_dev << " " 529 << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev 530 << " " << area_dev << endl; 531 if (nuca) { 532 cout << "Cores : " << cores << endl; 533 534 535 cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " " 536 << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca 537 << " " << area_wt_nuca << endl; 538 cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " " 539 << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca 540 << " " << area_dev_nuca << endl; 541 } 542 cout << "Cache model : " << nuca << endl; 543 cout << "Nuca bank : " << nuca_bank_count << endl; 544 cout << "Wire inside mat : " << wire_is_mat_type << endl; 545 cout << "Wire outside mat : " << wire_os_mat_type << endl; 546 cout << "Interconnect projection : " << ic_proj_type << endl; 547 cout << "Wire signalling : " << force_wiretype << endl; 548 cout << "Print level : " << print_detail << endl; 549 cout << "ECC overhead : " << add_ecc_b_ << endl; 550 cout << "Page size : " << page_sz_bits << endl; 551 cout << "Burst length : " << burst_len << endl; 552 cout << "Internal prefetch width : " << int_prefetch_w << endl; 553 cout << "Force cache config : " << g_ip->force_cache_config << endl; 554 if (g_ip->force_cache_config) { 555 cout << "Ndwl : " << g_ip->ndwl << endl; 556 cout << "Ndbl : " << g_ip->ndbl << endl; 557 cout << "Nspd : " << g_ip->nspd << endl; 558 cout << "Ndcm : " << g_ip->ndcm << endl; 559 cout << "Ndsam1 : " << g_ip->ndsam1 << endl; 560 cout << "Ndsam2 : " << g_ip->ndsam2 << endl; 561 } 562} 563 564 565 566powerComponents operator+(const powerComponents & x, const powerComponents & y) { 567 powerComponents z; 568 569 z.dynamic = x.dynamic + y.dynamic; 570 z.leakage = x.leakage + y.leakage; 571 z.gate_leakage = x.gate_leakage + y.gate_leakage; 572 z.short_circuit = x.short_circuit + y.short_circuit; 573 z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage; 574 575 return z; 576} 577 578powerComponents operator*(const powerComponents & x, double const * const y) { 579 powerComponents z; 580 581 z.dynamic = x.dynamic * y[0]; 582 z.leakage = x.leakage * y[1]; 583 z.gate_leakage = x.gate_leakage * y[2]; 584 z.short_circuit = x.short_circuit * y[3]; 585 //longer channel leakage has the same behavior as normal leakage 586 z.longer_channel_leakage = x.longer_channel_leakage * y[1]; 587 588 return z; 589} 590 591 592powerDef operator+(const powerDef & x, const powerDef & y) { 593 powerDef z; 594 595 z.readOp = x.readOp + y.readOp; 596 z.writeOp = x.writeOp + y.writeOp; 597 z.searchOp = x.searchOp + y.searchOp; 598 return z; 599} 600 601powerDef operator*(const powerDef & x, double const * const y) { 602 powerDef z; 603 604 z.readOp = x.readOp * y; 605 z.writeOp = x.writeOp * y; 606 z.searchOp = x.searchOp * y; 607 return z; 608} 609 610uca_org_t cacti_interface(const string & infile_name) { 611 612 uca_org_t fin_res; 613 //uca_org_t result; 614 fin_res.valid = false; 615 616 g_ip = new InputParameter(); 617 g_ip->parse_cfg(infile_name); 618 if (!g_ip->error_checking(infile_name)) 619 exit(0); 620 if (g_ip->print_input_args) 621 g_ip->display_ip(); 622 623 init_tech_params(g_ip->F_sz_um, false); 624 Wire winit; // Do not delete this line. It initializes wires. 625 626 627// For HighRadix Only 628// //// Wire wirea(g_ip->wt, 1000); 629// //// wirea.print_wire(); 630// //// cout << "Wire Area " << wirea.area.get_area() << " sq. u" << endl; 631// // winit.print_wire(); 632// // 633// HighRadix *hr; 634// hr = new HighRadix(); 635// hr->compute_power(); 636// hr->print_router(); 637// exit(0); 638// 639// double sub_switch_sz = 2; 640// double rows = 32; 641// for (int i=0; i<6; i++) { 642// sub_switch_sz = pow(2, i); 643// rows = 64/sub_switch_sz; 644// hr = new HighRadix(sub_switch_sz, rows, .8/* freq */, 64, 2, 64, 0.7); 645// hr->compute_power(); 646// hr->print_router(); 647// delete hr; 648// } 649// // HighRadix yarc; 650// // yarc.compute_power(); 651// // yarc.print_router(); 652// winit.print_wire(); 653// exit(0); 654// For HighRadix Only End 655 656 if (g_ip->nuca == 1) { 657 Nuca n(&g_tp.peri_global); 658 n.sim_nuca(); 659 } 660 g_ip->display_ip(); 661 solve(&fin_res); 662 663 output_UCA(&fin_res); 664 output_data_csv(fin_res); 665 666 delete (g_ip); 667 return fin_res; 668} 669 670//cacti6.5's plain interface, please keep !!! 671uca_org_t cacti_interface( 672 int cache_size, 673 int line_size, 674 int associativity, 675 int rw_ports, 676 int excl_read_ports, 677 int excl_write_ports, 678 int single_ended_read_ports, 679 int banks, 680 double tech_node, // in nm 681 int page_sz, 682 int burst_length, 683 int pre_width, 684 int output_width, 685 int specific_tag, 686 int tag_width, 687 int access_mode, //0 normal, 1 seq, 2 fast 688 int cache, //scratch ram or cache 689 int main_mem, 690 int obj_func_delay, 691 int obj_func_dynamic_power, 692 int obj_func_leakage_power, 693 int obj_func_area, 694 int obj_func_cycle_time, 695 int dev_func_delay, 696 int dev_func_dynamic_power, 697 int dev_func_leakage_power, 698 int dev_func_area, 699 int dev_func_cycle_time, 700 int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate 701 int temp, 702 int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing 703 int data_arr_ram_cell_tech_flavor_in, // 0-4 704 int data_arr_peri_global_tech_flavor_in, 705 int tag_arr_ram_cell_tech_flavor_in, 706 int tag_arr_peri_global_tech_flavor_in, 707 int interconnect_projection_type_in, // 0 - aggressive, 1 - normal 708 int wire_inside_mat_type_in, 709 int wire_outside_mat_type_in, 710 int is_nuca, // 0 - UCA, 1 - NUCA 711 int core_count, 712 int cache_level, // 0 - L2, 1 - L3 713 int nuca_bank_count, 714 int nuca_obj_func_delay, 715 int nuca_obj_func_dynamic_power, 716 int nuca_obj_func_leakage_power, 717 int nuca_obj_func_area, 718 int nuca_obj_func_cycle_time, 719 int nuca_dev_func_delay, 720 int nuca_dev_func_dynamic_power, 721 int nuca_dev_func_leakage_power, 722 int nuca_dev_func_area, 723 int nuca_dev_func_cycle_time, 724 int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported 725 int p_input) { 726 g_ip = new InputParameter(); 727 g_ip->add_ecc_b_ = true; 728 729 g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; 730 g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; 731 g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; 732 g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; 733 734 g_ip->ic_proj_type = interconnect_projection_type_in; 735 g_ip->wire_is_mat_type = wire_inside_mat_type_in; 736 g_ip->wire_os_mat_type = wire_outside_mat_type_in; 737 g_ip->burst_len = burst_length; 738 g_ip->int_prefetch_w = pre_width; 739 g_ip->page_sz_bits = page_sz; 740 741 g_ip->cache_sz = cache_size; 742 g_ip->line_sz = line_size; 743 g_ip->assoc = associativity; 744 g_ip->nbanks = banks; 745 g_ip->out_w = output_width; 746 g_ip->specific_tag = specific_tag; 747 if (tag_width == 0) { 748 g_ip->tag_w = 42; 749 } else { 750 g_ip->tag_w = tag_width; 751 } 752 753 g_ip->access_mode = access_mode; 754 g_ip->delay_wt = obj_func_delay; 755 g_ip->dynamic_power_wt = obj_func_dynamic_power; 756 g_ip->leakage_power_wt = obj_func_leakage_power; 757 g_ip->area_wt = obj_func_area; 758 g_ip->cycle_time_wt = obj_func_cycle_time; 759 g_ip->delay_dev = dev_func_delay; 760 g_ip->dynamic_power_dev = dev_func_dynamic_power; 761 g_ip->leakage_power_dev = dev_func_leakage_power; 762 g_ip->area_dev = dev_func_area; 763 g_ip->cycle_time_dev = dev_func_cycle_time; 764 g_ip->ed = ed_ed2_none; 765 766 switch (wt) { 767 case (0): 768 g_ip->force_wiretype = 0; 769 g_ip->wt = Global; 770 break; 771 case (1): 772 g_ip->force_wiretype = 1; 773 g_ip->wt = Global; 774 break; 775 case (2): 776 g_ip->force_wiretype = 1; 777 g_ip->wt = Global_5; 778 break; 779 case (3): 780 g_ip->force_wiretype = 1; 781 g_ip->wt = Global_10; 782 break; 783 case (4): 784 g_ip->force_wiretype = 1; 785 g_ip->wt = Global_20; 786 break; 787 case (5): 788 g_ip->force_wiretype = 1; 789 g_ip->wt = Global_30; 790 break; 791 case (6): 792 g_ip->force_wiretype = 1; 793 g_ip->wt = Low_swing; 794 break; 795 default: 796 cout << "Unknown wire type!\n"; 797 exit(0); 798 } 799 800 g_ip->delay_wt_nuca = nuca_obj_func_delay; 801 g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power; 802 g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power; 803 g_ip->area_wt_nuca = nuca_obj_func_area; 804 g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time; 805 g_ip->delay_dev_nuca = dev_func_delay; 806 g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power; 807 g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power; 808 g_ip->area_dev_nuca = nuca_dev_func_area; 809 g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time; 810 g_ip->nuca = is_nuca; 811 g_ip->nuca_bank_count = nuca_bank_count; 812 if (nuca_bank_count > 0) { 813 g_ip->force_nuca_bank = 1; 814 } 815 g_ip->cores = core_count; 816 g_ip->cache_level = cache_level; 817 818 g_ip->temp = temp; 819 820 g_ip->F_sz_nm = tech_node; 821 g_ip->F_sz_um = tech_node / 1000; 822 g_ip->is_main_mem = (main_mem != 0) ? true : false; 823 g_ip->is_cache = (cache != 0) ? true : false; 824 g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; 825 826 g_ip->num_rw_ports = rw_ports; 827 g_ip->num_rd_ports = excl_read_ports; 828 g_ip->num_wr_ports = excl_write_ports; 829 g_ip->num_se_rd_ports = single_ended_read_ports; 830 g_ip->print_detail = 1; 831 g_ip->nuca = 0; 832 833 g_ip->wt = Global_5; 834 g_ip->force_cache_config = false; 835 g_ip->force_wiretype = false; 836 g_ip->print_input_args = p_input; 837 838 839 uca_org_t fin_res; 840 fin_res.valid = false; 841 842 if (g_ip->error_checking() == false) exit(0); 843 if (g_ip->print_input_args) 844 g_ip->display_ip(); 845 init_tech_params(g_ip->F_sz_um, false); 846 Wire winit; // Do not delete this line. It initializes wires. 847 848 if (g_ip->nuca == 1) { 849 Nuca n(&g_tp.peri_global); 850 n.sim_nuca(); 851 } 852 solve(&fin_res); 853 854 output_UCA(&fin_res); 855 856 delete (g_ip); 857 return fin_res; 858} 859 860//McPAT's plain interface, please keep !!! 861uca_org_t cacti_interface( 862 int cache_size, 863 int line_size, 864 int associativity, 865 int rw_ports, 866 int excl_read_ports,// para5 867 int excl_write_ports, 868 int single_ended_read_ports, 869 int search_ports, 870 int banks, 871 double tech_node,//para10 872 int output_width, 873 int specific_tag, 874 int tag_width, 875 int access_mode, 876 int cache, //para15 877 int main_mem, 878 int obj_func_delay, 879 int obj_func_dynamic_power, 880 int obj_func_leakage_power, 881 int obj_func_cycle_time, //para20 882 int obj_func_area, 883 int dev_func_delay, 884 int dev_func_dynamic_power, 885 int dev_func_leakage_power, 886 int dev_func_area, //para25 887 int dev_func_cycle_time, 888 int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate 889 int temp, 890 int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing 891 int data_arr_ram_cell_tech_flavor_in,//para30 892 int data_arr_peri_global_tech_flavor_in, 893 int tag_arr_ram_cell_tech_flavor_in, 894 int tag_arr_peri_global_tech_flavor_in, 895 int interconnect_projection_type_in, 896 int wire_inside_mat_type_in,//para35 897 int wire_outside_mat_type_in, 898 int REPEATERS_IN_HTREE_SEGMENTS_in, 899 int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, 900 int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, 901 int PAGE_SIZE_BITS_in,//para40 902 int BURST_LENGTH_in, 903 int INTERNAL_PREFETCH_WIDTH_in, 904 int force_wiretype, 905 int wiretype, 906 int force_config,//para45 907 int ndwl, 908 int ndbl, 909 int nspd, 910 int ndcm, 911 int ndsam1,//para50 912 int ndsam2, 913 int ecc) { 914 g_ip = new InputParameter(); 915 916 uca_org_t fin_res; 917 fin_res.valid = false; 918 919 g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; 920 g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; 921 g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; 922 g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; 923 924 g_ip->ic_proj_type = interconnect_projection_type_in; 925 g_ip->wire_is_mat_type = wire_inside_mat_type_in; 926 g_ip->wire_os_mat_type = wire_outside_mat_type_in; 927 g_ip->burst_len = BURST_LENGTH_in; 928 g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; 929 g_ip->page_sz_bits = PAGE_SIZE_BITS_in; 930 931 g_ip->cache_sz = cache_size; 932 g_ip->line_sz = line_size; 933 g_ip->assoc = associativity; 934 g_ip->nbanks = banks; 935 g_ip->out_w = output_width; 936 g_ip->specific_tag = specific_tag; 937 if (specific_tag == 0) { 938 g_ip->tag_w = 42; 939 } else { 940 g_ip->tag_w = tag_width; 941 } 942 943 g_ip->access_mode = access_mode; 944 g_ip->delay_wt = obj_func_delay; 945 g_ip->dynamic_power_wt = obj_func_dynamic_power; 946 g_ip->leakage_power_wt = obj_func_leakage_power; 947 g_ip->area_wt = obj_func_area; 948 g_ip->cycle_time_wt = obj_func_cycle_time; 949 g_ip->delay_dev = dev_func_delay; 950 g_ip->dynamic_power_dev = dev_func_dynamic_power; 951 g_ip->leakage_power_dev = dev_func_leakage_power; 952 g_ip->area_dev = dev_func_area; 953 g_ip->cycle_time_dev = dev_func_cycle_time; 954 g_ip->temp = temp; 955 g_ip->ed = ed_ed2_none; 956 957 g_ip->F_sz_nm = tech_node; 958 g_ip->F_sz_um = tech_node / 1000; 959 g_ip->is_main_mem = (main_mem != 0) ? true : false; 960 g_ip->is_cache = (cache == 1) ? true : false; 961 g_ip->pure_ram = (cache == 0) ? true : false; 962 g_ip->pure_cam = (cache == 2) ? true : false; 963 g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; 964 g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; 965 g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; 966 967 g_ip->num_rw_ports = rw_ports; 968 g_ip->num_rd_ports = excl_read_ports; 969 g_ip->num_wr_ports = excl_write_ports; 970 g_ip->num_se_rd_ports = single_ended_read_ports; 971 g_ip->num_search_ports = search_ports; 972 973 g_ip->print_detail = 1; 974 g_ip->nuca = 0; 975 976 if (force_wiretype == 0) { 977 g_ip->wt = Global; 978 g_ip->force_wiretype = false; 979 } else { 980 g_ip->force_wiretype = true; 981 if (wiretype == 10) { 982 g_ip->wt = Global_10; 983 } 984 if (wiretype == 20) { 985 g_ip->wt = Global_20; 986 } 987 if (wiretype == 30) { 988 g_ip->wt = Global_30; 989 } 990 if (wiretype == 5) { 991 g_ip->wt = Global_5; 992 } 993 if (wiretype == 0) { 994 g_ip->wt = Low_swing; 995 } 996 } 997 //g_ip->wt = Global_5; 998 if (force_config == 0) { 999 g_ip->force_cache_config = false; 1000 } else { 1001 g_ip->force_cache_config = true; 1002 g_ip->ndbl = ndbl; 1003 g_ip->ndwl = ndwl; 1004 g_ip->nspd = nspd; 1005 g_ip->ndcm = ndcm; 1006 g_ip->ndsam1 = ndsam1; 1007 g_ip->ndsam2 = ndsam2; 1008 1009 1010 } 1011 1012 if (ecc == 0) { 1013 g_ip->add_ecc_b_ = false; 1014 } else { 1015 g_ip->add_ecc_b_ = true; 1016 } 1017 1018 1019 if (!g_ip->error_checking()) 1020 exit(0); 1021 1022 init_tech_params(g_ip->F_sz_um, false); 1023 Wire winit; // Do not delete this line. It initializes wires. 1024 1025 g_ip->display_ip(); 1026 solve(&fin_res); 1027 output_UCA(&fin_res); 1028 output_data_csv(fin_res); 1029 delete (g_ip); 1030 1031 return fin_res; 1032} 1033 1034 1035 1036bool InputParameter::error_checking(string name) { 1037 int A; 1038 bool seq_access = false; 1039 fast_access = true; 1040 1041 switch (access_mode) { 1042 case 0: 1043 seq_access = false; 1044 fast_access = false; 1045 break; 1046 case 1: 1047 seq_access = true; 1048 fast_access = false; 1049 break; 1050 case 2: 1051 seq_access = false; 1052 fast_access = true; 1053 break; 1054 } 1055 1056 if (is_main_mem) { 1057 if (ic_proj_type == 0) { 1058 cerr << name 1059 << ": DRAM model supports only conservative interconnect " 1060 << "projection but is set to aggressive!\n\n"; 1061 return false; 1062 } 1063 } 1064 1065 1066 uint32_t B = line_sz; 1067 1068 if (B < 1) { 1069 cerr << name << ": Block size must be >= 1, but is set to " << B 1070 << endl; 1071 return false; 1072 } else if (B*8 < out_w) { 1073 cerr << name << ": Block size must be at least " << out_w / 8 1074 << ", but is set to " << B << endl; 1075 return false; 1076 } 1077 1078 if (F_sz_um <= 0) { 1079 cerr << name << ": Feature size must be > 0, but is set to " 1080 << F_sz_um << endl; 1081 return false; 1082 } else if (F_sz_um > 0.091) { 1083 cerr << name << ": Feature size must be <= 90 nm, but is set to " 1084 << F_sz_um << endl; 1085 return false; 1086 } 1087 1088 1089 uint32_t RWP = num_rw_ports; 1090 uint32_t ERP = num_rd_ports; 1091 uint32_t EWP = num_wr_ports; 1092 uint32_t NSER = num_se_rd_ports; 1093 uint32_t SCHP = num_search_ports; 1094 1095//TODO: revisit this. This is an important feature. Sheng thought this should be used 1096// // If multiple banks and multiple ports are specified, then if number of ports is less than or equal to 1097// // the number of banks, we assume that the multiple ports are implemented via the multiple banks. 1098// // In such a case we assume that each bank has 1 RWP port. 1099// if ((RWP + ERP + EWP) <= nbanks && nbanks>1) 1100// { 1101// RWP = 1; 1102// ERP = 0; 1103// EWP = 0; 1104// NSER = 0; 1105// } 1106// else if ((RWP < 0) || (EWP < 0) || (ERP < 0)) 1107// { 1108// cerr << "Ports must >=0" << endl; 1109// return false; 1110// } 1111// else if (RWP > 2) 1112// { 1113// cerr << "Maximum of 2 read/write ports" << endl; 1114// return false; 1115// } 1116// else if ((RWP+ERP+EWP) < 1) 1117 // Changed to new implementation: 1118 // The number of ports specified at input is per bank 1119 if ((RWP + ERP + EWP) < 1) { 1120 cerr << name << ": Must have at least one port" << endl; 1121 return false; 1122 } 1123 1124 if (is_pow2(nbanks) == false) { 1125 cerr << name << ": Number of subbanks should be greater than or " 1126 << "equal to 1 and should be a power of 2, but is set to " 1127 << nbanks << endl; 1128 return false; 1129 } 1130 1131 int C = cache_sz / nbanks; 1132 if (C < 64) { 1133 cerr << name << ": Cache size must be >=64, but is set to " << C 1134 << endl; 1135 return false; 1136 } 1137 1138//TODO: revisit this 1139// if (pure_ram==true && assoc!=1) 1140// { 1141// cerr << "Pure RAM must have assoc as 1" << endl; 1142// return false; 1143// } 1144 1145 //fully assoc and cam check 1146 if (is_cache && assoc == 0) 1147 fully_assoc = true; 1148 else 1149 fully_assoc = false; 1150 1151 if (pure_cam == true && assoc != 0) { 1152 cerr << name 1153 << ": Pure CAM must have associativity as 0, but is set to" 1154 << assoc << endl; 1155 return false; 1156 } 1157 1158 if (assoc == 0 && (pure_cam == false && is_cache == false)) { 1159 cerr << name 1160 << ": Only CAM or Fully associative cache can have associativity " 1161 << "as 0" << endl; 1162 return false; 1163 } 1164 1165 if ((fully_assoc == true || pure_cam == true) 1166 && (data_arr_ram_cell_tech_type != tag_arr_ram_cell_tech_type 1167 || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type)) { 1168 cerr << name 1169 << ": CAM and fully associative cache must have same device type " 1170 << "for both data and tag array" << endl; 1171 cerr << "\tData array RAM cell = " << data_arr_ram_cell_tech_type 1172 << ", Tag array RAM cell = " << tag_arr_ram_cell_tech_type << endl 1173 << "\tData array peripheral = " << data_arr_peri_global_tech_type 1174 << ", Tag array peripheral = " << tag_arr_peri_global_tech_type 1175 << endl; 1176 return false; 1177 } 1178 1179 if ((fully_assoc == true || pure_cam == true) 1180 && (data_arr_ram_cell_tech_type == lp_dram || 1181 data_arr_ram_cell_tech_type == comm_dram)) { 1182 cerr << name << ": DRAM based CAM and fully associative cache are not " 1183 << "supported" << endl; 1184 return false; 1185 } 1186 1187 if ((fully_assoc == true || pure_cam == true) 1188 && (is_main_mem == true)) { 1189 cerr << name 1190 << ": CAM and fully associative cache cannot be as main memory" 1191 << endl; 1192 return false; 1193 } 1194 1195 if ((fully_assoc || pure_cam) && SCHP < 1) { 1196 cerr << name 1197 << ": CAM and fully associative must have at least 1 search port," 1198 << " but are set to " << SCHP << endl; 1199 return false; 1200 } 1201 1202 if (RWP == 0 && ERP == 0 && SCHP > 0 && ((fully_assoc || pure_cam))) { 1203 ERP = SCHP; 1204 } 1205 1206// if ((!(fully_assoc || pure_cam)) && SCHP>=1) 1207// { 1208// cerr << "None CAM and fully associative cannot have search ports" << endl; 1209// return false; 1210// } 1211 1212 if (assoc == 0) { 1213 A = C / B; 1214 //fully_assoc = true; 1215 } else { 1216 if (assoc == 1) { 1217 A = 1; 1218 //fully_assoc = false; 1219 } else { 1220 //fully_assoc = false; 1221 A = assoc; 1222 if (is_pow2(A) == false) { 1223 cerr << name 1224 << ": Associativity must be a power of 2, but is set to " 1225 << A << endl; 1226 return false; 1227 } 1228 } 1229 } 1230 1231 if (C / (B*A) <= 1 && assoc != 0) { 1232 cerr << name << ": Number of sets (" << (C / (B * A)) 1233 << ") is too small: " << endl; 1234 cerr << " Need to either increase cache size, or decrease " 1235 << "associativity or block size" << endl; 1236 cerr << " (or use fully associative cache)" << endl; 1237 return false; 1238 } 1239 1240 block_sz = B; 1241 1242 /*dt: testing sequential access mode*/ 1243 if (seq_access) { 1244 tag_assoc = A; 1245 data_assoc = 1; 1246 is_seq_acc = true; 1247 } else { 1248 tag_assoc = A; 1249 data_assoc = A; 1250 is_seq_acc = false; 1251 } 1252 1253 if (assoc == 0) { 1254 data_assoc = 1; 1255 } 1256 num_rw_ports = RWP; 1257 num_rd_ports = ERP; 1258 num_wr_ports = EWP; 1259 num_se_rd_ports = NSER; 1260 if (!(fully_assoc || pure_cam)) 1261 num_search_ports = 0; 1262 nsets = C / (B * A); 1263 1264 if (temp < 300 || temp > 400 || temp % 10 != 0) { 1265 cerr << name << ": " << temp 1266 << " Temperature must be between 300 and 400 Kelvin and multiple " 1267 << "of 10." << endl; 1268 return false; 1269 } 1270 1271 if (nsets < 1) { 1272 cerr << name << ": Less than one set..." << endl; 1273 return false; 1274 } 1275 1276 return true; 1277} 1278 1279 1280 1281void output_data_csv(const uca_org_t & fin_res) { 1282 //TODO: the csv output should remain 1283 fstream file("out.csv", ios::in); 1284 bool print_index = file.fail(); 1285 file.close(); 1286 1287 file.open("out.csv", ios::out | ios::app); 1288 if (file.fail() == true) { 1289 cerr << "File out.csv could not be opened successfully" << endl; 1290 } else { 1291 if (print_index == true) { 1292 file << "Tech node (nm), "; 1293 file << "Capacity (bytes), "; 1294 file << "Number of banks, "; 1295 file << "Associativity, "; 1296 file << "Output width (bits), "; 1297 file << "Access time (ns), "; 1298 file << "Random cycle time (ns), "; 1299 file << "Dynamic search energy (nJ), "; 1300 file << "Dynamic read energy (nJ), "; 1301 file << "Dynamic write energy (nJ), "; 1302 file << "Standby leakage per bank(mW), "; 1303 file << "Area (mm2), "; 1304 file << "Ndwl, "; 1305 file << "Ndbl, "; 1306 file << "Nspd, "; 1307 file << "Ndcm, "; 1308 file << "Ndsam_level_1, "; 1309 file << "Ndsam_level_2, "; 1310 file << "Data arrary area efficiency %, "; 1311 file << "Ntwl, "; 1312 file << "Ntbl, "; 1313 file << "Ntspd, "; 1314 file << "Ntcm, "; 1315 file << "Ntsam_level_1, "; 1316 file << "Ntsam_level_2, "; 1317 file << "Tag arrary area efficiency %, "; 1318 1319// file << "Resistance per unit micron (ohm-micron), "; 1320// file << "Capacitance per unit micron (fF per micron), "; 1321// file << "Unit-length wire delay (ps), "; 1322// file << "FO4 delay (ps), "; 1323// file << "delay route to bank (including crossb delay) (ps), "; 1324// file << "Crossbar delay (ps), "; 1325// file << "Dyn read energy per access from closed page (nJ), "; 1326// file << "Dyn read energy per access from open page (nJ), "; 1327// file << "Leak power of an subbank with page closed (mW), "; 1328// file << "Leak power of a subbank with page open (mW), "; 1329// file << "Leak power of request and reply networks (mW), "; 1330// file << "Number of subbanks, "; 1331// file << "Page size in bits, "; 1332// file << "Activate power, "; 1333// file << "Read power, "; 1334// file << "Write power, "; 1335// file << "Precharge power, "; 1336// file << "tRCD, "; 1337// file << "CAS latency, "; 1338// file << "Precharge delay, "; 1339// file << "Perc dyn energy bitlines, "; 1340// file << "perc dyn energy wordlines, "; 1341// file << "perc dyn energy outside mat, "; 1342// file << "Area opt (perc), "; 1343// file << "Delay opt (perc), "; 1344// file << "Repeater opt (perc), "; 1345// file << "Aspect ratio"; 1346 file << endl; 1347 } 1348 file << g_ip->F_sz_nm << ", "; 1349 file << g_ip->cache_sz << ", "; 1350 file << g_ip->nbanks << ", "; 1351 file << g_ip->tag_assoc << ", "; 1352 file << g_ip->out_w << ", "; 1353 file << fin_res.access_time*1e+9 << ", "; 1354 file << fin_res.cycle_time*1e+9 << ", "; 1355// file << fin_res.data_array2->multisubbank_interleave_cycle_time*1e+9 << ", "; 1356// file << fin_res.data_array2->delay_request_network*1e+9 << ", "; 1357// file << fin_res.data_array2->delay_inside_mat*1e+9 << ", "; 1358// file << fin_res.data_array2.delay_reply_network*1e+9 << ", "; 1359 1360// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) 1361// { 1362// file << fin_res.tag_array2->access_time*1e+9 << ", "; 1363// } 1364// else 1365// { 1366// file << 0 << ", "; 1367// } 1368// file << fin_res.data_array2->access_time*1e+9 << ", "; 1369// file << fin_res.data_array2->dram_refresh_period*1e+6 << ", "; 1370// file << fin_res.data_array2->dram_array_availability << ", "; 1371 if (g_ip->fully_assoc || g_ip->pure_cam) { 1372 file << fin_res.power.searchOp.dynamic*1e+9 << ", "; 1373 } else { 1374 file << "N/A" << ", "; 1375 } 1376 file << fin_res.power.readOp.dynamic*1e+9 << ", "; 1377 file << fin_res.power.writeOp.dynamic*1e+9 << ", "; 1378// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) 1379// { 1380// file << fin_res.tag_array2->power.readOp.dynamic*1e+9 << ", "; 1381// } 1382// else 1383// { 1384// file << "NA" << ", "; 1385// } 1386// file << fin_res.data_array2->power.readOp.dynamic*1e+9 << ", "; 1387// if (g_ip->fully_assoc || g_ip->pure_cam) 1388// { 1389// file << fin_res.power.searchOp.dynamic*1000/fin_res.cycle_time << ", "; 1390// } 1391// else 1392// { 1393// file << fin_res.power.readOp.dynamic*1000/fin_res.cycle_time << ", "; 1394// } 1395 1396 file <<( fin_res.power.readOp.leakage + fin_res.power.readOp.gate_leakage )*1000 << ", "; 1397// file << fin_res.leak_power_with_sleep_transistors_in_mats*1000 << ", "; 1398// file << fin_res.data_array.refresh_power / fin_res.data_array.total_power.readOp.leakage << ", "; 1399 file << fin_res.area*1e-6 << ", "; 1400 1401 file << fin_res.data_array2->Ndwl << ", "; 1402 file << fin_res.data_array2->Ndbl << ", "; 1403 file << fin_res.data_array2->Nspd << ", "; 1404 file << fin_res.data_array2->deg_bl_muxing << ", "; 1405 file << fin_res.data_array2->Ndsam_lev_1 << ", "; 1406 file << fin_res.data_array2->Ndsam_lev_2 << ", "; 1407 file << fin_res.data_array2->area_efficiency << ", "; 1408 if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) { 1409 file << fin_res.tag_array2->Ndwl << ", "; 1410 file << fin_res.tag_array2->Ndbl << ", "; 1411 file << fin_res.tag_array2->Nspd << ", "; 1412 file << fin_res.tag_array2->deg_bl_muxing << ", "; 1413 file << fin_res.tag_array2->Ndsam_lev_1 << ", "; 1414 file << fin_res.tag_array2->Ndsam_lev_2 << ", "; 1415 file << fin_res.tag_array2->area_efficiency << ", "; 1416 } else { 1417 file << "N/A" << ", "; 1418 file << "N/A"<< ", "; 1419 file << "N/A" << ", "; 1420 file << "N/A" << ", "; 1421 file << "N/A" << ", "; 1422 file << "N/A" << ", "; 1423 file << "N/A" << ", "; 1424 } 1425 1426// file << g_tp.wire_inside_mat.R_per_um << ", "; 1427// file << g_tp.wire_inside_mat.C_per_um / 1e-15 << ", "; 1428// file << g_tp.unit_len_wire_del / 1e-12 << ", "; 1429// file << g_tp.FO4 / 1e-12 << ", "; 1430// file << fin_res.data_array.delay_route_to_bank / 1e-9 << ", "; 1431// file << fin_res.data_array.delay_crossbar / 1e-9 << ", "; 1432// file << fin_res.data_array.dyn_read_energy_from_closed_page / 1e-9 << ", "; 1433// file << fin_res.data_array.dyn_read_energy_from_open_page / 1e-9 << ", "; 1434// file << fin_res.data_array.leak_power_subbank_closed_page / 1e-3 << ", "; 1435// file << fin_res.data_array.leak_power_subbank_open_page / 1e-3 << ", "; 1436// file << fin_res.data_array.leak_power_request_and_reply_networks / 1e-3 << ", "; 1437// file << fin_res.data_array.number_subbanks << ", " ; 1438// file << fin_res.data_array.page_size_in_bits << ", " ; 1439// file << fin_res.data_array.activate_energy * 1e9 << ", " ; 1440// file << fin_res.data_array.read_energy * 1e9 << ", " ; 1441// file << fin_res.data_array.write_energy * 1e9 << ", " ; 1442// file << fin_res.data_array.precharge_energy * 1e9 << ", " ; 1443// file << fin_res.data_array.trcd * 1e9 << ", " ; 1444// file << fin_res.data_array.cas_latency * 1e9 << ", " ; 1445// file << fin_res.data_array.precharge_delay * 1e9 << ", " ; 1446// file << fin_res.data_array.all_banks_height / fin_res.data_array.all_banks_width; 1447 file<<endl; 1448 } 1449 file.close(); 1450} 1451 1452 1453 1454void output_UCA(uca_org_t *fr) { 1455 // if (NUCA) 1456 if (0) { 1457 cout << "\n\n Detailed Bank Stats:\n"; 1458 cout << " Bank Size (bytes): %d\n" << 1459 (int) (g_ip->cache_sz); 1460 } else { 1461 if (g_ip->data_arr_ram_cell_tech_type == 3) { 1462 cout << "\n---------- CACTI version 6.5, Uniform Cache Access " << 1463 "Logic Process Based DRAM Model ----------\n"; 1464 } else if (g_ip->data_arr_ram_cell_tech_type == 4) { 1465 cout << "\n---------- CACTI version 6.5, Uniform" << 1466 "Cache Access Commodity DRAM Model ----------\n"; 1467 } else { 1468 cout << "\n---------- CACTI version 6.5, Uniform Cache Access " 1469 "SRAM Model ----------\n"; 1470 } 1471 cout << "\nCache Parameters:\n"; 1472 cout << " Total cache size (bytes): " << 1473 (int) (g_ip->cache_sz) << endl; 1474 } 1475 1476 cout << " Number of banks: " << (int) g_ip->nbanks << endl; 1477 if (g_ip->fully_assoc || g_ip->pure_cam) 1478 cout << " Associativity: fully associative\n"; 1479 else { 1480 if (g_ip->tag_assoc == 1) 1481 cout << " Associativity: direct mapped\n"; 1482 else 1483 cout << " Associativity: " << 1484 g_ip->tag_assoc << endl; 1485 } 1486 1487 1488 cout << " Block size (bytes): " << g_ip->line_sz << endl; 1489 cout << " Read/write Ports: " << 1490 g_ip->num_rw_ports << endl; 1491 cout << " Read ports: " << 1492 g_ip->num_rd_ports << endl; 1493 cout << " Write ports: " << 1494 g_ip->num_wr_ports << endl; 1495 if (g_ip->fully_assoc || g_ip->pure_cam) 1496 cout << " search ports: " << 1497 g_ip->num_search_ports << endl; 1498 cout << " Technology size (nm): " << 1499 g_ip->F_sz_nm << endl << endl; 1500 1501 cout << " Access time (ns): " << fr->access_time*1e9 << endl; 1502 cout << " Cycle time (ns): " << fr->cycle_time*1e9 << endl; 1503 if (g_ip->data_arr_ram_cell_tech_type >= 4) { 1504 cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl; 1505 cout << " Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl; 1506 cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl; 1507 cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl; 1508 cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl; 1509 cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl; 1510 cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl; 1511 cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl; 1512 cout << " Refresh power (mW): " << 1513 fr->data_array2->refresh_power*1e3 << endl; 1514 } else { 1515 if ((g_ip->fully_assoc || g_ip->pure_cam)) { 1516 cout << " Total dynamic associative search energy per access (nJ): " << 1517 fr->power.searchOp.dynamic*1e9 << endl; 1518// cout << " Total dynamic read energy per access (nJ): " << 1519// fr->power.readOp.dynamic*1e9 << endl; 1520// cout << " Total dynamic write energy per access (nJ): " << 1521// fr->power.writeOp.dynamic*1e9 << endl; 1522 } 1523// else 1524// { 1525 cout << " Total dynamic read energy per access (nJ): " << 1526 fr->power.readOp.dynamic*1e9 << endl; 1527 cout << " Total dynamic write energy per access (nJ): " << 1528 fr->power.writeOp.dynamic*1e9 << endl; 1529// } 1530 cout << " Total leakage power of a bank" 1531 " (mW): " << fr->power.readOp.leakage*1e3 << endl; 1532 cout << " Total gate leakage power of a bank" 1533 " (mW): " << fr->power.readOp.gate_leakage*1e3 << endl; 1534 } 1535 1536 if (g_ip->data_arr_ram_cell_tech_type == 3 || g_ip->data_arr_ram_cell_tech_type == 4) { 1537 } 1538 cout << " Cache height x width (mm): " << 1539 fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl << endl; 1540 1541 1542 cout << " Best Ndwl : " << fr->data_array2->Ndwl << endl; 1543 cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl; 1544 cout << " Best Nspd : " << fr->data_array2->Nspd << endl; 1545 cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl; 1546 cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl; 1547 cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl; 1548 1549 if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && 1550 !g_ip->is_main_mem) { 1551 cout << " Best Ntwl : " << fr->tag_array2->Ndwl << endl; 1552 cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl; 1553 cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl; 1554 cout << " Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl; 1555 cout << " Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl; 1556 cout << " Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl; 1557 } 1558 1559 switch (fr->data_array2->wt) { 1560 case (0): 1561 cout << " Data array, H-tree wire type: Delay optimized global wires\n"; 1562 break; 1563 case (1): 1564 cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n"; 1565 break; 1566 case (2): 1567 cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n"; 1568 break; 1569 case (3): 1570 cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n"; 1571 break; 1572 case (4): 1573 cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n"; 1574 break; 1575 case (5): 1576 cout << " Data array, wire type: Low swing wires\n"; 1577 break; 1578 default: 1579 cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt << endl; 1580 exit(0); 1581 } 1582 1583 if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) { 1584 switch (fr->tag_array2->wt) { 1585 case (0): 1586 cout << " Tag array, H-tree wire type: Delay optimized global wires\n"; 1587 break; 1588 case (1): 1589 cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n"; 1590 break; 1591 case (2): 1592 cout << " Tag array, H-tree wire type: Global wires with 10\% delay penalty\n"; 1593 break; 1594 case (3): 1595 cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n"; 1596 break; 1597 case (4): 1598 cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n"; 1599 break; 1600 case (5): 1601 cout << " Tag array, wire type: Low swing wires\n"; 1602 break; 1603 default: 1604 cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt << endl; 1605 exit(-1); 1606 } 1607 } 1608 1609 if (g_ip->print_detail) { 1610 /* Delay stats */ 1611 /* data array stats */ 1612 cout << endl << "Time Components:" << endl << endl; 1613 1614 cout << " Data side (with Output driver) (ns): " << 1615 fr->data_array2->access_time / 1e-9 << endl; 1616 1617 cout << "\tH-tree input delay (ns): " << 1618 fr->data_array2->delay_route_to_bank * 1e9 + 1619 fr->data_array2->delay_input_htree * 1e9 << endl; 1620 1621 if (!(g_ip->pure_cam || g_ip->fully_assoc)) { 1622 cout << "\tDecoder + wordline delay (ns): " << 1623 fr->data_array2->delay_row_predecode_driver_and_block * 1e9 + 1624 fr->data_array2->delay_row_decoder * 1e9 << endl; 1625 } else { 1626 cout << "\tCAM search delay (ns): " << 1627 fr->data_array2->delay_matchlines * 1e9 << endl; 1628 } 1629 1630 cout << "\tBitline delay (ns): " << 1631 fr->data_array2->delay_bitlines / 1e-9 << endl; 1632 1633 cout << "\tSense Amplifier delay (ns): " << 1634 fr->data_array2->delay_sense_amp * 1e9 << endl; 1635 1636 1637 cout << "\tH-tree output delay (ns): " << 1638 fr->data_array2->delay_subarray_output_driver * 1e9 + 1639 fr->data_array2->delay_dout_htree * 1e9 << endl; 1640 1641 if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && 1642 !g_ip->is_main_mem) { 1643 /* tag array stats */ 1644 cout << endl << " Tag side (with Output driver) (ns): " << 1645 fr->tag_array2->access_time / 1e-9 << endl; 1646 1647 cout << "\tH-tree input delay (ns): " << 1648 fr->tag_array2->delay_route_to_bank * 1e9 + 1649 fr->tag_array2->delay_input_htree * 1e9 << endl; 1650 1651 cout << "\tDecoder + wordline delay (ns): " << 1652 fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 + 1653 fr->tag_array2->delay_row_decoder * 1e9 << endl; 1654 1655 cout << "\tBitline delay (ns): " << 1656 fr->tag_array2->delay_bitlines / 1e-9 << endl; 1657 1658 cout << "\tSense Amplifier delay (ns): " << 1659 fr->tag_array2->delay_sense_amp * 1e9 << endl; 1660 1661 cout << "\tComparator delay (ns): " << 1662 fr->tag_array2->delay_comparator * 1e9 << endl; 1663 1664 cout << "\tH-tree output delay (ns): " << 1665 fr->tag_array2->delay_subarray_output_driver * 1e9 + 1666 fr->tag_array2->delay_dout_htree * 1e9 << endl; 1667 } 1668 1669 1670 1671 /* Energy/Power stats */ 1672 cout << endl << endl << "Power Components:" << endl << endl; 1673 1674 if (!(g_ip->pure_cam || g_ip->fully_assoc)) { 1675 cout << " Data array: Total dynamic read energy/access (nJ): " << 1676 fr->data_array2->power.readOp.dynamic * 1e9 << endl; 1677 cout << "\tTotal leakage read/write power of a bank (mW): " << 1678 fr->data_array2->power.readOp.leakage * 1e3 << endl; 1679 1680 cout << "\tTotal energy in H-tree (that includes both " 1681 "address and data transfer) (nJ): " << 1682 (fr->data_array2->power_addr_input_htree.readOp.dynamic + 1683 fr->data_array2->power_data_output_htree.readOp.dynamic + 1684 fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; 1685 1686 cout << "\tTotal leakage power in H-tree (that includes both " 1687 "address and data network) ((mW)): " << 1688 (fr->data_array2->power_addr_input_htree.readOp.leakage + 1689 fr->data_array2->power_data_output_htree.readOp.leakage + 1690 fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 1691 << endl; 1692 1693 cout << "\tTotal gate leakage power in H-tree (that includes both " 1694 "address and data network) ((mW)): " << 1695 (fr->data_array2->power_addr_input_htree.readOp.gate_leakage + 1696 fr->data_array2->power_data_output_htree.readOp.gate_leakage + 1697 fr->data_array2->power_routing_to_bank.readOp.gate_leakage) * 1698 1e3 << endl; 1699 1700 cout << "\tOutput Htree inside bank Energy (nJ): " << 1701 fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; 1702 cout << "\tDecoder (nJ): " << 1703 fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + 1704 fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; 1705 cout << "\tWordline (nJ): " << 1706 fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; 1707 cout << "\tBitline mux & associated drivers (nJ): " << 1708 fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + 1709 fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + 1710 fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; 1711 cout << "\tSense amp mux & associated drivers (nJ): " << 1712 fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + 1713 fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + 1714 fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + 1715 fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + 1716 fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + 1717 fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; 1718 1719 cout << "\tBitlines precharge and equalization circuit (nJ): " << 1720 fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; 1721 cout << "\tBitlines (nJ): " << 1722 fr->data_array2->power_bitlines.readOp.dynamic * 1e9 << endl; 1723 cout << "\tSense amplifier energy (nJ): " << 1724 fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; 1725 cout << "\tSub-array output driver (nJ): " << 1726 fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; 1727 } 1728 1729 else if (g_ip->pure_cam) { 1730 1731 cout << " CAM array:" << endl; 1732 cout << " Total dynamic associative search energy/access (nJ): " << 1733 fr->data_array2->power.searchOp.dynamic * 1e9 << endl; 1734 cout << "\tTotal energy in H-tree (that includes both " 1735 "match key and data transfer) (nJ): " << 1736 (fr->data_array2->power_htree_in_search.searchOp.dynamic + 1737 fr->data_array2->power_htree_out_search.searchOp.dynamic + 1738 fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl; 1739 cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " << 1740 (fr->data_array2->power_htree_in_search.searchOp.dynamic + 1741 fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl; 1742 cout << "\tSearchlines (nJ): " << 1743 fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + 1744 fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl; 1745 cout << "\tMatchlines (nJ): " << 1746 fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + 1747 fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl; 1748 cout << "\tSub-array output driver (nJ): " << 1749 fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl; 1750 1751 1752 cout << endl << " Total dynamic read energy/access (nJ): " << 1753 fr->data_array2->power.readOp.dynamic * 1e9 << endl; 1754 cout << "\tTotal energy in H-tree (that includes both " 1755 "address and data transfer) (nJ): " << 1756 (fr->data_array2->power_addr_input_htree.readOp.dynamic + 1757 fr->data_array2->power_data_output_htree.readOp.dynamic + 1758 fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; 1759 cout << "\tOutput Htree inside bank Energy (nJ): " << 1760 fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; 1761 cout << "\tDecoder (nJ): " << 1762 fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + 1763 fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; 1764 cout << "\tWordline (nJ): " << 1765 fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; 1766 cout << "\tBitline mux & associated drivers (nJ): " << 1767 fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + 1768 fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + 1769 fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; 1770 cout << "\tSense amp mux & associated drivers (nJ): " << 1771 fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + 1772 fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + 1773 fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + 1774 fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + 1775 fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + 1776 fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; 1777 cout << "\tBitlines (nJ): " << 1778 fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + 1779 fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; 1780 cout << "\tSense amplifier energy (nJ): " << 1781 fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; 1782 cout << "\tSub-array output driver (nJ): " << 1783 fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; 1784 1785 cout << endl << " Total leakage power of a bank (mW): " << 1786 fr->data_array2->power.readOp.leakage * 1e3 << endl; 1787 } else { 1788 cout << " Fully associative array:" << endl; 1789 cout << " Total dynamic associative search energy/access (nJ): " << 1790 fr->data_array2->power.searchOp.dynamic * 1e9 << endl; 1791 cout << "\tTotal energy in H-tree (that includes both " 1792 "match key and data transfer) (nJ): " << 1793 (fr->data_array2->power_htree_in_search.searchOp.dynamic + 1794 fr->data_array2->power_htree_out_search.searchOp.dynamic + 1795 fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl; 1796 cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " << 1797 (fr->data_array2->power_htree_in_search.searchOp.dynamic + 1798 fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl; 1799 cout << "\tSearchlines (nJ): " << 1800 fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + 1801 fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl; 1802 cout << "\tMatchlines (nJ): " << 1803 fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + 1804 fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl; 1805 cout << "\tData portion wordline (nJ): " << 1806 fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 << endl; 1807 cout << "\tData Bitlines (nJ): " << 1808 fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 + 1809 fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9 << endl; 1810 cout << "\tSense amplifier energy (nJ): " << 1811 fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 << endl; 1812 cout << "\tSub-array output driver (nJ): " << 1813 fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl; 1814 1815 1816 cout << endl << " Total dynamic read energy/access (nJ): " << 1817 fr->data_array2->power.readOp.dynamic * 1e9 << endl; 1818 cout << "\tTotal energy in H-tree (that includes both " 1819 "address and data transfer) (nJ): " << 1820 (fr->data_array2->power_addr_input_htree.readOp.dynamic + 1821 fr->data_array2->power_data_output_htree.readOp.dynamic + 1822 fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; 1823 cout << "\tOutput Htree inside bank Energy (nJ): " << 1824 fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; 1825 cout << "\tDecoder (nJ): " << 1826 fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + 1827 fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; 1828 cout << "\tWordline (nJ): " << 1829 fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; 1830 cout << "\tBitline mux & associated drivers (nJ): " << 1831 fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + 1832 fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + 1833 fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; 1834 cout << "\tSense amp mux & associated drivers (nJ): " << 1835 fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + 1836 fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + 1837 fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + 1838 fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + 1839 fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + 1840 fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; 1841 cout << "\tBitlines (nJ): " << 1842 fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + 1843 fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; 1844 cout << "\tSense amplifier energy (nJ): " << 1845 fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; 1846 cout << "\tSub-array output driver (nJ): " << 1847 fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; 1848 1849 cout << endl << " Total leakage power of a bank (mW): " << 1850 fr->data_array2->power.readOp.leakage * 1e3 << endl; 1851 } 1852 1853 1854 if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && 1855 !g_ip->is_main_mem) { 1856 cout << endl << " Tag array: Total dynamic read energy/access (nJ): " << 1857 fr->tag_array2->power.readOp.dynamic * 1e9 << endl; 1858 cout << "\tTotal leakage read/write power of a bank (mW): " << 1859 fr->tag_array2->power.readOp.leakage * 1e3 << endl; 1860 cout << "\tTotal energy in H-tree (that includes both " 1861 "address and data transfer) (nJ): " << 1862 (fr->tag_array2->power_addr_input_htree.readOp.dynamic + 1863 fr->tag_array2->power_data_output_htree.readOp.dynamic + 1864 fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; 1865 1866 cout << "\tTotal leakage power in H-tree (that includes both " 1867 "address and data network) ((mW)): " << 1868 (fr->tag_array2->power_addr_input_htree.readOp.leakage + 1869 fr->tag_array2->power_data_output_htree.readOp.leakage + 1870 fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3 1871 << endl; 1872 1873 cout << "\tTotal gate leakage power in H-tree (that includes both " 1874 "address and data network) ((mW)): " << 1875 (fr->tag_array2->power_addr_input_htree.readOp.gate_leakage + 1876 fr->tag_array2->power_data_output_htree.readOp.gate_leakage + 1877 fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) * 1878 1e3 << endl; 1879 1880 cout << "\tOutput Htree inside a bank Energy (nJ): " << 1881 fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; 1882 cout << "\tDecoder (nJ): " << 1883 fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + 1884 fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; 1885 cout << "\tWordline (nJ): " << 1886 fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; 1887 cout << "\tBitline mux & associated drivers (nJ): " << 1888 fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + 1889 fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + 1890 fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; 1891 cout << "\tSense amp mux & associated drivers (nJ): " << 1892 fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + 1893 fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + 1894 fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + 1895 fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + 1896 fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + 1897 fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; 1898 cout << "\tBitlines precharge and equalization circuit (nJ): " << 1899 fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; 1900 cout << "\tBitlines (nJ): " << 1901 fr->tag_array2->power_bitlines.readOp.dynamic * 1e9 << endl; 1902 cout << "\tSense amplifier energy (nJ): " << 1903 fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; 1904 cout << "\tSub-array output driver (nJ): " << 1905 fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; 1906 } 1907 1908 cout << endl << endl << "Area Components:" << endl << endl; 1909 /* Data array area stats */ 1910 if (!(g_ip->pure_cam || g_ip->fully_assoc)) 1911 cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; 1912 else if (g_ip->pure_cam) 1913 cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; 1914 else 1915 cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; 1916 cout << "\tHeight (mm): " << 1917 fr->data_array2->all_banks_height*1e-3 << endl; 1918 cout << "\tWidth (mm): " << 1919 fr->data_array2->all_banks_width*1e-3 << endl; 1920 if (g_ip->print_detail) { 1921 cout << "\tArea efficiency (Memory cell area/Total area) - " << 1922 fr->data_array2->area_efficiency << " %" << endl; 1923 cout << "\t\tMAT Height (mm): " << 1924 fr->data_array2->mat_height*1e-3 << endl; 1925 cout << "\t\tMAT Length (mm): " << 1926 fr->data_array2->mat_length*1e-3 << endl; 1927 cout << "\t\tSubarray Height (mm): " << 1928 fr->data_array2->subarray_height*1e-3 << endl; 1929 cout << "\t\tSubarray Length (mm): " << 1930 fr->data_array2->subarray_length*1e-3 << endl; 1931 } 1932 1933 /* Tag array area stats */ 1934 if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && 1935 !g_ip->is_main_mem) { 1936 cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl; 1937 cout << "\tHeight (mm): " << 1938 fr->tag_array2->all_banks_height*1e-3 << endl; 1939 cout << "\tWidth (mm): " << 1940 fr->tag_array2->all_banks_width*1e-3 << endl; 1941 if (g_ip->print_detail) { 1942 cout << "\tArea efficiency (Memory cell area/Total area) - " << 1943 fr->tag_array2->area_efficiency << " %" << endl; 1944 cout << "\t\tMAT Height (mm): " << 1945 fr->tag_array2->mat_height*1e-3 << endl; 1946 cout << "\t\tMAT Length (mm): " << 1947 fr->tag_array2->mat_length*1e-3 << endl; 1948 cout << "\t\tSubarray Height (mm): " << 1949 fr->tag_array2->subarray_height*1e-3 << endl; 1950 cout << "\t\tSubarray Length (mm): " << 1951 fr->tag_array2->subarray_length*1e-3 << endl; 1952 } 1953 } 1954 Wire wpr; 1955 wpr.print_wire(); 1956 } 1957} 1958 1959//McPAT's plain interface, please keep !!! 1960uca_org_t cacti_interface(InputParameter * const local_interface) { 1961 uca_org_t fin_res; 1962 fin_res.valid = false; 1963 1964 g_ip = local_interface; 1965 1966 if (!g_ip->error_checking()) { 1967 exit(0); 1968 } 1969 1970 init_tech_params(g_ip->F_sz_um, false); 1971 Wire winit; // Do not delete this line. It initializes wires. 1972 1973 solve(&fin_res); 1974 1975 return fin_res; 1976} 1977 1978//McPAT's plain interface, please keep !!! 1979uca_org_t init_interface(InputParameter* const local_interface, 1980 const string &name) { 1981 uca_org_t fin_res; 1982 fin_res.valid = false; 1983 1984 g_ip = local_interface; 1985 1986 if (!g_ip->error_checking(name)) { 1987 exit(0); 1988 } 1989 1990 init_tech_params(g_ip->F_sz_um, false); 1991 Wire winit; // Do not delete this line. It initializes wires. 1992 return fin_res; 1993} 1994 1995void reconfigure(InputParameter *local_interface, uca_org_t *fin_res) 1996{ 1997 // Copy the InputParameter to global interface (g_ip) and do error checking. 1998 g_ip = local_interface; 1999 g_ip->error_checking(); 2000 2001 // Initialize technology parameters 2002 init_tech_params(g_ip->F_sz_um,false); 2003 2004 Wire winit; // Do not delete this line. It initializes wires. 2005 2006 // This corresponds to solve() in the initialization process. 2007 update(fin_res); 2008} 2009