1/***************************************************************************** 2 * McPAT 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. 5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 6 * All Rights Reserved 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are 10 * met: redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer; 12 * redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution; 15 * neither the name of the copyright holders nor the names of its 16 * contributors may be used to endorse or promote products derived from 17 * this software without specific prior written permission. 18 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 * 31 ***************************************************************************/ 32 33#include <algorithm> 34#include <cassert> 35#include <cmath> 36#include <iostream> 37#include <sstream> 38#include <string> 39 40#include "basic_circuit.h" 41#include "basic_components.h" 42#include "common.h" 43#include "const.h" 44#include "core.h" 45#include "io.h" 46#include "parameter.h" 47 48int RegFU::RFWIN_ACCESS_MULTIPLIER = 16; 49 50// The five bits are: busy, Issued, Finished, speculative, valid 51int SchedulerU::ROB_STATUS_BITS = 5; 52 53InstFetchU::InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_, 54 const CoreParameters & _core_params, 55 const CoreStatistics & _core_stats, bool exist_) 56 : McPATComponent(_xml_data), icache(NULL), IB(NULL), BTB(NULL), 57 BPT(NULL), ID_inst(NULL), ID_operand(NULL), ID_misc(NULL), 58 interface_ip(*interface_ip_), 59 core_params(_core_params), core_stats(_core_stats), exist(exist_) { 60 if (!exist) return; 61 int idx, tag, data, size, line, assoc, banks; 62 bool is_default = true; 63 64 clockRate = core_params.clockRate; 65 name = "Instruction Fetch Unit"; 66 // Check if there is an icache child: 67 int i; 68 icache = NULL; 69 for( i = 0; i < xml_data->nChildNode("component"); i++ ) { 70 XMLNode* childXML = xml_data->getChildNodePtr("component", &i); 71 XMLCSTR type = childXML->getAttribute("type"); 72 73 if (!type) 74 warnMissingComponentType(childXML->getAttribute("id")); 75 76 STRCMP(type, "CacheUnit") { 77 XMLCSTR name = childXML->getAttribute("name"); 78 if (strcmp(name, "Instruction Cache") == 0 || 79 strcmp(name, "icache") == 0) { 80 icache = new CacheUnit(childXML, &interface_ip); 81 children.push_back(icache); 82 } 83 } 84 } 85 86 set_params_stats(); 87 88 //Instruction buffer 89 data = core_params.instruction_length * core_params.peak_issueW; 90 line = int(ceil(data / BITS_PER_BYTE)); 91 size = core_params.num_hthreads * core_params.instruction_buffer_size * 92 line; 93 if (size < MIN_BUFFER_SIZE) { 94 size = MIN_BUFFER_SIZE; 95 } 96 97 interface_ip.cache_sz = size; 98 interface_ip.line_sz = line; 99 interface_ip.assoc = core_params.instruction_buffer_assoc; 100 interface_ip.nbanks = core_params.instruction_buffer_nbanks; 101 interface_ip.out_w = line * BITS_PER_BYTE; 102 interface_ip.specific_tag = core_params.instruction_buffer_tag_width > 0; 103 interface_ip.tag_w = core_params.instruction_buffer_tag_width; 104 interface_ip.access_mode = Normal; 105 interface_ip.obj_func_dyn_energy = 0; 106 interface_ip.obj_func_dyn_power = 0; 107 interface_ip.obj_func_leak_power = 0; 108 interface_ip.obj_func_cycle_t = 1; 109 interface_ip.num_rw_ports = 110 core_params.number_instruction_fetch_ports; 111 interface_ip.num_rd_ports = 0; 112 interface_ip.num_wr_ports = 0; 113 interface_ip.num_se_rd_ports = 0; 114 interface_ip.num_search_ports = 0; 115 interface_ip.is_cache = false; 116 interface_ip.pure_ram = true; 117 interface_ip.pure_cam = false; 118 interface_ip.throughput = 1.0 / clockRate; 119 interface_ip.latency = 1.0 / clockRate; 120 121 IB = new ArrayST(xml_data, &interface_ip, "Instruction Buffer", 122 Core_device, clockRate, core_params.opt_local, 123 core_params.core_ty); 124 IB->area.set_area(IB->area.get_area() + IB->local_result.area); 125 area.set_area(area.get_area() + IB->local_result.area); 126 127 if (core_params.predictionW > 0) { 128 /* 129 * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged 130 * It is only a cache without all the buffers in the cache controller since it is more like a 131 * look up table than a cache with cache controller. When access miss, no load from other places 132 * such as main memory (not actively fill the misses), it is passively updated under two circumstances: 133 * 1) when BPT@ID stage finds out current is a taken branch while BTB missed 134 * 2) When BPT@ID stage predicts differently than BTB 135 * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid) 136 * 4) when EXEU find out wrong target has been provided from BTB. 137 * 138 */ 139 size = inst_fetch_params.btb_size; 140 line = inst_fetch_params.btb_block_size; 141 assoc = inst_fetch_params.btb_assoc; 142 banks = inst_fetch_params.btb_num_banks; 143 idx = int(ceil(log2(size / line / assoc))); 144 tag = virtual_address_width + int(ceil(log2(core_params.num_hthreads))) 145 + EXTRA_TAG_BITS; 146 147 interface_ip.cache_sz = size; 148 interface_ip.line_sz = line; 149 interface_ip.assoc = assoc; 150 interface_ip.nbanks = banks; 151 interface_ip.out_w = line * BITS_PER_BYTE; 152 interface_ip.specific_tag = tag > 0; 153 interface_ip.tag_w = tag; 154 interface_ip.access_mode = Normal; 155 interface_ip.obj_func_dyn_energy = 0; 156 interface_ip.obj_func_dyn_power = 0; 157 interface_ip.obj_func_leak_power = 0; 158 interface_ip.obj_func_cycle_t = 1; 159 interface_ip.num_rw_ports = 1; 160 interface_ip.num_rd_ports = core_params.predictionW; 161 interface_ip.num_wr_ports = core_params.predictionW; 162 interface_ip.num_se_rd_ports = 0; 163 interface_ip.num_search_ports = 0; 164 interface_ip.is_cache = true; 165 interface_ip.pure_ram = false; 166 interface_ip.pure_cam = false; 167 interface_ip.throughput = inst_fetch_params.btb_throughput / clockRate; 168 interface_ip.latency = inst_fetch_params.btb_latency / clockRate; 169 170 BTB = new ArrayST(xml_data, &interface_ip, "Branch Target Buffer", 171 Core_device, clockRate, core_params.opt_local, 172 core_params.core_ty); 173 area.set_area(area.get_area() + BTB->local_result.area); 174 175 BPT = new BranchPredictor(xml_data, &interface_ip, 176 core_params, core_stats); 177 area.set_area(area.get_area() + BPT->area.get_area()); 178 } 179 180 ID_inst = new InstructionDecoder(xml_data, "Instruction Opcode Decoder", 181 is_default, &interface_ip, 182 core_params.opcode_width, 183 core_params.decodeW, 184 core_params.x86, clockRate, 185 Core_device, core_params.core_ty); 186 187 ID_operand = new InstructionDecoder(xml_data, 188 "Instruction Operand Decoder", 189 is_default, &interface_ip, 190 core_params.arch_ireg_width, 191 core_params.decodeW, 192 core_params.x86, clockRate, 193 Core_device, core_params.core_ty); 194 195 ID_misc = new InstructionDecoder(xml_data, "Instruction Microcode Decoder", 196 is_default, &interface_ip, 197 core_params.micro_opcode_length, 198 core_params.decodeW, 199 core_params.x86, clockRate, 200 Core_device, core_params.core_ty); 201 area.set_area(area.get_area()+ (ID_inst->area.get_area() 202 + ID_operand->area.get_area() 203 + ID_misc->area.get_area()) 204 * core_params.decodeW); 205} 206 207void 208InstFetchU::set_params_stats() { 209 int num_children = xml_data->nChildNode("component"); 210 int i; 211 memset(&inst_fetch_params,0,sizeof(InstFetchParameters)); 212 for (i = 0; i < num_children; i++) { 213 XMLNode* child = xml_data->getChildNodePtr("component", &i); 214 XMLCSTR type = child->getAttribute("type"); 215 216 if (!type) 217 warnMissingComponentType(child->getAttribute("id")); 218 219 STRCMP(type, "BranchTargetBuffer") { 220 int sub_num_children = child->nChildNode("param"); 221 int j; 222 for (j = 0; j < sub_num_children; j++) { 223 XMLNode* paramNode = child->getChildNodePtr("param", &j); 224 XMLCSTR node_name = paramNode->getAttribute("name"); 225 XMLCSTR value = paramNode->getAttribute("value"); 226 227 if (!node_name) 228 warnMissingParamName(paramNode->getAttribute("id")); 229 230 ASSIGN_INT_IF("size", inst_fetch_params.btb_size); 231 ASSIGN_INT_IF("block_size", inst_fetch_params.btb_block_size); 232 ASSIGN_INT_IF("assoc", inst_fetch_params.btb_assoc); 233 ASSIGN_INT_IF("num_banks", inst_fetch_params.btb_num_banks); 234 ASSIGN_INT_IF("latency", inst_fetch_params.btb_latency); 235 ASSIGN_INT_IF("throughput", inst_fetch_params.btb_throughput); 236 ASSIGN_INT_IF("rw_ports", inst_fetch_params.btb_rw_ports); 237 238 else { 239 warnUnrecognizedParam(node_name); 240 } 241 } 242 243 sub_num_children = child->nChildNode("stat"); 244 for (j = 0; j < sub_num_children; j++) { 245 XMLNode* statNode = child->getChildNodePtr("stat", &j); 246 XMLCSTR node_name = statNode->getAttribute("name"); 247 XMLCSTR value = statNode->getAttribute("value"); 248 249 if (!node_name) 250 warnMissingStatName(statNode->getAttribute("id")); 251 252 ASSIGN_FP_IF("read_accesses", 253 inst_fetch_stats.btb_read_accesses); 254 ASSIGN_FP_IF("write_accesses", 255 inst_fetch_stats.btb_write_accesses); 256 else { 257 warnUnrecognizedStat(node_name); 258 } 259 } 260 } 261 } 262 263 // Parameter sanity check 264 if (inst_fetch_params.btb_size <= 0) { 265 errorNonPositiveParam("size"); 266 } 267 268 if (inst_fetch_params.btb_block_size <= 0) { 269 errorNonPositiveParam("block_size"); 270 } 271 272 if (inst_fetch_params.btb_assoc <= 0) { 273 errorNonPositiveParam("assoc"); 274 } 275 276 if (inst_fetch_params.btb_num_banks <= 0) { 277 errorNonPositiveParam("num_banks"); 278 } 279} 280 281BranchPredictor::BranchPredictor(XMLNode* _xml_data, 282 InputParameter* interface_ip_, 283 const CoreParameters & _core_params, 284 const CoreStatistics & _core_stats, 285 bool exist_) 286 : McPATComponent(_xml_data), globalBPT(NULL), localBPT(NULL), 287 L1_localBPT(NULL), L2_localBPT(NULL), chooser(NULL), RAS(NULL), 288 interface_ip(*interface_ip_), 289 core_params(_core_params), core_stats(_core_stats), exist(exist_) { 290 if (!exist) return; 291 int tag; 292 int data; 293 int size; 294 295 clockRate = core_params.clockRate; 296 name = "Branch Predictor"; 297 298 // Common interface parameters for the branch predictor structures 299 interface_ip.pure_cam = false; 300 301 if (core_params.multithreaded) { 302 tag = int(log2(core_params.num_hthreads) + EXTRA_TAG_BITS); 303 interface_ip.specific_tag = tag > 0; 304 interface_ip.tag_w = tag; 305 interface_ip.is_cache = true; 306 interface_ip.pure_ram = false; 307 } else { 308 interface_ip.specific_tag = 0; 309 interface_ip.tag_w = 0; 310 interface_ip.is_cache = false; 311 interface_ip.pure_ram = true; 312 } 313 314 // Parse params and stats from XML 315 set_params_stats(); 316 317 // Common interface parameters for the branch predictor structures 318 interface_ip.assoc = branch_pred_params.assoc; 319 interface_ip.nbanks = branch_pred_params.nbanks; 320 321 //Global predictor 322 data = int(ceil(branch_pred_params.global_predictor_bits / BITS_PER_BYTE)); 323 size = data * branch_pred_params.global_predictor_entries; 324 325 interface_ip.cache_sz = size; 326 interface_ip.line_sz = data; 327 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 328 interface_ip.access_mode = Fast; 329 interface_ip.obj_func_dyn_energy = 0; 330 interface_ip.obj_func_dyn_power = 0; 331 interface_ip.obj_func_leak_power = 0; 332 interface_ip.obj_func_cycle_t = 1; 333 interface_ip.num_rw_ports = 0; 334 interface_ip.num_rd_ports = core_params.predictionW; 335 interface_ip.num_wr_ports = core_params.predictionW; 336 interface_ip.num_se_rd_ports = 0; 337 interface_ip.num_search_ports = 0; 338 interface_ip.throughput = 1.0 / clockRate; 339 interface_ip.latency = 1.0 / clockRate; 340 globalBPT = new ArrayST(xml_data, &interface_ip, "Global Predictor", 341 Core_device, clockRate, core_params.opt_local, 342 core_params.core_ty); 343 area.set_area(area.get_area() + globalBPT->local_result.area); 344 345 //Local BPT (Level 1) 346 data = int(ceil(branch_pred_params.local_l1_predictor_size / 347 BITS_PER_BYTE)); 348 size = data * branch_pred_params.local_predictor_entries; 349 350 interface_ip.cache_sz = size; 351 interface_ip.line_sz = data; 352 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 353 interface_ip.access_mode = Fast; 354 interface_ip.obj_func_dyn_energy = 0; 355 interface_ip.obj_func_dyn_power = 0; 356 interface_ip.obj_func_leak_power = 0; 357 interface_ip.obj_func_cycle_t = 1; 358 interface_ip.num_rw_ports = 0; 359 interface_ip.num_rd_ports = core_params.predictionW; 360 interface_ip.num_wr_ports = core_params.predictionW; 361 interface_ip.num_se_rd_ports = 0; 362 interface_ip.num_search_ports = 0; 363 interface_ip.throughput = 1.0 / clockRate; 364 interface_ip.latency = 1.0 / clockRate; 365 L1_localBPT = new ArrayST(xml_data, &interface_ip, 366 "Local Predictor, Level 1", 367 Core_device, clockRate, core_params.opt_local, 368 core_params.core_ty); 369 L1_localBPT->area.set_area(L1_localBPT->area.get_area() + 370 L1_localBPT->local_result.area); 371 area.set_area(area.get_area()+ L1_localBPT->local_result.area); 372 373 //Local BPT (Level 2) 374 data = int(ceil(branch_pred_params.local_l2_predictor_size / 375 BITS_PER_BYTE)); 376 size = data * branch_pred_params.local_predictor_entries; 377 378 interface_ip.cache_sz = size; 379 interface_ip.line_sz = data; 380 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 381 interface_ip.access_mode = Fast; 382 interface_ip.obj_func_dyn_energy = 0; 383 interface_ip.obj_func_dyn_power = 0; 384 interface_ip.obj_func_leak_power = 0; 385 interface_ip.obj_func_cycle_t = 1; 386 interface_ip.num_rw_ports = 0; 387 interface_ip.num_rd_ports = core_params.predictionW; 388 interface_ip.num_wr_ports = core_params.predictionW; 389 interface_ip.num_se_rd_ports = 0; 390 interface_ip.num_search_ports = 0; 391 interface_ip.throughput = 1.0 / clockRate; 392 interface_ip.latency = 1.0 / clockRate; 393 L2_localBPT = new ArrayST(xml_data, &interface_ip, 394 "Local Predictor, Level 2", 395 Core_device, clockRate, core_params.opt_local, 396 core_params.core_ty); 397 area.set_area(area.get_area() + L2_localBPT->local_result.area); 398 399 //Chooser 400 data = int(ceil(branch_pred_params.chooser_predictor_bits / 401 BITS_PER_BYTE)); 402 size = data * branch_pred_params.chooser_predictor_entries; 403 404 interface_ip.cache_sz = size; 405 interface_ip.line_sz = data; 406 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 407 interface_ip.access_mode = Fast; 408 interface_ip.obj_func_dyn_energy = 0; 409 interface_ip.obj_func_dyn_power = 0; 410 interface_ip.obj_func_leak_power = 0; 411 interface_ip.obj_func_cycle_t = 1; 412 interface_ip.num_rw_ports = 0; 413 interface_ip.num_rd_ports = core_params.predictionW; 414 interface_ip.num_wr_ports = core_params.predictionW; 415 interface_ip.num_se_rd_ports = 0; 416 interface_ip.num_search_ports = 0; 417 interface_ip.throughput = 1.0 / clockRate; 418 interface_ip.latency = 1.0 / clockRate; 419 chooser = new ArrayST(xml_data, &interface_ip, "Predictor Chooser", 420 Core_device, clockRate, core_params.opt_local, 421 core_params.core_ty); 422 area.set_area(area.get_area() + chooser->local_result.area); 423 424 //RAS return address stacks are Duplicated for each thread. 425 data = int(ceil(core_params.pc_width / BITS_PER_BYTE)); 426 size = data * core_params.RAS_size; 427 428 interface_ip.cache_sz = size; 429 interface_ip.line_sz = data; 430 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 431 interface_ip.access_mode = Fast; 432 interface_ip.obj_func_dyn_energy = 0; 433 interface_ip.obj_func_dyn_power = 0; 434 interface_ip.obj_func_leak_power = 0; 435 interface_ip.obj_func_cycle_t = 1; 436 interface_ip.num_rw_ports = 0; 437 interface_ip.num_rd_ports = core_params.predictionW; 438 interface_ip.num_wr_ports = core_params.predictionW; 439 interface_ip.num_se_rd_ports = 0; 440 interface_ip.num_search_ports = 0; 441 interface_ip.is_cache = false; 442 interface_ip.pure_ram = true; 443 interface_ip.throughput = 1.0 / clockRate; 444 interface_ip.latency = 1.0 / clockRate; 445 RAS = new ArrayST(xml_data, &interface_ip, "RAS", Core_device, clockRate, 446 core_params.opt_local, core_params.core_ty); 447 RAS->output_data.area *= core_params.num_hthreads; 448 area.set_area(area.get_area() + RAS->local_result.area * 449 core_params.num_hthreads); 450 451} 452 453void 454BranchPredictor::set_params_stats() { 455 int num_children = xml_data->nChildNode("component"); 456 int i; 457 for (i = 0; i < num_children; i++) { 458 XMLNode* child = xml_data->getChildNodePtr("component", &i); 459 XMLCSTR type = child->getAttribute("type"); 460 461 if (!type) 462 warnMissingComponentType(child->getAttribute("id")); 463 464 STRCMP(type, "BranchPredictor") { 465 int sub_num_children = child->nChildNode("param"); 466 int j; 467 for (j = 0; j < sub_num_children; j++) { 468 XMLNode* paramNode = child->getChildNodePtr("param", &j); 469 XMLCSTR node_name = paramNode->getAttribute("name"); 470 XMLCSTR value = paramNode->getAttribute("value"); 471 472 if (!node_name) 473 warnMissingParamName(paramNode->getAttribute("id")); 474 475 ASSIGN_INT_IF("assoc", branch_pred_params.assoc); 476 ASSIGN_INT_IF("nbanks", branch_pred_params.nbanks); 477 ASSIGN_INT_IF("local_l1_predictor_size", 478 branch_pred_params.local_l1_predictor_size); 479 ASSIGN_INT_IF("local_l2_predictor_size", 480 branch_pred_params.local_l2_predictor_size); 481 ASSIGN_INT_IF("local_predictor_entries", 482 branch_pred_params.local_predictor_entries); 483 ASSIGN_INT_IF("global_predictor_entries", 484 branch_pred_params.global_predictor_entries); 485 ASSIGN_INT_IF("global_predictor_bits", 486 branch_pred_params.global_predictor_bits); 487 ASSIGN_INT_IF("chooser_predictor_entries", 488 branch_pred_params.chooser_predictor_entries); 489 ASSIGN_INT_IF("chooser_predictor_bits", 490 branch_pred_params.chooser_predictor_bits); 491 492 else { 493 warnUnrecognizedParam(node_name); 494 } 495 } 496 // The core reads in the number of branches and the number of 497 // function calls and these values are passed through the 498 // core_stats variable, so we don't need to read them in here 499 } 500 } 501} 502 503SchedulerU::SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_, 504 const CoreParameters & _core_params, 505 const CoreStatistics & _core_stats, bool exist_) 506 : McPATComponent(_xml_data), int_inst_window(NULL), 507 fp_inst_window(NULL), ROB(NULL), int_instruction_selection(NULL), 508 fp_instruction_selection(NULL), 509 interface_ip(*interface_ip_), 510 core_params(_core_params), core_stats(_core_stats), exist(exist_) { 511 if (!exist) return; 512 int tag; 513 int data; 514 int size; 515 int line; 516 bool is_default = true; 517 string tmp_name; 518 519 clockRate = core_params.clockRate; 520 name = "Instruction Scheduler"; 521 if ((core_params.core_ty == Inorder && core_params.multithreaded)) { 522 //Instruction issue queue, in-order multi-issue or multithreaded 523 //processor also has this structure. Unified window for Inorder 524 //processors 525 //This tag width is the normal thread state bits based on 526 //Niagara Design 527 tag = int(log2(core_params.num_hthreads) * core_params.perThreadState); 528 data = core_params.instruction_length; 529 line = int(ceil(data / BITS_PER_BYTE)); 530 size = core_params.instruction_window_size * line; 531 if (size < MIN_BUFFER_SIZE) { 532 size = MIN_BUFFER_SIZE; 533 } 534 535 //NOTE: x86 inst can be very lengthy, up to 15B. 536 //Source: Intel® 64 and IA-32 Architectures 537 //Software Developer’s Manual 538 interface_ip.cache_sz = size; 539 interface_ip.line_sz = line; 540 interface_ip.assoc = core_params.scheduler_assoc; 541 interface_ip.nbanks = core_params.scheduler_nbanks; 542 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 543 interface_ip.specific_tag = tag > 0; 544 interface_ip.tag_w = tag; 545 interface_ip.access_mode = Sequential; 546 interface_ip.obj_func_dyn_energy = 0; 547 interface_ip.obj_func_dyn_power = 0; 548 interface_ip.obj_func_leak_power = 0; 549 interface_ip.obj_func_cycle_t = 1; 550 interface_ip.num_rw_ports = 0; 551 interface_ip.num_rd_ports = core_params.peak_issueW; 552 interface_ip.num_wr_ports = core_params.peak_issueW; 553 interface_ip.num_se_rd_ports = 0; 554 interface_ip.num_search_ports = core_params.peak_issueW; 555 interface_ip.is_cache = true; 556 interface_ip.pure_cam = false; 557 interface_ip.pure_ram = false; 558 interface_ip.throughput = 1.0 / clockRate; 559 interface_ip.latency = 1.0 / clockRate; 560 int_inst_window = new ArrayST(xml_data, &interface_ip, 561 "InstFetchQueue", Core_device, clockRate, 562 core_params.opt_local, 563 core_params.core_ty); 564 int_inst_window->output_data.area *= core_params.num_pipelines; 565 area.set_area(area.get_area() + int_inst_window->local_result.area * 566 core_params.num_pipelines); 567 Iw_height = int_inst_window->local_result.cache_ht; 568 569 /* 570 * selection logic 571 * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up 572 * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who 573 * at the issue stage. 574 */ 575 576 int_instruction_selection = 577 new selection_logic(xml_data, is_default, 578 core_params.instruction_window_size, 579 core_params.peak_issueW * 580 core_params.num_hthreads, 581 &interface_ip, 582 "Int Instruction Selection Logic", 583 core_stats.inst_window_wakeup_accesses, 584 clockRate, Core_device, core_params.core_ty); 585 586 if (core_params.fp_instruction_window_size > 0) { 587 fp_instruction_selection = 588 new selection_logic(xml_data, is_default, 589 core_params.fp_instruction_window_size, 590 core_params.fp_issueW * 591 core_params.num_hthreads, 592 &interface_ip, 593 "FP Instruction Selection Logic", 594 core_stats.fp_inst_window_wakeup_accesses, 595 clockRate, Core_device, 596 core_params.core_ty); 597 } 598 } 599 600 if (core_params.core_ty == OOO) { 601 /* 602 * CAM based instruction window 603 * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored 604 * For RS based OOO it is the Reservation station, where both tags and values of phy regs are stored 605 * It is written once and read twice(two operands) before an instruction can be issued. 606 * X86 instruction can be very long up to 15B. add instruction length in XML 607 */ 608 if (core_params.scheu_ty == PhysicalRegFile) { 609 tag = core_params.phy_ireg_width; 610 data = int((ceil((core_params.instruction_length + 611 NUM_SOURCE_OPERANDS * 612 (core_params.phy_ireg_width - 613 core_params.arch_ireg_width)) / 614 (double)NUM_SOURCE_OPERANDS) / 615 BITS_PER_BYTE)); 616 tmp_name = "Integer Instruction Window"; 617 } else { 618 tag = core_params.phy_ireg_width; 619 data = int(ceil(((core_params.instruction_length + 620 NUM_SOURCE_OPERANDS * 621 (core_params.phy_ireg_width - 622 core_params.arch_ireg_width) + 623 2 * core_params.int_data_width) / 624 (double)NUM_SOURCE_OPERANDS) / 625 BITS_PER_BYTE)); 626 tmp_name = "Integer Reservation Station"; 627 } 628 629 size = data * core_params.instruction_window_size; 630 631 interface_ip.cache_sz = size; 632 interface_ip.line_sz = data; 633 interface_ip.assoc = core_params.scheduler_assoc; 634 interface_ip.nbanks = core_params.scheduler_nbanks; 635 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 636 interface_ip.specific_tag = tag > 0; 637 interface_ip.tag_w = tag; 638 interface_ip.access_mode = Normal; 639 interface_ip.obj_func_dyn_energy = 0; 640 interface_ip.obj_func_dyn_power = 0; 641 interface_ip.obj_func_leak_power = 0; 642 interface_ip.obj_func_cycle_t = 1; 643 interface_ip.num_rw_ports = 0; 644 interface_ip.num_rd_ports = core_params.peak_issueW; 645 interface_ip.num_wr_ports = core_params.peak_issueW; 646 interface_ip.num_se_rd_ports = 0; 647 interface_ip.num_search_ports = core_params.peak_issueW; 648 interface_ip.is_cache = true; 649 interface_ip.pure_cam = false; 650 interface_ip.pure_ram = false; 651 interface_ip.throughput = NUM_SOURCE_OPERANDS * 1.0 / clockRate; 652 interface_ip.latency = NUM_SOURCE_OPERANDS * 1.0 / clockRate; 653 int_inst_window = new ArrayST(xml_data, &interface_ip, tmp_name, 654 Core_device, clockRate, 655 core_params.opt_local, 656 core_params.core_ty); 657 int_inst_window->output_data.area *= core_params.num_pipelines; 658 area.set_area(area.get_area() + int_inst_window->local_result.area * 659 core_params.num_pipelines); 660 Iw_height = int_inst_window->local_result.cache_ht; 661 662 //FU inst window 663 if (core_params.scheu_ty == PhysicalRegFile) { 664 tag = NUM_SOURCE_OPERANDS * core_params.phy_freg_width; 665 data = int(ceil((core_params.instruction_length + 666 NUM_SOURCE_OPERANDS * 667 (core_params.phy_freg_width - 668 core_params.arch_freg_width)) / BITS_PER_BYTE)); 669 tmp_name = "FP Instruction Window"; 670 } else { 671 tag = NUM_SOURCE_OPERANDS * core_params.phy_ireg_width; 672 data = int(ceil((core_params.instruction_length + 673 NUM_SOURCE_OPERANDS * 674 (core_params.phy_freg_width - 675 core_params.arch_freg_width) + 676 NUM_SOURCE_OPERANDS * core_params.fp_data_width) / 677 BITS_PER_BYTE)); 678 tmp_name = "FP Reservation Station"; 679 } 680 681 size = data * core_params.fp_instruction_window_size; 682 683 interface_ip.cache_sz = size; 684 interface_ip.line_sz = data; 685 interface_ip.assoc = core_params.scheduler_assoc; 686 interface_ip.nbanks = core_params.scheduler_nbanks; 687 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 688 interface_ip.specific_tag = tag > 0; 689 interface_ip.tag_w = tag; 690 interface_ip.access_mode = Normal; 691 interface_ip.obj_func_dyn_energy = 0; 692 interface_ip.obj_func_dyn_power = 0; 693 interface_ip.obj_func_leak_power = 0; 694 interface_ip.obj_func_cycle_t = 1; 695 interface_ip.num_rw_ports = 0; 696 interface_ip.num_rd_ports = core_params.fp_issueW; 697 interface_ip.num_wr_ports = core_params.fp_issueW; 698 interface_ip.num_se_rd_ports = 0; 699 interface_ip.num_search_ports = core_params.fp_issueW; 700 interface_ip.is_cache = true; 701 interface_ip.pure_cam = false; 702 interface_ip.pure_ram = false; 703 interface_ip.throughput = 1.0 / clockRate; 704 interface_ip.latency = 1.0 / clockRate; 705 fp_inst_window = 706 new ArrayST(xml_data, &interface_ip, tmp_name, Core_device, 707 clockRate, core_params.opt_local, core_params.core_ty); 708 fp_inst_window->output_data.area *= core_params.num_fp_pipelines; 709 area.set_area(area.get_area() + fp_inst_window->local_result.area 710 *core_params.num_fp_pipelines); 711 fp_Iw_height = fp_inst_window->local_result.cache_ht; 712 713 if (core_params.ROB_size > 0) { 714 /* 715 * if ROB_size = 0, then the target processor does not support hardware-based 716 * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which 717 * means branch must be resolved before instruction issued into instruction window, since 718 * there is no change to flush miss-predict branch path after instructions are issued in this situation. 719 * 720 * ROB.ROB size = inflight inst. ROB is unified for int and fp inst. 721 * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7. 722 * However, this approach is abandoned due to its high power and poor scalablility. 723 * McPAT uses current implementation of ROB as circular buffer. 724 * ROB is written once when instruction is issued and read once when the instruction is committed. * 725 */ 726 int robExtra = int(ceil(ROB_STATUS_BITS + 727 log2(core_params.num_hthreads))); 728 729 if (core_params.scheu_ty == PhysicalRegFile) { 730 //PC is to id the instruction for recover exception. 731 //inst is used to map the renamed dest. registers. so that 732 //commit stage can know which reg/RRAT to update 733 data = int(ceil((robExtra + core_params.pc_width + 734 core_params.phy_ireg_width) / BITS_PER_BYTE)); 735 } else { 736 //in RS based OOO, ROB also contains value of destination reg 737 data = int(ceil((robExtra + core_params.pc_width + 738 core_params.phy_ireg_width + 739 core_params.fp_data_width) / BITS_PER_BYTE)); 740 } 741 742 interface_ip.cache_sz = data * core_params.ROB_size; 743 interface_ip.line_sz = data; 744 interface_ip.assoc = core_params.ROB_assoc; 745 interface_ip.nbanks = core_params.ROB_nbanks; 746 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 747 interface_ip.specific_tag = core_params.ROB_tag_width > 0; 748 interface_ip.tag_w = core_params.ROB_tag_width; 749 interface_ip.access_mode = Sequential; 750 interface_ip.obj_func_dyn_energy = 0; 751 interface_ip.obj_func_dyn_power = 0; 752 interface_ip.obj_func_leak_power = 0; 753 interface_ip.obj_func_cycle_t = 1; 754 interface_ip.num_rw_ports = 0; 755 interface_ip.num_rd_ports = core_params.peak_commitW; 756 interface_ip.num_wr_ports = core_params.peak_issueW; 757 interface_ip.num_se_rd_ports = 0; 758 interface_ip.num_search_ports = 0; 759 interface_ip.is_cache = false; 760 interface_ip.pure_cam = false; 761 interface_ip.pure_ram = true; 762 interface_ip.throughput = 1.0 / clockRate; 763 interface_ip.latency = 1.0 / clockRate; 764 ROB = new ArrayST(xml_data, &interface_ip, "Reorder Buffer", 765 Core_device, clockRate, core_params.opt_local, 766 core_params.core_ty); 767 ROB->output_data.area *= core_params.num_pipelines; 768 area.set_area(area.get_area() + ROB->local_result.area * 769 core_params.num_pipelines); 770 ROB_height = ROB->local_result.cache_ht; 771 } 772 773 int_instruction_selection = 774 new selection_logic(xml_data, is_default, 775 core_params.instruction_window_size, 776 core_params.peak_issueW, &interface_ip, 777 "Int Instruction Selection Logic", 778 core_stats.inst_window_wakeup_accesses, 779 clockRate, Core_device, core_params.core_ty); 780 781 if (core_params.fp_instruction_window_size > 0) { 782 fp_instruction_selection = 783 new selection_logic(xml_data, is_default, 784 core_params.fp_instruction_window_size, 785 core_params.fp_issueW, &interface_ip, 786 "FP Instruction Selection Logic", 787 core_stats.fp_inst_window_wakeup_accesses, 788 clockRate, Core_device, 789 core_params.core_ty); 790 } 791 792 } 793} 794 795LoadStoreU::LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_, 796 const CoreParameters & _core_params, 797 const CoreStatistics & _core_stats, bool exist_) 798 : McPATComponent(_xml_data), dcache(NULL), LSQ(NULL), LoadQ(NULL), 799 interface_ip(*interface_ip_), 800 core_params(_core_params), core_stats(_core_stats), exist(exist_) { 801 if (!exist) return; 802 int tag; 803 int line; 804 int size; 805 int ldst_opcode = core_params.opcode_width; 806 807 clockRate = core_params.clockRate; 808 name = "Load/Store Unit"; 809 810 // Check if there is a dcache child: 811 int i; 812 dcache = NULL; 813 for( i = 0; i < xml_data->nChildNode("component"); i++ ) { 814 XMLNode* childXML = xml_data->getChildNodePtr("component", &i); 815 XMLCSTR type = childXML->getAttribute("type"); 816 817 if (!type) 818 warnMissingComponentType(childXML->getAttribute("id")); 819 820 STRCMP(type, "CacheUnit") { 821 XMLCSTR name = childXML->getAttribute("name"); 822 if (strcmp(name, "Data Cache") == 0 || 823 strcmp(name, "dcache") == 0) { 824 dcache = new CacheUnit(childXML, &interface_ip); 825 children.push_back(dcache); 826 } 827 } 828 } 829 830 /* 831 * LSU--in-order processors do not have separate load queue: unified lsq 832 * partitioned among threads 833 * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ 834 */ 835 tag = ldst_opcode + virtual_address_width + 836 int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; 837 line = int(ceil(data_path_width / BITS_PER_BYTE)); 838 size = core_params.store_buffer_size * line * core_params.num_hthreads; 839 840 interface_ip.cache_sz = size; 841 interface_ip.line_sz = line; 842 interface_ip.assoc = core_params.store_buffer_assoc; 843 interface_ip.nbanks = core_params.store_buffer_nbanks; 844 interface_ip.out_w = line * BITS_PER_BYTE; 845 interface_ip.specific_tag = tag > 0; 846 interface_ip.tag_w = tag; 847 interface_ip.access_mode = Sequential; 848 interface_ip.obj_func_dyn_energy = 0; 849 interface_ip.obj_func_dyn_power = 0; 850 interface_ip.obj_func_leak_power = 0; 851 interface_ip.obj_func_cycle_t = 1; 852 interface_ip.num_rw_ports = 0; 853 interface_ip.num_rd_ports = core_params.memory_ports; 854 interface_ip.num_wr_ports = core_params.memory_ports; 855 interface_ip.num_se_rd_ports = 0; 856 interface_ip.num_search_ports = core_params.memory_ports; 857 interface_ip.is_cache = true; 858 interface_ip.pure_ram = false; 859 interface_ip.pure_cam = false; 860 interface_ip.throughput = 1.0 / clockRate; 861 interface_ip.latency = 1.0 / clockRate; 862 LSQ = new ArrayST(xml_data, &interface_ip, "Store Queue", Core_device, 863 clockRate, core_params.opt_local, core_params.core_ty); 864 area.set_area(area.get_area() + LSQ->local_result.area); 865 area.set_area(area.get_area()*cdb_overhead); 866 lsq_height = LSQ->local_result.cache_ht * sqrt(cdb_overhead); 867 868 if ((core_params.core_ty == OOO) && (core_params.load_buffer_size > 0)) { 869 tag = ldst_opcode + virtual_address_width + 870 int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; 871 line = int(ceil(data_path_width / BITS_PER_BYTE)); 872 size = core_params.load_buffer_size * line * core_params.num_hthreads; 873 874 interface_ip.cache_sz = size; 875 interface_ip.line_sz = line; 876 interface_ip.assoc = core_params.load_buffer_assoc; 877 interface_ip.nbanks = core_params.load_buffer_nbanks; 878 interface_ip.out_w = line * BITS_PER_BYTE; 879 interface_ip.specific_tag = tag > 0; 880 interface_ip.tag_w = tag; 881 interface_ip.access_mode = Sequential; 882 interface_ip.obj_func_dyn_energy = 0; 883 interface_ip.obj_func_dyn_power = 0; 884 interface_ip.obj_func_leak_power = 0; 885 interface_ip.obj_func_cycle_t = 1; 886 interface_ip.num_rw_ports = 0; 887 interface_ip.num_rd_ports = core_params.memory_ports; 888 interface_ip.num_wr_ports = core_params.memory_ports; 889 interface_ip.num_se_rd_ports = 0; 890 interface_ip.num_search_ports = core_params.memory_ports; 891 interface_ip.is_cache = true; 892 interface_ip.pure_ram = false; 893 interface_ip.pure_cam = false; 894 interface_ip.throughput = 1.0 / clockRate; 895 interface_ip.latency = 1.0 / clockRate; 896 LoadQ = new ArrayST(xml_data, &interface_ip, "Load Queue", Core_device, 897 clockRate, core_params.opt_local, 898 core_params.core_ty); 899 LoadQ->area.set_area(LoadQ->area.get_area() + 900 LoadQ->local_result.area); 901 area.set_area(area.get_area()*cdb_overhead); 902 lsq_height = (LSQ->local_result.cache_ht + 903 LoadQ->local_result.cache_ht) * sqrt(cdb_overhead); 904 } 905 906} 907 908MemManU::MemManU(XMLNode* _xml_data, InputParameter* interface_ip_, 909 const CoreParameters & _core_params, 910 const CoreStatistics & _core_stats, bool exist_) 911 : McPATComponent(_xml_data), itlb(NULL), dtlb(NULL), 912 interface_ip(*interface_ip_), 913 core_params(_core_params), core_stats(_core_stats), exist(exist_) { 914 if (!exist) return; 915 int tag; 916 int data; 917 int line; 918 919 clockRate = core_params.clockRate; 920 name = "Memory Management Unit"; 921 922 set_params_stats(); 923 924 // These are shared between ITLB and DTLB 925 interface_ip.is_cache = true; 926 interface_ip.pure_cam = false; 927 interface_ip.pure_ram = false; 928 //Itlb TLBs are partioned among threads according to Nigara and Nehalem 929 tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) + 930 int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; 931 data = physical_address_width - int(floor(log2(virtual_memory_page_size))); 932 line = int(ceil(data / BITS_PER_BYTE)); 933 934 interface_ip.cache_sz = mem_man_params.itlb_number_entries * line; 935 interface_ip.line_sz = line; 936 interface_ip.assoc = mem_man_params.itlb_assoc; 937 interface_ip.nbanks = mem_man_params.itlb_nbanks; 938 interface_ip.out_w = line * BITS_PER_BYTE; 939 interface_ip.specific_tag = tag > 0; 940 interface_ip.tag_w = tag; 941 interface_ip.access_mode = Normal; 942 interface_ip.obj_func_dyn_energy = 0; 943 interface_ip.obj_func_dyn_power = 0; 944 interface_ip.obj_func_leak_power = 0; 945 interface_ip.obj_func_cycle_t = 1; 946 interface_ip.num_rw_ports = core_params.number_instruction_fetch_ports; 947 interface_ip.num_rd_ports = 0; 948 interface_ip.num_wr_ports = 0; 949 interface_ip.num_se_rd_ports = 0; 950 interface_ip.num_search_ports = core_params.number_instruction_fetch_ports; 951 interface_ip.throughput = mem_man_params.itlb_throughput / clockRate; 952 interface_ip.latency = mem_man_params.itlb_latency / clockRate; 953 itlb = new ArrayST(xml_data, &interface_ip, "Instruction TLB", Core_device, 954 clockRate, core_params.opt_local, core_params.core_ty); 955 area.set_area(area.get_area() + itlb->local_result.area); 956 957 //dtlb 958 tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) + 959 int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; 960 data = physical_address_width - int(floor(log2(virtual_memory_page_size))); 961 line = int(ceil(data / BITS_PER_BYTE)); 962 963 interface_ip.cache_sz = mem_man_params.dtlb_number_entries * line; 964 interface_ip.line_sz = line; 965 interface_ip.assoc = mem_man_params.dtlb_assoc; 966 interface_ip.nbanks = mem_man_params.dtlb_nbanks; 967 interface_ip.out_w = line * BITS_PER_BYTE; 968 interface_ip.specific_tag = tag > 0; 969 interface_ip.tag_w = tag; 970 interface_ip.access_mode = Normal; 971 interface_ip.obj_func_dyn_energy = 0; 972 interface_ip.obj_func_dyn_power = 0; 973 interface_ip.obj_func_leak_power = 0; 974 interface_ip.obj_func_cycle_t = 1; 975 interface_ip.num_rw_ports = 0; 976 interface_ip.num_rd_ports = core_params.memory_ports; 977 interface_ip.num_wr_ports = core_params.memory_ports; 978 interface_ip.num_se_rd_ports = 0; 979 interface_ip.num_search_ports = core_params.memory_ports; 980 interface_ip.throughput = mem_man_params.dtlb_throughput / clockRate; 981 interface_ip.latency = mem_man_params.dtlb_latency / clockRate; 982 dtlb = new ArrayST(xml_data, &interface_ip, "Data TLB", Core_device, 983 clockRate, core_params.opt_local, core_params.core_ty); 984 area.set_area(area.get_area() + dtlb->local_result.area); 985 986} 987 988void 989MemManU::set_params_stats() { 990 memset(&mem_man_params, 0, sizeof(MemoryManagementParams)); 991 memset(&mem_man_stats, 0, sizeof(MemoryManagementStats)); 992 int num_children = xml_data->nChildNode("component"); 993 int i; 994 for (i = 0; i < num_children; i++) { 995 XMLNode* child = xml_data->getChildNodePtr("component", &i); 996 XMLCSTR type = child->getAttribute("type"); 997 998 if (!type) 999 warnMissingComponentType(child->getAttribute("id")); 1000 1001 STRCMP(type, "InstructionTLB") { 1002 int sub_num_children = child->nChildNode("param"); 1003 int j; 1004 for (j = 0; j < sub_num_children; j++) { 1005 XMLNode* paramNode = child->getChildNodePtr("param", &j); 1006 XMLCSTR node_name = paramNode->getAttribute("name"); 1007 XMLCSTR value = paramNode->getAttribute("value"); 1008 1009 if (!node_name) 1010 warnMissingParamName(paramNode->getAttribute("id")); 1011 1012 ASSIGN_INT_IF("number_entries", 1013 mem_man_params.itlb_number_entries); 1014 ASSIGN_FP_IF("latency", mem_man_params.itlb_latency); 1015 ASSIGN_FP_IF("throughput", mem_man_params.itlb_throughput); 1016 ASSIGN_FP_IF("assoc", mem_man_params.itlb_assoc); 1017 ASSIGN_FP_IF("nbanks", mem_man_params.itlb_nbanks); 1018 1019 else { 1020 warnUnrecognizedParam(node_name); 1021 } 1022 } 1023 sub_num_children = child->nChildNode("stat"); 1024 for (j = 0; j < sub_num_children; j++) { 1025 XMLNode* statNode = child->getChildNodePtr("stat", &j); 1026 XMLCSTR node_name = statNode->getAttribute("name"); 1027 XMLCSTR value = statNode->getAttribute("value"); 1028 1029 if (!node_name) 1030 warnMissingStatName(statNode->getAttribute("id")); 1031 1032 ASSIGN_FP_IF("total_accesses", 1033 mem_man_stats.itlb_total_accesses); 1034 ASSIGN_FP_IF("total_misses", mem_man_stats.itlb_total_misses); 1035 ASSIGN_FP_IF("conflicts", mem_man_stats.itlb_conflicts); 1036 1037 else { 1038 warnUnrecognizedStat(node_name); 1039 } 1040 } 1041 } STRCMP(type, "DataTLB") { 1042 int sub_num_children = child->nChildNode("param"); 1043 int j; 1044 for (j = 0; j < sub_num_children; j++) { 1045 XMLNode* paramNode = child->getChildNodePtr("param", &j); 1046 XMLCSTR node_name = paramNode->getAttribute("name"); 1047 XMLCSTR value = paramNode->getAttribute("value"); 1048 1049 if (!node_name) 1050 warnMissingParamName(paramNode->getAttribute("id")); 1051 1052 ASSIGN_INT_IF("number_entries", 1053 mem_man_params.dtlb_number_entries); 1054 ASSIGN_FP_IF("latency", mem_man_params.dtlb_latency); 1055 ASSIGN_FP_IF("throughput", mem_man_params.dtlb_throughput); 1056 ASSIGN_FP_IF("assoc", mem_man_params.dtlb_assoc); 1057 ASSIGN_FP_IF("nbanks", mem_man_params.dtlb_nbanks); 1058 1059 else { 1060 warnUnrecognizedParam(node_name); 1061 } 1062 } 1063 sub_num_children = child->nChildNode("stat"); 1064 for (j = 0; j < sub_num_children; j++) { 1065 XMLNode* statNode = child->getChildNodePtr("stat", &j); 1066 XMLCSTR node_name = statNode->getAttribute("name"); 1067 XMLCSTR value = statNode->getAttribute("value"); 1068 1069 if (!node_name) 1070 warnMissingStatName(statNode->getAttribute("id")); 1071 1072 ASSIGN_FP_IF("read_accesses", 1073 mem_man_stats.dtlb_read_accesses); 1074 ASSIGN_FP_IF("read_misses", mem_man_stats.dtlb_read_misses); 1075 ASSIGN_FP_IF("write_accesses", 1076 mem_man_stats.dtlb_write_accesses); 1077 ASSIGN_FP_IF("write_misses", mem_man_stats.dtlb_write_misses); 1078 ASSIGN_FP_IF("conflicts", mem_man_stats.dtlb_conflicts); 1079 1080 else { 1081 warnUnrecognizedStat(node_name); 1082 } 1083 } 1084 } 1085 } 1086} 1087 1088RegFU::RegFU(XMLNode* _xml_data, InputParameter* interface_ip_, 1089 const CoreParameters & _core_params, 1090 const CoreStatistics & _core_stats, bool exist_) 1091 : McPATComponent(_xml_data), IRF(NULL), FRF(NULL), RFWIN(NULL), 1092 interface_ip(*interface_ip_), 1093 core_params(_core_params), core_stats(_core_stats), exist(exist_) { 1094 /* 1095 * processors have separate architectural register files for each thread. 1096 * therefore, the bypass buses need to travel across all the register files. 1097 */ 1098 if (!exist) return; 1099 int data; 1100 int line; 1101 1102 clockRate = core_params.clockRate; 1103 name = "Register File Unit"; 1104 1105 //**********************************IRF************************************ 1106 data = core_params.int_data_width; 1107 line = int(ceil(data / BITS_PER_BYTE)); 1108 1109 interface_ip.cache_sz = core_params.num_IRF_entry * line; 1110 interface_ip.line_sz = line; 1111 interface_ip.assoc = core_params.phy_Regs_IRF_assoc; 1112 interface_ip.nbanks = core_params.phy_Regs_IRF_nbanks; 1113 interface_ip.out_w = line * BITS_PER_BYTE; 1114 interface_ip.specific_tag = core_params.phy_Regs_IRF_tag_width > 0; 1115 interface_ip.tag_w = core_params.phy_Regs_IRF_tag_width; 1116 interface_ip.access_mode = Sequential; 1117 interface_ip.obj_func_dyn_energy = 0; 1118 interface_ip.obj_func_dyn_power = 0; 1119 interface_ip.obj_func_leak_power = 0; 1120 interface_ip.obj_func_cycle_t = 1; 1121 interface_ip.num_rw_ports = 0; 1122 interface_ip.num_rd_ports = core_params.phy_Regs_IRF_rd_ports; 1123 interface_ip.num_wr_ports = core_params.phy_Regs_IRF_wr_ports; 1124 interface_ip.num_se_rd_ports = 0; 1125 interface_ip.num_search_ports = 0; 1126 interface_ip.is_cache = false; 1127 interface_ip.pure_cam = false; 1128 interface_ip.pure_ram = true; 1129 interface_ip.throughput = 1.0 / clockRate; 1130 interface_ip.latency = 1.0 / clockRate; 1131 IRF = new ArrayST(xml_data, &interface_ip, "Integer Register File", 1132 Core_device, clockRate, core_params.opt_local, 1133 core_params.core_ty); 1134 IRF->output_data.area *= core_params.num_hthreads * 1135 core_params.num_pipelines * cdb_overhead; 1136 area.set_area(area.get_area() + IRF->local_result.area * 1137 core_params.num_hthreads * core_params.num_pipelines * 1138 cdb_overhead); 1139 1140 //**********************************FRF************************************ 1141 data = core_params.fp_data_width; 1142 line = int(ceil(data / BITS_PER_BYTE)); 1143 1144 interface_ip.cache_sz = core_params.num_FRF_entry * line; 1145 interface_ip.line_sz = line; 1146 interface_ip.assoc = core_params.phy_Regs_FRF_assoc; 1147 interface_ip.nbanks = core_params.phy_Regs_FRF_nbanks; 1148 interface_ip.out_w = line * BITS_PER_BYTE; 1149 interface_ip.specific_tag = core_params.phy_Regs_FRF_tag_width > 0; 1150 interface_ip.tag_w = core_params.phy_Regs_FRF_tag_width; 1151 interface_ip.access_mode = Sequential; 1152 interface_ip.obj_func_dyn_energy = 0; 1153 interface_ip.obj_func_dyn_power = 0; 1154 interface_ip.obj_func_leak_power = 0; 1155 interface_ip.obj_func_cycle_t = 1; 1156 interface_ip.num_rw_ports = 0; 1157 interface_ip.num_rd_ports = core_params.phy_Regs_FRF_rd_ports; 1158 interface_ip.num_wr_ports = core_params.phy_Regs_FRF_wr_ports; 1159 interface_ip.num_se_rd_ports = 0; 1160 interface_ip.num_search_ports = 0; 1161 interface_ip.is_cache = false; 1162 interface_ip.pure_cam = false; 1163 interface_ip.pure_ram = true; 1164 interface_ip.throughput = 1.0 / clockRate; 1165 interface_ip.latency = 1.0 / clockRate; 1166 FRF = new ArrayST(xml_data, &interface_ip, "FP Register File", Core_device, 1167 clockRate, core_params.opt_local, core_params.core_ty); 1168 FRF->output_data.area *= core_params.num_hthreads * 1169 core_params.num_fp_pipelines * cdb_overhead; 1170 area.set_area(area.get_area() + FRF->local_result.area * 1171 core_params.num_hthreads * core_params.num_fp_pipelines * 1172 cdb_overhead); 1173 int_regfile_height = IRF->local_result.cache_ht * 1174 core_params.num_hthreads * sqrt(cdb_overhead); 1175 fp_regfile_height = FRF->local_result.cache_ht * core_params.num_hthreads * 1176 sqrt(cdb_overhead); 1177 //since a EXU is associated with each pipeline, the cdb should not have 1178 //longer length. 1179 1180 if (core_params.regWindowing) { 1181 //*********************************REG_WIN***************************** 1182 //ECC, and usually 2 regs are transfered together during window 1183 //shifting.Niagara Mega cell 1184 data = core_params.int_data_width; 1185 line = int(ceil(data / BITS_PER_BYTE)); 1186 1187 interface_ip.cache_sz = core_params.register_window_size * 1188 IRF->l_ip.cache_sz * core_params.num_hthreads; 1189 interface_ip.line_sz = line; 1190 interface_ip.assoc = core_params.register_window_assoc; 1191 interface_ip.nbanks = core_params.register_window_nbanks; 1192 interface_ip.out_w = line * BITS_PER_BYTE; 1193 interface_ip.specific_tag = core_params.register_window_tag_width > 0; 1194 interface_ip.tag_w = core_params.register_window_tag_width; 1195 interface_ip.access_mode = Sequential; 1196 interface_ip.obj_func_dyn_energy = 0; 1197 interface_ip.obj_func_dyn_power = 0; 1198 interface_ip.obj_func_leak_power = 0; 1199 interface_ip.obj_func_cycle_t = 1; 1200 interface_ip.num_rw_ports = core_params.register_window_rw_ports; 1201 interface_ip.num_rd_ports = 0; 1202 interface_ip.num_wr_ports = 0; 1203 interface_ip.num_se_rd_ports = 0; 1204 interface_ip.num_search_ports = 0; 1205 interface_ip.is_cache = false; 1206 interface_ip.pure_cam = false; 1207 interface_ip.pure_ram = true; 1208 interface_ip.throughput = 1209 core_params.register_window_throughput / clockRate; 1210 interface_ip.latency = 1211 core_params.register_window_latency / clockRate; 1212 RFWIN = new ArrayST(xml_data, &interface_ip, "RegWindow", Core_device, 1213 clockRate, core_params.opt_local, 1214 core_params.core_ty); 1215 RFWIN->output_data.area *= core_params.num_pipelines; 1216 area.set_area(area.get_area() + RFWIN->local_result.area * 1217 core_params.num_pipelines); 1218 } 1219} 1220 1221EXECU::EXECU(XMLNode* _xml_data, 1222 InputParameter* interface_ip_, double lsq_height_, 1223 const CoreParameters & _core_params, 1224 const CoreStatistics & _core_stats, bool exist_) 1225 : McPATComponent(_xml_data), rfu(NULL), scheu(NULL), fp_u(NULL), 1226 exeu(NULL), mul(NULL), int_bypass(NULL), intTagBypass(NULL), 1227 int_mul_bypass(NULL), intTag_mul_Bypass(NULL), fp_bypass(NULL), 1228 fpTagBypass(NULL), interface_ip(*interface_ip_), 1229 lsq_height(lsq_height_), core_params(_core_params), 1230 core_stats(_core_stats), exist(exist_) { 1231 if (!exist) return; 1232 double fu_height = 0.0; 1233 clockRate = core_params.clockRate; 1234 name = "Execution Unit"; 1235 rfu = new RegFU(xml_data, &interface_ip, core_params, core_stats); 1236 if (core_params.core_ty == OOO || 1237 (core_params.core_ty == Inorder && core_params.multithreaded)) { 1238 scheu = new SchedulerU(xml_data, &interface_ip, core_params, 1239 core_stats); 1240 area.set_area(area.get_area() + scheu->area.get_area() ); 1241 } 1242 exeu = new FunctionalUnit(xml_data, &interface_ip, core_params, 1243 core_stats, ALU); 1244 area.set_area(area.get_area() + exeu->area.get_area() + 1245 rfu->area.get_area()); 1246 fu_height = exeu->FU_height; 1247 if (core_params.num_fpus > 0) { 1248 fp_u = new FunctionalUnit(xml_data, &interface_ip, 1249 core_params, core_stats, FPU); 1250 area.set_area(area.get_area() + fp_u->area.get_area()); 1251 } 1252 if (core_params.num_muls > 0) { 1253 mul = new FunctionalUnit(xml_data, &interface_ip, 1254 core_params, core_stats, MUL); 1255 area.set_area(area.get_area() + mul->area.get_area()); 1256 fu_height += mul->FU_height; 1257 } 1258 /* 1259 * broadcast logic, including int-broadcast; int_tag-broadcast; 1260 * fp-broadcast; fp_tag-broadcast 1261 * integer by pass has two paths and fp has 3 paths. 1262 * on the same bus there are multiple tri-state drivers and muxes that go 1263 * to different components on the same bus 1264 */ 1265 interface_ip.wt = core_params.execu_broadcast_wt; 1266 interface_ip.wire_is_mat_type = core_params.execu_wire_mat_type; 1267 interface_ip.wire_os_mat_type = core_params.execu_wire_mat_type; 1268 interface_ip.throughput = core_params.broadcast_numerator / clockRate; 1269 interface_ip.latency = core_params.broadcast_numerator / clockRate; 1270 double scheu_Iw_height = 0.0; 1271 double scheu_ROB_height = 0.0; 1272 double scheu_fp_Iw_height = 0.0; 1273 if (scheu) { 1274 scheu_Iw_height = scheu->Iw_height; 1275 scheu_ROB_height = scheu->ROB_height; 1276 scheu_fp_Iw_height = scheu->fp_Iw_height; 1277 } 1278 1279 // Common bypass logic parameters 1280 double base_w = core_params.execu_bypass_base_width; 1281 double base_h = core_params.execu_bypass_base_height; 1282 int level = core_params.execu_bypass_start_wiring_level; 1283 double route_over_perc = core_params.execu_bypass_route_over_perc; 1284 Wire_type wire_type = core_params.execu_bypass_wire_type; 1285 int data_w; 1286 double len; 1287 1288 if (core_params.core_ty == Inorder) { 1289 data_w = int(ceil(data_path_width / 32.0)*32); 1290 len = rfu->int_regfile_height + exeu->FU_height + lsq_height; 1291 int_bypass = new Interconnect(xml_data, "Int Bypass Data", Core_device, 1292 base_w, base_h, data_w, len, 1293 &interface_ip, level, clockRate, false, 1294 route_over_perc, core_params.opt_local, 1295 core_params.core_ty, wire_type); 1296 1297 data_w = core_params.perThreadState; 1298 len = rfu->int_regfile_height + exeu->FU_height + lsq_height + 1299 scheu_Iw_height; 1300 intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", 1301 Core_device, 1302 base_w, base_h, data_w, len, 1303 &interface_ip, level, clockRate, false, 1304 route_over_perc, core_params.opt_local, 1305 core_params.core_ty, wire_type); 1306 1307 if (core_params.num_muls > 0) { 1308 data_w = int(ceil(data_path_width / 32.0)*32*1.5); 1309 len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + 1310 lsq_height; 1311 int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", 1312 Core_device, base_w, base_h, 1313 data_w, len, &interface_ip, 1314 level, clockRate, false, 1315 route_over_perc, 1316 core_params.opt_local, 1317 core_params.core_ty, wire_type); 1318 1319 data_w = core_params.perThreadState; 1320 len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + 1321 lsq_height + scheu_Iw_height; 1322 intTag_mul_Bypass = new Interconnect(xml_data, "Mul Bypass Tag", 1323 Core_device, base_w, base_h, 1324 data_w, len, &interface_ip, 1325 level, clockRate, false, 1326 route_over_perc, 1327 core_params.opt_local, 1328 core_params.core_ty, 1329 wire_type); 1330 } 1331 1332 if (core_params.num_fpus > 0) { 1333 data_w = int(ceil(data_path_width / 32.0)*32*1.5); 1334 len = rfu->fp_regfile_height + fp_u->FU_height; 1335 fp_bypass = new Interconnect(xml_data, "FP Bypass Data", 1336 Core_device, 1337 base_w, base_h, data_w, len, 1338 &interface_ip, level, clockRate, 1339 false, route_over_perc, 1340 core_params.opt_local, 1341 core_params.core_ty, wire_type); 1342 1343 data_w = core_params.perThreadState; 1344 len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + 1345 scheu_Iw_height; 1346 fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", 1347 Core_device, base_w, base_h, data_w, 1348 len, &interface_ip, level, 1349 clockRate, false, route_over_perc, 1350 core_params.opt_local, 1351 core_params.core_ty, wire_type); 1352 } 1353 } else {//OOO 1354 if (core_params.scheu_ty == PhysicalRegFile) { 1355 /* For physical register based OOO, 1356 * data broadcast interconnects cover across functional units, lsq, 1357 * inst windows and register files, 1358 * while tag broadcast interconnects also cover across ROB 1359 */ 1360 data_w = int(ceil(core_params.int_data_width)); 1361 len = rfu->int_regfile_height + exeu->FU_height + lsq_height; 1362 int_bypass = new Interconnect(xml_data, "Int Bypass Data", 1363 Core_device, base_w, base_h, data_w, 1364 len, &interface_ip, level, clockRate, 1365 false, route_over_perc, 1366 core_params.opt_local, 1367 core_params.core_ty, wire_type); 1368 1369 data_w = core_params.phy_ireg_width; 1370 len = rfu->int_regfile_height + exeu->FU_height + lsq_height + 1371 scheu_Iw_height + scheu_ROB_height; 1372 intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", 1373 Core_device, base_w, base_h, 1374 data_w, len, &interface_ip, level, 1375 clockRate, false, route_over_perc, 1376 core_params.opt_local, 1377 core_params.core_ty, wire_type); 1378 1379 if (core_params.num_muls > 0) { 1380 data_w = int(ceil(core_params.int_data_width)); 1381 len = rfu->int_regfile_height + exeu->FU_height + 1382 mul->FU_height + lsq_height; 1383 int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", 1384 Core_device, base_w, base_h, 1385 data_w, len, &interface_ip, 1386 level, clockRate, false, 1387 route_over_perc, 1388 core_params.opt_local, 1389 core_params.core_ty, 1390 wire_type); 1391 1392 data_w = core_params.phy_ireg_width; 1393 len = rfu->int_regfile_height + exeu->FU_height + 1394 mul->FU_height + lsq_height + scheu_Iw_height + 1395 scheu_ROB_height; 1396 intTag_mul_Bypass = new Interconnect(xml_data, 1397 "Mul Bypass Tag", 1398 Core_device, base_w, 1399 base_h, data_w, len, 1400 &interface_ip, level, 1401 clockRate, false, 1402 route_over_perc, 1403 core_params.opt_local, 1404 core_params.core_ty, 1405 wire_type); 1406 } 1407 1408 if (core_params.num_fpus > 0) { 1409 data_w = int(ceil(core_params.fp_data_width)); 1410 len = rfu->fp_regfile_height + fp_u->FU_height; 1411 fp_bypass = new Interconnect(xml_data, "FP Bypass Data", 1412 Core_device, base_w, base_h, 1413 data_w, len, &interface_ip, level, 1414 clockRate, false, route_over_perc, 1415 core_params.opt_local, 1416 core_params.core_ty, wire_type); 1417 1418 data_w = core_params.phy_freg_width; 1419 len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + 1420 scheu_fp_Iw_height + scheu_ROB_height; 1421 fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", 1422 Core_device, base_w, base_h, 1423 data_w, len, &interface_ip, 1424 level, clockRate, false, 1425 route_over_perc, 1426 core_params.opt_local, 1427 core_params.core_ty, wire_type); 1428 } 1429 } else { 1430 /* 1431 * In RS based processor both data and tag are broadcast together, 1432 * covering functional units, lsq, nst windows, register files, and ROBs 1433 */ 1434 data_w = int(ceil(core_params.int_data_width)); 1435 len = rfu->int_regfile_height + exeu->FU_height + lsq_height + 1436 scheu_Iw_height + scheu_ROB_height; 1437 int_bypass = new Interconnect(xml_data, "Int Bypass Data", 1438 Core_device, base_w, base_h, data_w, 1439 len, &interface_ip, level, clockRate, 1440 false, route_over_perc, 1441 core_params.opt_local, 1442 core_params.core_ty, wire_type); 1443 1444 data_w = core_params.phy_ireg_width; 1445 len = rfu->int_regfile_height + exeu->FU_height + lsq_height + 1446 scheu_Iw_height + scheu_ROB_height; 1447 intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", 1448 Core_device, base_w, base_h, 1449 data_w, len, &interface_ip, level, 1450 clockRate, false, route_over_perc, 1451 core_params.opt_local, 1452 core_params.core_ty, wire_type); 1453 if (core_params.num_muls > 0) { 1454 data_w = int(ceil(core_params.int_data_width)); 1455 len = rfu->int_regfile_height + exeu->FU_height + 1456 mul->FU_height + lsq_height + scheu_Iw_height + 1457 scheu_ROB_height; 1458 int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", 1459 Core_device, base_w, base_h, 1460 data_w, len, &interface_ip, 1461 level, clockRate, false, 1462 route_over_perc, 1463 core_params.opt_local, 1464 core_params.core_ty, 1465 wire_type); 1466 1467 data_w = core_params.phy_ireg_width; 1468 len = rfu->int_regfile_height + exeu->FU_height + 1469 mul->FU_height + lsq_height + scheu_Iw_height + 1470 scheu_ROB_height; 1471 intTag_mul_Bypass = new Interconnect(xml_data, 1472 "Mul Bypass Tag", 1473 Core_device, base_w, 1474 base_h, data_w, len, 1475 &interface_ip, level, 1476 clockRate, false, 1477 route_over_perc, 1478 core_params.opt_local, 1479 core_params.core_ty, 1480 wire_type); 1481 } 1482 1483 if (core_params.num_fpus > 0) { 1484 data_w = int(ceil(core_params.fp_data_width)); 1485 len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + 1486 scheu_fp_Iw_height + scheu_ROB_height; 1487 fp_bypass = new Interconnect(xml_data, "FP Bypass Data", 1488 Core_device, base_w, base_h, 1489 data_w, len, &interface_ip, level, 1490 clockRate, false, route_over_perc, 1491 core_params.opt_local, 1492 core_params.core_ty, wire_type); 1493 1494 data_w = core_params.phy_freg_width; 1495 len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + 1496 scheu_fp_Iw_height + scheu_ROB_height; 1497 fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", 1498 Core_device, base_w, base_h, 1499 data_w, len, &interface_ip, 1500 level, clockRate, false, 1501 route_over_perc, 1502 core_params.opt_local, 1503 core_params.core_ty, wire_type); 1504 } 1505 } 1506 } 1507 if (int_bypass) { 1508 children.push_back(int_bypass); 1509 } 1510 if (intTagBypass) { 1511 children.push_back(intTagBypass); 1512 } 1513 if (int_mul_bypass) { 1514 children.push_back(int_mul_bypass); 1515 } 1516 if (intTag_mul_Bypass) { 1517 children.push_back(intTag_mul_Bypass); 1518 } 1519 if (fp_bypass) { 1520 children.push_back(fp_bypass); 1521 } 1522 if (fpTagBypass) { 1523 children.push_back(fpTagBypass); 1524 } 1525 1526 area.set_area(area.get_area() + int_bypass->area.get_area() + 1527 intTagBypass->area.get_area()); 1528 if (core_params.num_muls > 0) { 1529 area.set_area(area.get_area() + int_mul_bypass->area.get_area() + 1530 intTag_mul_Bypass->area.get_area()); 1531 } 1532 if (core_params.num_fpus > 0) { 1533 area.set_area(area.get_area() + fp_bypass->area.get_area() + 1534 fpTagBypass->area.get_area()); 1535 } 1536} 1537 1538RENAMINGU::RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_, 1539 const CoreParameters & _core_params, 1540 const CoreStatistics & _core_stats, bool exist_) 1541 : McPATComponent(_xml_data), iFRAT(NULL), fFRAT(NULL), iRRAT(NULL), 1542 fRRAT(NULL), ifreeL(NULL), ffreeL(NULL), idcl(NULL), fdcl(NULL), 1543 RAHT(NULL), interface_ip(*interface_ip_), 1544 core_params(_core_params), core_stats(_core_stats), exist(exist_) { 1545 if (!exist) return; 1546 int tag; 1547 int data; 1548 int out_w; 1549 int size; 1550 1551 // Assumption: 1552 // We make an implicit design assumption based on the specific structure 1553 // that is being modeled. 1554 // 1. RAM-based RATs are direct mapped. However, if the associated 1555 // scheduler is a reservation station style, the RATs are fully 1556 // associative. 1557 // 2. Non-CAM based RATs and free lists do not have tags. 1558 // 3. Free lists are direct mapped. 1559 1560 const int RAM_BASED_RAT_ASSOC = 1; 1561 const int RS_RAT_ASSOC = 0; 1562 const int NON_CAM_BASED_TAG_WIDTH = 0; 1563 const int FREELIST_ASSOC = 1; 1564 1565 clockRate = core_params.clockRate; 1566 name = "Rename Unit"; 1567 if (core_params.core_ty == OOO) { 1568 //integer pipeline 1569 if (core_params.scheu_ty == PhysicalRegFile) { 1570 if (core_params.rm_ty == RAMbased) { 1571 //FRAT with global checkpointing (GCs) please see paper tech 1572 //report for detailed explaintions 1573 1574 data = int(ceil(core_params.phy_ireg_width * 1575 (1 + core_params.globalCheckpoint) / 1576 BITS_PER_BYTE)); 1577 out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); 1578 1579 size = data * core_params.archi_Regs_IRF_size; 1580 1581 interface_ip.cache_sz = size; 1582 interface_ip.line_sz = data; 1583 interface_ip.assoc = RAM_BASED_RAT_ASSOC; 1584 interface_ip.nbanks = core_params.front_rat_nbanks; 1585 interface_ip.out_w = out_w * BITS_PER_BYTE; 1586 interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; 1587 interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; 1588 interface_ip.access_mode = Fast; 1589 interface_ip.obj_func_dyn_energy = 0; 1590 interface_ip.obj_func_dyn_power = 0; 1591 interface_ip.obj_func_leak_power = 0; 1592 interface_ip.obj_func_cycle_t = 1; 1593 interface_ip.num_rw_ports = core_params.front_rat_rw_ports; 1594 interface_ip.num_rd_ports = 1595 NUM_SOURCE_OPERANDS * core_params.decodeW; 1596 interface_ip.num_wr_ports = core_params.decodeW; 1597 interface_ip.num_se_rd_ports = 0; 1598 interface_ip.num_search_ports = 0; 1599 interface_ip.is_cache = false; 1600 interface_ip.pure_cam = false; 1601 interface_ip.pure_ram = true; 1602 interface_ip.throughput = 1.0 / clockRate; 1603 interface_ip.latency = 1.0 / clockRate; 1604 iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", 1605 Core_device, clockRate, 1606 core_params.opt_local, 1607 core_params.core_ty); 1608 iFRAT->output_data.area *= core_params.num_hthreads; 1609 area.set_area(area.get_area() + iFRAT->area.get_area()); 1610 1611 //FRAT floating point 1612 data = int(ceil(core_params.phy_freg_width * 1613 (1 + core_params.globalCheckpoint) / 1614 BITS_PER_BYTE)); 1615 out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); 1616 size = data * core_params.archi_Regs_FRF_size; 1617 1618 interface_ip.cache_sz = size; 1619 interface_ip.line_sz = data; 1620 interface_ip.assoc = RAM_BASED_RAT_ASSOC; 1621 interface_ip.nbanks = core_params.front_rat_nbanks; 1622 interface_ip.out_w = out_w * BITS_PER_BYTE; 1623 interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; 1624 interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; 1625 interface_ip.access_mode = Fast; 1626 interface_ip.obj_func_dyn_energy = 0; 1627 interface_ip.obj_func_dyn_power = 0; 1628 interface_ip.obj_func_leak_power = 0; 1629 interface_ip.obj_func_cycle_t = 1; 1630 interface_ip.num_rw_ports = core_params.front_rat_rw_ports; 1631 interface_ip.num_rd_ports = 1632 NUM_SOURCE_OPERANDS * core_params.fp_decodeW; 1633 interface_ip.num_wr_ports = core_params.fp_decodeW; 1634 interface_ip.num_se_rd_ports = 0; 1635 interface_ip.num_search_ports = 0; 1636 interface_ip.is_cache = false; 1637 interface_ip.pure_cam = false; 1638 interface_ip.pure_ram = true; 1639 interface_ip.throughput = 1.0 / clockRate; 1640 interface_ip.latency = 1.0 / clockRate; 1641 fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", 1642 Core_device, clockRate, 1643 core_params.opt_local, 1644 core_params.core_ty); 1645 fFRAT->output_data.area *= core_params.num_hthreads; 1646 area.set_area(area.get_area() + fFRAT->area.get_area()); 1647 1648 } else if ((core_params.rm_ty == CAMbased)) { 1649 //IRAT 1650 tag = core_params.arch_ireg_width; 1651 //the address of CAM needed to be sent out 1652 data = int(ceil((core_params.arch_ireg_width + 1 * 1653 core_params.globalCheckpoint) / 1654 BITS_PER_BYTE)); 1655 out_w = int(ceil(core_params.arch_ireg_width / BITS_PER_BYTE)); 1656 size = data * core_params.phy_Regs_IRF_size; 1657 1658 interface_ip.cache_sz = size; 1659 interface_ip.line_sz = data; 1660 interface_ip.assoc = CAM_ASSOC; 1661 interface_ip.nbanks = core_params.front_rat_nbanks; 1662 interface_ip.out_w = out_w * BITS_PER_BYTE; 1663 interface_ip.specific_tag = tag > 0; 1664 interface_ip.tag_w = tag; 1665 interface_ip.access_mode = Fast; 1666 interface_ip.obj_func_dyn_energy = 0; 1667 interface_ip.obj_func_dyn_power = 0; 1668 interface_ip.obj_func_leak_power = 0; 1669 interface_ip.obj_func_cycle_t = 1; 1670 interface_ip.num_rw_ports = core_params.front_rat_rw_ports; 1671 interface_ip.num_rd_ports = core_params.decodeW; 1672 interface_ip.num_wr_ports = core_params.decodeW; 1673 interface_ip.num_se_rd_ports = 0; 1674 interface_ip.num_search_ports = 1675 NUM_SOURCE_OPERANDS * core_params.decodeW; 1676 interface_ip.is_cache = true; 1677 interface_ip.pure_cam = false; 1678 interface_ip.pure_ram = false; 1679 interface_ip.throughput = 1.0 / clockRate; 1680 interface_ip.latency = 1.0 / clockRate; 1681 iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", 1682 Core_device, clockRate, 1683 core_params.opt_local, 1684 core_params.core_ty); 1685 iFRAT->output_data.area *= core_params.num_hthreads; 1686 area.set_area(area.get_area() + iFRAT->area.get_area()); 1687 1688 //FRAT for FP 1689 tag = core_params.arch_freg_width; 1690 //the address of CAM needed to be sent out 1691 data = int(ceil((core_params.arch_freg_width + 1 * 1692 core_params.globalCheckpoint) / 1693 BITS_PER_BYTE)); 1694 out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE)); 1695 size = data * core_params.phy_Regs_FRF_size; 1696 1697 interface_ip.cache_sz = size; 1698 interface_ip.line_sz = data; 1699 interface_ip.assoc = CAM_ASSOC; 1700 interface_ip.nbanks = core_params.front_rat_nbanks; 1701 interface_ip.out_w = out_w * BITS_PER_BYTE; 1702 interface_ip.specific_tag = tag > 0; 1703 interface_ip.tag_w = tag; 1704 interface_ip.access_mode = Fast; 1705 interface_ip.obj_func_dyn_energy = 0; 1706 interface_ip.obj_func_dyn_power = 0; 1707 interface_ip.obj_func_leak_power = 0; 1708 interface_ip.obj_func_cycle_t = 1; 1709 interface_ip.num_rw_ports = core_params.front_rat_rw_ports; 1710 interface_ip.num_rd_ports = core_params.fp_decodeW; 1711 interface_ip.num_wr_ports = core_params.fp_decodeW; 1712 interface_ip.num_se_rd_ports = 0; 1713 interface_ip.num_search_ports = 1714 NUM_SOURCE_OPERANDS * core_params.fp_decodeW; 1715 interface_ip.is_cache = true; 1716 interface_ip.pure_cam = false; 1717 interface_ip.pure_ram = false; 1718 interface_ip.throughput = 1.0 / clockRate; 1719 interface_ip.latency = 1.0 / clockRate; 1720 fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", 1721 Core_device, clockRate, 1722 core_params.opt_local, 1723 core_params.core_ty); 1724 fFRAT->output_data.area *= core_params.num_hthreads; 1725 area.set_area(area.get_area() + fFRAT->area.get_area()); 1726 } 1727 1728 //RRAT is always RAM based, does not have GCs, and is used only for 1729 //record latest non-speculative mapping 1730 data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); 1731 size = data * core_params.archi_Regs_IRF_size * 1732 NUM_SOURCE_OPERANDS; 1733 1734 interface_ip.cache_sz = size; 1735 interface_ip.line_sz = data; 1736 interface_ip.assoc = RAM_BASED_RAT_ASSOC; 1737 interface_ip.nbanks = core_params.retire_rat_nbanks; 1738 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 1739 interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; 1740 interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; 1741 interface_ip.access_mode = Sequential; 1742 interface_ip.obj_func_dyn_energy = 0; 1743 interface_ip.obj_func_dyn_power = 0; 1744 interface_ip.obj_func_leak_power = 0; 1745 interface_ip.obj_func_cycle_t = 1; 1746 interface_ip.num_rw_ports = core_params.retire_rat_rw_ports; 1747 interface_ip.num_rd_ports = core_params.commitW; 1748 interface_ip.num_wr_ports = core_params.commitW; 1749 interface_ip.num_se_rd_ports = 0; 1750 interface_ip.num_search_ports = 0; 1751 interface_ip.is_cache = false; 1752 interface_ip.pure_cam = false; 1753 interface_ip.pure_ram = true; 1754 interface_ip.throughput = 1.0 / clockRate; 1755 interface_ip.latency = 1.0 / clockRate; 1756 iRRAT = new ArrayST(xml_data, &interface_ip, "Int Retire RAT", 1757 Core_device, clockRate, core_params.opt_local, 1758 core_params.core_ty); 1759 iRRAT->output_data.area *= core_params.num_hthreads; 1760 area.set_area(area.get_area() + iRRAT->area.get_area()); 1761 1762 //RRAT for FP 1763 data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); 1764 size = data * core_params.archi_Regs_FRF_size * 1765 NUM_SOURCE_OPERANDS; 1766 1767 interface_ip.cache_sz = size; 1768 interface_ip.line_sz = data; 1769 interface_ip.assoc = RAM_BASED_RAT_ASSOC; 1770 interface_ip.nbanks = core_params.retire_rat_nbanks; 1771 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 1772 interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; 1773 interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; 1774 interface_ip.access_mode = Sequential; 1775 interface_ip.obj_func_dyn_energy = 0; 1776 interface_ip.obj_func_dyn_power = 0; 1777 interface_ip.obj_func_leak_power = 0; 1778 interface_ip.obj_func_cycle_t = 1; 1779 interface_ip.num_rw_ports = core_params.retire_rat_rw_ports; 1780 interface_ip.num_rd_ports = core_params.fp_decodeW; 1781 interface_ip.num_wr_ports = core_params.fp_decodeW; 1782 interface_ip.num_se_rd_ports = 0; 1783 interface_ip.num_search_ports = 0; 1784 interface_ip.is_cache = false; 1785 interface_ip.pure_cam = false; 1786 interface_ip.pure_ram = true; 1787 interface_ip.throughput = 1.0 / clockRate; 1788 interface_ip.latency = 1.0 / clockRate; 1789 fRRAT = new ArrayST(xml_data, &interface_ip, "FP Retire RAT", 1790 Core_device, clockRate, core_params.opt_local, 1791 core_params.core_ty); 1792 fRRAT->output_data.area *= core_params.num_hthreads; 1793 area.set_area(area.get_area() + fRRAT->area.get_area()); 1794 1795 //Freelist of renaming unit always RAM based 1796 //Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist 1797 // 2)When instruction commits the Phyregisters/ROB needed to be recycled. 1798 //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width 1799 data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); 1800 size = data * core_params.num_ifreelist_entries; 1801 1802 interface_ip.cache_sz = size; 1803 interface_ip.line_sz = data; 1804 interface_ip.assoc = FREELIST_ASSOC; 1805 interface_ip.nbanks = core_params.freelist_nbanks; 1806 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 1807 interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; 1808 interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; 1809 interface_ip.access_mode = Sequential; 1810 interface_ip.obj_func_dyn_energy = 0; 1811 interface_ip.obj_func_dyn_power = 0; 1812 interface_ip.obj_func_leak_power = 0; 1813 interface_ip.obj_func_cycle_t = 1; 1814 interface_ip.num_rw_ports = core_params.freelist_rw_ports; 1815 interface_ip.num_rd_ports = core_params.decodeW; 1816 interface_ip.num_wr_ports = 1817 core_params.decodeW - 1 + core_params.commitW; 1818 interface_ip.num_se_rd_ports = 0; 1819 interface_ip.num_search_ports = 0; 1820 interface_ip.is_cache = false; 1821 interface_ip.pure_cam = false; 1822 interface_ip.pure_ram = true; 1823 interface_ip.throughput = 1.0 / clockRate; 1824 interface_ip.latency = 1.0 / clockRate; 1825 ifreeL = new ArrayST(xml_data, &interface_ip, "Integer Free List", 1826 Core_device, clockRate, core_params.opt_local, 1827 core_params.core_ty); 1828 ifreeL->output_data.area *= core_params.num_hthreads; 1829 area.set_area(area.get_area() + ifreeL->area.get_area()); 1830 1831 //freelist for FP 1832 data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); 1833 size = data * core_params.num_ffreelist_entries; 1834 1835 interface_ip.cache_sz = size; 1836 interface_ip.line_sz = data; 1837 interface_ip.assoc = FREELIST_ASSOC; 1838 interface_ip.nbanks = core_params.freelist_nbanks; 1839 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 1840 interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; 1841 interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; 1842 interface_ip.access_mode = Sequential; 1843 interface_ip.obj_func_dyn_energy = 0; 1844 interface_ip.obj_func_dyn_power = 0; 1845 interface_ip.obj_func_leak_power = 0; 1846 interface_ip.obj_func_cycle_t = 1; 1847 interface_ip.num_rw_ports = core_params.freelist_rw_ports; 1848 interface_ip.num_rd_ports = core_params.fp_decodeW; 1849 interface_ip.num_wr_ports = 1850 core_params.fp_decodeW - 1 + core_params.commitW; 1851 interface_ip.num_se_rd_ports = 0; 1852 interface_ip.num_search_ports = 0; 1853 interface_ip.is_cache = false; 1854 interface_ip.pure_cam = false; 1855 interface_ip.pure_ram = true; 1856 interface_ip.throughput = 1.0 / clockRate; 1857 interface_ip.latency = 1.0 / clockRate; 1858 ffreeL = new ArrayST(xml_data, &interface_ip, "FP Free List", 1859 Core_device, clockRate, core_params.opt_local, 1860 core_params.core_ty); 1861 ffreeL->output_data.area *= core_params.num_hthreads; 1862 area.set_area(area.get_area() + ffreeL->area.get_area()); 1863 1864 } else if (core_params.scheu_ty == ReservationStation) { 1865 if (core_params.rm_ty == RAMbased) { 1866 tag = core_params.phy_ireg_width; 1867 data = int(ceil(core_params.phy_ireg_width * 1868 (1 + core_params.globalCheckpoint) / 1869 BITS_PER_BYTE)); 1870 out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); 1871 size = data * core_params.archi_Regs_IRF_size; 1872 1873 interface_ip.cache_sz = size; 1874 interface_ip.line_sz = data; 1875 interface_ip.assoc = RS_RAT_ASSOC; 1876 interface_ip.nbanks = core_params.front_rat_nbanks; 1877 interface_ip.out_w = out_w * BITS_PER_BYTE; 1878 interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; 1879 interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; 1880 interface_ip.access_mode = Fast; 1881 interface_ip.obj_func_dyn_energy = 0; 1882 interface_ip.obj_func_dyn_power = 0; 1883 interface_ip.obj_func_leak_power = 0; 1884 interface_ip.obj_func_cycle_t = 1; 1885 interface_ip.num_rw_ports = core_params.front_rat_rw_ports; 1886 interface_ip.num_rd_ports = 1887 NUM_SOURCE_OPERANDS * core_params.decodeW; 1888 interface_ip.num_wr_ports = core_params.decodeW; 1889 interface_ip.num_se_rd_ports = 0; 1890 interface_ip.num_search_ports = core_params.commitW; 1891 interface_ip.is_cache = true; 1892 interface_ip.pure_cam = false; 1893 interface_ip.pure_ram = false; 1894 interface_ip.throughput = 1.0 / clockRate; 1895 interface_ip.latency = 1.0 / clockRate; 1896 iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", 1897 Core_device, clockRate, 1898 core_params.opt_local, 1899 core_params.core_ty); 1900 iFRAT->local_result.adjust_area(); 1901 iFRAT->output_data.area *= core_params.num_hthreads; 1902 area.set_area(area.get_area() + iFRAT->area.get_area()); 1903 1904 //FP 1905 tag = core_params.phy_freg_width; 1906 data = int(ceil(core_params.phy_freg_width * 1907 (1 + core_params.globalCheckpoint) / 1908 BITS_PER_BYTE)); 1909 out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); 1910 size = data * core_params.archi_Regs_FRF_size; 1911 1912 interface_ip.cache_sz = size; 1913 interface_ip.line_sz = data; 1914 interface_ip.assoc = RS_RAT_ASSOC; 1915 interface_ip.nbanks = core_params.front_rat_nbanks; 1916 interface_ip.out_w = out_w * BITS_PER_BYTE; 1917 interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; 1918 interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; 1919 interface_ip.access_mode = Fast; 1920 interface_ip.obj_func_dyn_energy = 0; 1921 interface_ip.obj_func_dyn_power = 0; 1922 interface_ip.obj_func_leak_power = 0; 1923 interface_ip.obj_func_cycle_t = 1; 1924 interface_ip.num_rw_ports = core_params.front_rat_rw_ports; 1925 interface_ip.num_rd_ports = 1926 NUM_SOURCE_OPERANDS * core_params.fp_decodeW; 1927 interface_ip.num_wr_ports = core_params.fp_decodeW; 1928 interface_ip.num_se_rd_ports = 0; 1929 interface_ip.num_search_ports = core_params.fp_issueW; 1930 interface_ip.is_cache = true; 1931 interface_ip.pure_cam = false; 1932 interface_ip.pure_ram = false; 1933 interface_ip.throughput = 1.0 / clockRate; 1934 interface_ip.latency = 1.0 / clockRate; 1935 fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", 1936 Core_device, clockRate, 1937 core_params.opt_local, 1938 core_params.core_ty); 1939 fFRAT->local_result.adjust_area(); 1940 fFRAT->output_data.area *= core_params.num_hthreads; 1941 area.set_area(area.get_area() + fFRAT->area.get_area()); 1942 1943 } else if ((core_params.rm_ty == CAMbased)) { 1944 //FRAT 1945 //the address of CAM needed to be sent out 1946 tag = core_params.arch_ireg_width; 1947 data = int(ceil (core_params.arch_ireg_width + 1948 1 * core_params.globalCheckpoint / 1949 BITS_PER_BYTE)); 1950 out_w = int(ceil (core_params.arch_ireg_width / 1951 BITS_PER_BYTE)); 1952 size = data * core_params.phy_Regs_IRF_size; 1953 1954 interface_ip.cache_sz = size; 1955 interface_ip.line_sz = data; 1956 interface_ip.assoc = CAM_ASSOC; 1957 interface_ip.nbanks = core_params.front_rat_nbanks; 1958 interface_ip.out_w = out_w * BITS_PER_BYTE; 1959 interface_ip.specific_tag = tag > 0; 1960 interface_ip.tag_w = tag; 1961 interface_ip.access_mode = Fast; 1962 interface_ip.obj_func_dyn_energy = 0; 1963 interface_ip.obj_func_dyn_power = 0; 1964 interface_ip.obj_func_leak_power = 0; 1965 interface_ip.obj_func_cycle_t = 1; 1966 interface_ip.num_rw_ports = core_params.front_rat_rw_ports; 1967 interface_ip.num_rd_ports = core_params.decodeW; 1968 interface_ip.num_wr_ports = core_params.decodeW; 1969 interface_ip.num_se_rd_ports = 0; 1970 interface_ip.num_search_ports = 1971 NUM_SOURCE_OPERANDS * core_params.decodeW; 1972 interface_ip.is_cache = true; 1973 interface_ip.pure_cam = false; 1974 interface_ip.pure_ram = false; 1975 interface_ip.throughput = 1.0 / clockRate; 1976 interface_ip.latency = 1.0 / clockRate; 1977 iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", 1978 Core_device, clockRate, 1979 core_params.opt_local, 1980 core_params.core_ty); 1981 iFRAT->output_data.area *= core_params.num_hthreads; 1982 area.set_area(area.get_area() + iFRAT->area.get_area()); 1983 1984 //FRAT 1985 tag = core_params.arch_freg_width; 1986 //the address of CAM needed to be sent out 1987 data = int(ceil(core_params.arch_freg_width + 1988 1 * core_params.globalCheckpoint / 1989 BITS_PER_BYTE)); 1990 out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE)); 1991 size = data * core_params.phy_Regs_FRF_size; 1992 1993 interface_ip.cache_sz = size; 1994 interface_ip.line_sz = data; 1995 interface_ip.assoc = CAM_ASSOC; 1996 interface_ip.nbanks = core_params.front_rat_nbanks; 1997 interface_ip.out_w = out_w * BITS_PER_BYTE; 1998 interface_ip.specific_tag = tag > 0; 1999 interface_ip.tag_w = tag; 2000 interface_ip.access_mode = Fast; 2001 interface_ip.obj_func_dyn_energy = 0; 2002 interface_ip.obj_func_dyn_power = 0; 2003 interface_ip.obj_func_leak_power = 0; 2004 interface_ip.obj_func_cycle_t = 1; 2005 interface_ip.num_rw_ports = core_params.front_rat_rw_ports; 2006 interface_ip.num_rd_ports = core_params.decodeW; 2007 interface_ip.num_wr_ports = core_params.fp_decodeW; 2008 interface_ip.num_se_rd_ports = 0; 2009 interface_ip.num_search_ports = 2010 NUM_SOURCE_OPERANDS * core_params.fp_decodeW; 2011 interface_ip.is_cache = true; 2012 interface_ip.pure_cam = false; 2013 interface_ip.pure_ram = false; 2014 interface_ip.throughput = 1.0 / clockRate; 2015 interface_ip.latency = 1.0 / clockRate; 2016 fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", 2017 Core_device, clockRate, 2018 core_params.opt_local, 2019 core_params.core_ty); 2020 fFRAT->output_data.area *= core_params.num_hthreads; 2021 area.set_area(area.get_area() + fFRAT->area.get_area()); 2022 2023 } 2024 //No RRAT for RS based OOO 2025 //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified 2026 data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); 2027 size = data * core_params.num_ifreelist_entries; 2028 2029 interface_ip.cache_sz = size; 2030 interface_ip.line_sz = data; 2031 interface_ip.assoc = FREELIST_ASSOC; 2032 interface_ip.nbanks = core_params.freelist_nbanks; 2033 interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; 2034 interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; 2035 interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; 2036 interface_ip.access_mode = Fast; 2037 interface_ip.obj_func_dyn_energy = 0; 2038 interface_ip.obj_func_dyn_power = 0; 2039 interface_ip.obj_func_leak_power = 0; 2040 interface_ip.obj_func_cycle_t = 1; 2041 interface_ip.num_rw_ports = core_params.freelist_rw_ports; 2042 interface_ip.num_rd_ports = core_params.decodeW; 2043 interface_ip.num_wr_ports = 2044 core_params.decodeW - 1 + core_params.commitW; 2045 interface_ip.num_se_rd_ports = 0; 2046 interface_ip.num_search_ports = 0; 2047 interface_ip.is_cache = false; 2048 interface_ip.pure_cam = false; 2049 interface_ip.pure_ram = true; 2050 interface_ip.throughput = 1.0 / clockRate; 2051 interface_ip.latency = 1.0 / clockRate; 2052 ifreeL = new ArrayST(xml_data, &interface_ip, "Unified Free List", 2053 Core_device, clockRate, core_params.opt_local, 2054 core_params.core_ty); 2055 ifreeL->output_data.area *= core_params.num_hthreads; 2056 area.set_area(area.get_area() + ifreeL->area.get_area()); 2057 } 2058 2059 } 2060 idcl = 2061 new dep_resource_conflict_check(xml_data, 2062 "Instruction Dependency Check?", 2063 &interface_ip, core_params, 2064 core_params.phy_ireg_width, 2065 clockRate); 2066 fdcl = 2067 new dep_resource_conflict_check(xml_data, 2068 "FP Dependency Check?", &interface_ip, 2069 core_params, 2070 core_params.phy_freg_width, clockRate); 2071} 2072 2073Core::Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_) 2074 : McPATComponent(_xml_data), ifu(NULL), lsu(NULL), mmu(NULL), 2075 exu(NULL), rnu(NULL), corepipe (NULL), undiffCore(NULL), l2cache (NULL), 2076 ithCore(_ithCore), interface_ip(*interface_ip_) { 2077 2078 ostringstream os; 2079 os << ithCore; 2080 name = "Core " + os.str(); 2081 2082 int i = 0; 2083 XMLNode* childXML; 2084 for (i = 0; i < xml_data->nChildNode("component"); i++) { 2085 childXML = xml_data->getChildNodePtr("component", &i); 2086 XMLCSTR type = childXML->getAttribute("type"); 2087 if (!type) 2088 warnMissingComponentType(childXML->getAttribute("id")); 2089 2090 STRCMP(type, "CacheUnit") { 2091 XMLCSTR comp_name = childXML->getAttribute("id"); 2092 if (!comp_name) 2093 continue; 2094 2095 STRCMP(comp_name, "system.L20") { 2096 l2cache = new CacheUnit(childXML, &interface_ip); 2097 children.push_back(l2cache); 2098 } 2099 } 2100 } 2101 2102 set_core_param(); 2103 clockRate = core_params.clockRate; 2104 2105 ifu = new InstFetchU(xml_data, &interface_ip, core_params, 2106 core_stats); 2107 children.push_back(ifu); 2108 lsu = new LoadStoreU(xml_data, &interface_ip, core_params, 2109 core_stats); 2110 children.push_back(lsu); 2111 mmu = new MemManU(xml_data, &interface_ip, core_params, 2112 core_stats); 2113 children.push_back(mmu); 2114 exu = new EXECU(xml_data, &interface_ip, lsu->lsq_height, 2115 core_params, core_stats); 2116 children.push_back(exu); 2117 undiffCore = new UndiffCore(xml_data, &interface_ip, core_params); 2118 children.push_back(undiffCore); 2119 if (core_params.core_ty == OOO) { 2120 rnu = new RENAMINGU(xml_data, &interface_ip, core_params, 2121 core_stats); 2122 children.push_back(rnu); 2123 } 2124 corepipe = new Pipeline(xml_data, &interface_ip, core_params); 2125 children.push_back(corepipe); 2126 2127 double pipeline_area_per_unit; 2128 if (core_params.core_ty == OOO) { 2129 pipeline_area_per_unit = (corepipe->area.get_area() * 2130 core_params.num_pipelines) / 5.0; 2131 if (rnu->exist) { 2132 rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); 2133 } 2134 } else { 2135 pipeline_area_per_unit = (corepipe->area.get_area() * 2136 core_params.num_pipelines) / 4.0; 2137 } 2138 2139 // Move all of this to computeArea 2140 //area.set_area(area.get_area()+ corepipe->area.get_area()); 2141 if (ifu->exist) { 2142 ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); 2143 area.set_area(area.get_area() + ifu->area.get_area()); 2144 } 2145 if (lsu->exist) { 2146 lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); 2147 area.set_area(area.get_area() + lsu->area.get_area()); 2148 } 2149 if (exu->exist) { 2150 exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); 2151 area.set_area(area.get_area() + exu->area.get_area()); 2152 } 2153 if (mmu->exist) { 2154 mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); 2155 area.set_area(area.get_area() + mmu->area.get_area()); 2156 } 2157 2158 if (core_params.core_ty == OOO) { 2159 if (rnu->exist) { 2160 2161 area.set_area(area.get_area() + rnu->area.get_area()); 2162 } 2163 } 2164 2165 if (undiffCore->exist) { 2166 area.set_area(area.get_area() + undiffCore->area.get_area()); 2167 } 2168 2169 if (l2cache) { 2170 area.set_area(area.get_area() + l2cache->area.get_area()); 2171 } 2172} 2173 2174 2175void BranchPredictor::computeEnergy() { 2176 if (!exist) return; 2177 2178 // ASSUMPTION: All instructions access the branch predictors at Fetch and 2179 // only branch instrucions update the predictors regardless 2180 // of the correctness of the prediction. 2181 double tdp_read_accesses = 2182 core_params.predictionW * core_stats.BR_duty_cycle; 2183 globalBPT->tdp_stats.reset(); 2184 globalBPT->tdp_stats.readAc.access = tdp_read_accesses; 2185 globalBPT->tdp_stats.writeAc.access = 0; 2186 globalBPT->rtp_stats.reset(); 2187 globalBPT->rtp_stats.readAc.access = core_stats.total_instructions; 2188 globalBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; 2189 globalBPT->power_t.reset(); 2190 globalBPT->power_t.readOp.dynamic += 2191 globalBPT->local_result.power.readOp.dynamic * 2192 globalBPT->tdp_stats.readAc.access + 2193 globalBPT->local_result.power.writeOp.dynamic * 2194 globalBPT->tdp_stats.writeAc.access; 2195 globalBPT->power_t = globalBPT->power_t + 2196 globalBPT->local_result.power * pppm_lkg; 2197 globalBPT->rt_power.reset(); 2198 globalBPT->rt_power.readOp.dynamic += 2199 globalBPT->local_result.power.readOp.dynamic * 2200 globalBPT->rtp_stats.readAc.access + 2201 globalBPT->local_result.power.writeOp.dynamic * 2202 globalBPT->rtp_stats.writeAc.access; 2203 2204 L1_localBPT->tdp_stats.reset(); 2205 L1_localBPT->tdp_stats.readAc.access = tdp_read_accesses; 2206 L1_localBPT->tdp_stats.writeAc.access = 0; 2207 L1_localBPT->rtp_stats.reset(); 2208 L1_localBPT->rtp_stats.readAc.access = core_stats.total_instructions; 2209 L1_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; 2210 L1_localBPT->power_t.reset(); 2211 L1_localBPT->power_t.readOp.dynamic += 2212 L1_localBPT->local_result.power.readOp.dynamic * 2213 L1_localBPT->tdp_stats.readAc.access + 2214 L1_localBPT->local_result.power.writeOp.dynamic * 2215 L1_localBPT->tdp_stats.writeAc.access; 2216 L1_localBPT->power_t = L1_localBPT->power_t + 2217 L1_localBPT->local_result.power * pppm_lkg; 2218 L1_localBPT->rt_power.reset(); 2219 L1_localBPT->rt_power.readOp.dynamic += 2220 L1_localBPT->local_result.power.readOp.dynamic * 2221 L1_localBPT->rtp_stats.readAc.access + 2222 L1_localBPT->local_result.power.writeOp.dynamic * 2223 L1_localBPT->rtp_stats.writeAc.access; 2224 2225 L2_localBPT->tdp_stats.reset(); 2226 L2_localBPT->tdp_stats.readAc.access = tdp_read_accesses; 2227 L2_localBPT->tdp_stats.writeAc.access = 0; 2228 L2_localBPT->rtp_stats.reset(); 2229 L2_localBPT->rtp_stats.readAc.access = core_stats.branch_instructions; 2230 L2_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; 2231 L2_localBPT->power_t.reset(); 2232 L2_localBPT->power_t.readOp.dynamic += 2233 L2_localBPT->local_result.power.readOp.dynamic * 2234 L2_localBPT->tdp_stats.readAc.access + 2235 L2_localBPT->local_result.power.writeOp.dynamic * 2236 L2_localBPT->tdp_stats.writeAc.access; 2237 L2_localBPT->power_t = L2_localBPT->power_t + 2238 L2_localBPT->local_result.power * pppm_lkg; 2239 L2_localBPT->rt_power.reset(); 2240 L2_localBPT->rt_power.readOp.dynamic += 2241 L2_localBPT->local_result.power.readOp.dynamic * 2242 L2_localBPT->rtp_stats.readAc.access + 2243 L2_localBPT->local_result.power.writeOp.dynamic * 2244 L2_localBPT->rtp_stats.writeAc.access; 2245 2246 chooser->tdp_stats.reset(); 2247 chooser->tdp_stats.readAc.access = tdp_read_accesses; 2248 chooser->tdp_stats.writeAc.access = 0; 2249 chooser->rtp_stats.reset(); 2250 chooser->rtp_stats.readAc.access = core_stats.total_instructions; 2251 chooser->rtp_stats.writeAc.access = core_stats.branch_instructions; 2252 chooser->power_t.reset(); 2253 chooser->power_t.readOp.dynamic += 2254 chooser->local_result.power.readOp.dynamic * 2255 chooser->tdp_stats.readAc.access + 2256 chooser->local_result.power.writeOp.dynamic * 2257 chooser->tdp_stats.writeAc.access; 2258 chooser->power_t = 2259 chooser->power_t + chooser->local_result.power * pppm_lkg; 2260 chooser->rt_power.reset(); 2261 chooser->rt_power.readOp.dynamic += 2262 chooser->local_result.power.readOp.dynamic * 2263 chooser->rtp_stats.readAc.access + 2264 chooser->local_result.power.writeOp.dynamic * 2265 chooser->rtp_stats.writeAc.access; 2266 2267 RAS->tdp_stats.reset(); 2268 RAS->tdp_stats.readAc.access = tdp_read_accesses; 2269 RAS->tdp_stats.writeAc.access = 0; 2270 RAS->rtp_stats.reset(); 2271 RAS->rtp_stats.readAc.access = core_stats.function_calls; 2272 RAS->rtp_stats.writeAc.access = core_stats.function_calls; 2273 RAS->power_t.reset(); 2274 RAS->power_t.readOp.dynamic += 2275 RAS->local_result.power.readOp.dynamic * RAS->tdp_stats.readAc.access + 2276 RAS->local_result.power.writeOp.dynamic * 2277 RAS->tdp_stats.writeAc.access; 2278 RAS->power_t = RAS->power_t + RAS->local_result.power * 2279 core_params.pppm_lkg_multhread; 2280 RAS->rt_power.reset(); 2281 RAS->rt_power.readOp.dynamic += RAS->local_result.power.readOp.dynamic * 2282 RAS->rtp_stats.readAc.access + 2283 RAS->local_result.power.writeOp.dynamic * 2284 RAS->rtp_stats.writeAc.access; 2285 2286 output_data.reset(); 2287 if (globalBPT) { 2288 globalBPT->output_data.peak_dynamic_power = 2289 globalBPT->power_t.readOp.dynamic * clockRate; 2290 globalBPT->output_data.runtime_dynamic_energy = 2291 globalBPT->rt_power.readOp.dynamic; 2292 output_data += globalBPT->output_data; 2293 } 2294 if (L1_localBPT) { 2295 L1_localBPT->output_data.peak_dynamic_power = 2296 L1_localBPT->power_t.readOp.dynamic * clockRate; 2297 L1_localBPT->output_data.runtime_dynamic_energy = 2298 L1_localBPT->rt_power.readOp.dynamic; 2299 output_data += L1_localBPT->output_data; 2300 } 2301 if (L2_localBPT) { 2302 L2_localBPT->output_data.peak_dynamic_power = 2303 L2_localBPT->power_t.readOp.dynamic * clockRate; 2304 L2_localBPT->output_data.runtime_dynamic_energy = 2305 L2_localBPT->rt_power.readOp.dynamic; 2306 output_data += L2_localBPT->output_data; 2307 } 2308 if (chooser) { 2309 chooser->output_data.peak_dynamic_power = 2310 chooser->power_t.readOp.dynamic * clockRate; 2311 chooser->output_data.runtime_dynamic_energy = 2312 chooser->rt_power.readOp.dynamic; 2313 output_data += chooser->output_data; 2314 } 2315 if (RAS) { 2316 RAS->output_data.peak_dynamic_power = 2317 RAS->power_t.readOp.dynamic * clockRate; 2318 RAS->output_data.subthreshold_leakage_power = 2319 RAS->power_t.readOp.leakage * core_params.num_hthreads; 2320 RAS->output_data.gate_leakage_power = 2321 RAS->power_t.readOp.gate_leakage * core_params.num_hthreads; 2322 RAS->output_data.runtime_dynamic_energy = RAS->rt_power.readOp.dynamic; 2323 output_data += RAS->output_data; 2324 } 2325} 2326 2327void BranchPredictor::displayData(uint32_t indent, int plevel) { 2328 if (!exist) return; 2329 2330 McPATComponent::displayData(indent, plevel); 2331 2332 globalBPT->displayData(indent + 4, plevel); 2333 L1_localBPT->displayData(indent + 4, plevel); 2334 L2_localBPT->displayData(indent + 4, plevel); 2335 chooser->displayData(indent + 4, plevel); 2336 RAS->displayData(indent + 4, plevel); 2337} 2338 2339void InstFetchU::computeEnergy() { 2340 if (!exist) return; 2341 2342 if (BPT) { 2343 BPT->computeEnergy(); 2344 } 2345 2346 IB->tdp_stats.reset(); 2347 IB->tdp_stats.readAc.access = core_params.peak_issueW; 2348 IB->tdp_stats.writeAc.access = core_params.peak_issueW; 2349 IB->rtp_stats.reset(); 2350 IB->rtp_stats.readAc.access = core_stats.total_instructions; 2351 IB->rtp_stats.writeAc.access = core_stats.total_instructions; 2352 IB->power_t.reset(); 2353 IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic * 2354 IB->tdp_stats.readAc.access + 2355 IB->local_result.power.writeOp.dynamic * IB->tdp_stats.writeAc.access; 2356 IB->power_t = IB->power_t + IB->local_result.power * pppm_lkg; 2357 IB->rt_power.reset(); 2358 IB->rt_power.readOp.dynamic += IB->local_result.power.readOp.dynamic * 2359 IB->rtp_stats.readAc.access + 2360 IB->local_result.power.writeOp.dynamic * IB->rtp_stats.writeAc.access; 2361 2362 if (core_params.predictionW > 0) { 2363 BTB->tdp_stats.reset(); 2364 BTB->tdp_stats.readAc.access = core_params.predictionW; 2365 BTB->tdp_stats.writeAc.access = 0; 2366 BTB->rtp_stats.reset(); 2367 BTB->rtp_stats.readAc.access = inst_fetch_stats.btb_read_accesses; 2368 BTB->rtp_stats.writeAc.access = inst_fetch_stats.btb_write_accesses; 2369 BTB->power_t.reset(); 2370 BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic * 2371 BTB->tdp_stats.readAc.access + 2372 BTB->local_result.power.writeOp.dynamic * 2373 BTB->tdp_stats.writeAc.access; 2374 BTB->rt_power.reset(); 2375 BTB->rt_power.readOp.dynamic += 2376 BTB->local_result.power.readOp.dynamic * 2377 BTB->rtp_stats.readAc.access + 2378 BTB->local_result.power.writeOp.dynamic * 2379 BTB->rtp_stats.writeAc.access; 2380 } 2381 2382 ID_inst->tdp_stats.reset(); 2383 ID_inst->tdp_stats.readAc.access = core_params.decodeW; 2384 ID_inst->power_t.reset(); 2385 ID_inst->power_t = ID_misc->power; 2386 ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic * 2387 ID_inst->tdp_stats.readAc.access; 2388 ID_inst->rtp_stats.reset(); 2389 ID_inst->rtp_stats.readAc.access = core_stats.total_instructions; 2390 ID_inst->rt_power.reset(); 2391 ID_inst->rt_power.readOp.dynamic = ID_inst->power.readOp.dynamic * 2392 ID_inst->rtp_stats.readAc.access; 2393 2394 ID_operand->tdp_stats.reset(); 2395 ID_operand->tdp_stats.readAc.access = core_params.decodeW; 2396 ID_operand->power_t.reset(); 2397 ID_operand->power_t = ID_misc->power; 2398 ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic * 2399 ID_operand->tdp_stats.readAc.access; 2400 ID_operand->rtp_stats.reset(); 2401 ID_operand->rtp_stats.readAc.access = core_stats.total_instructions; 2402 ID_operand->rt_power.reset(); 2403 ID_operand->rt_power.readOp.dynamic = ID_operand->power.readOp.dynamic * 2404 ID_operand->rtp_stats.readAc.access; 2405 2406 ID_misc->tdp_stats.reset(); 2407 ID_misc->tdp_stats.readAc.access = core_params.decodeW; 2408 ID_misc->power_t.reset(); 2409 ID_misc->power_t = ID_misc->power; 2410 ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic * 2411 ID_misc->tdp_stats.readAc.access; 2412 ID_misc->rtp_stats.reset(); 2413 ID_misc->rtp_stats.readAc.access = core_stats.total_instructions; 2414 ID_misc->rt_power.reset(); 2415 ID_misc->rt_power.readOp.dynamic = ID_misc->power.readOp.dynamic * 2416 ID_misc->rtp_stats.readAc.access; 2417 2418 power.reset(); 2419 rt_power.reset(); 2420 McPATComponent::computeEnergy(); 2421 2422 output_data.reset(); 2423 if (icache) { 2424 output_data += icache->output_data; 2425 } 2426 if (IB) { 2427 IB->output_data.peak_dynamic_power = 2428 IB->power_t.readOp.dynamic * clockRate; 2429 IB->output_data.runtime_dynamic_energy = IB->rt_power.readOp.dynamic; 2430 output_data += IB->output_data; 2431 } 2432 if (BTB) { 2433 BTB->output_data.peak_dynamic_power = 2434 BTB->power_t.readOp.dynamic * clockRate; 2435 BTB->output_data.runtime_dynamic_energy = BTB->rt_power.readOp.dynamic; 2436 output_data += BTB->output_data; 2437 } 2438 if (BPT) { 2439 output_data += BPT->output_data; 2440 } 2441 if (ID_inst) { 2442 ID_inst->output_data.peak_dynamic_power = 2443 ID_inst->power_t.readOp.dynamic * clockRate; 2444 ID_inst->output_data.runtime_dynamic_energy = 2445 ID_inst->rt_power.readOp.dynamic; 2446 output_data += ID_inst->output_data; 2447 } 2448 if (ID_operand) { 2449 ID_operand->output_data.peak_dynamic_power = 2450 ID_operand->power_t.readOp.dynamic * clockRate; 2451 ID_operand->output_data.runtime_dynamic_energy = 2452 ID_operand->rt_power.readOp.dynamic; 2453 output_data += ID_operand->output_data; 2454 } 2455 if (ID_misc) { 2456 ID_misc->output_data.peak_dynamic_power = 2457 ID_misc->power_t.readOp.dynamic * clockRate; 2458 ID_misc->output_data.runtime_dynamic_energy = 2459 ID_misc->rt_power.readOp.dynamic; 2460 output_data += ID_misc->output_data; 2461 } 2462} 2463 2464void InstFetchU::displayData(uint32_t indent, int plevel) { 2465 if (!exist) return; 2466 2467 McPATComponent::displayData(indent, plevel); 2468 2469 if (core_params.predictionW > 0) { 2470 BTB->displayData(indent + 4, plevel); 2471 if (BPT->exist) { 2472 BPT->displayData(indent + 4, plevel); 2473 } 2474 } 2475 IB->displayData(indent + 4, plevel); 2476 ID_inst->displayData(indent + 4, plevel); 2477 ID_operand->displayData(indent + 4, plevel); 2478 ID_misc->displayData(indent + 4, plevel); 2479} 2480 2481void RENAMINGU::computeEnergy() { 2482 if (!exist) return; 2483 2484 idcl->tdp_stats.reset(); 2485 idcl->rtp_stats.reset(); 2486 idcl->power_t.reset(); 2487 idcl->rt_power.reset(); 2488 if (core_params.core_ty == OOO) { 2489 idcl->tdp_stats.readAc.access = core_params.decodeW; 2490 idcl->rtp_stats.readAc.access = 3 * core_params.decodeW * 2491 core_params.decodeW * core_stats.rename_reads; 2492 } else if (core_params.issueW > 1) { 2493 idcl->tdp_stats.readAc.access = core_params.decodeW; 2494 idcl->rtp_stats.readAc.access = 2 * core_stats.int_instructions; 2495 } 2496 idcl->power_t.readOp.dynamic = idcl->tdp_stats.readAc.access * 2497 idcl->power.readOp.dynamic; 2498 idcl->power_t.readOp.leakage = idcl->power.readOp.leakage * 2499 core_params.num_hthreads; 2500 idcl->power_t.readOp.gate_leakage = idcl->power.readOp.gate_leakage * 2501 core_params.num_hthreads; 2502 idcl->rt_power.readOp.dynamic = idcl->rtp_stats.readAc.access * 2503 idcl->power.readOp.dynamic; 2504 2505 fdcl->tdp_stats.reset(); 2506 fdcl->rtp_stats.reset(); 2507 fdcl->power_t.reset(); 2508 fdcl->rt_power.reset(); 2509 if (core_params.core_ty == OOO) { 2510 fdcl->tdp_stats.readAc.access = core_params.decodeW; 2511 fdcl->rtp_stats.readAc.access = 3 * core_params.fp_issueW * 2512 core_params.fp_issueW * core_stats.fp_rename_writes; 2513 } else if (core_params.issueW > 1) { 2514 fdcl->tdp_stats.readAc.access = core_params.decodeW; 2515 fdcl->rtp_stats.readAc.access = core_stats.fp_instructions; 2516 } 2517 fdcl->power_t.readOp.dynamic = fdcl->tdp_stats.readAc.access * 2518 fdcl->power.readOp.dynamic; 2519 fdcl->power_t.readOp.leakage = fdcl->power.readOp.leakage * 2520 core_params.num_hthreads; 2521 fdcl->power_t.readOp.gate_leakage = fdcl->power.readOp.gate_leakage * 2522 core_params.num_hthreads; 2523 fdcl->rt_power.readOp.dynamic = fdcl->rtp_stats.readAc.access * 2524 fdcl->power.readOp.dynamic; 2525 2526 if (iRRAT) { 2527 iRRAT->tdp_stats.reset(); 2528 iRRAT->tdp_stats.readAc.access = iRRAT->l_ip.num_rd_ports; 2529 iRRAT->tdp_stats.writeAc.access = iRRAT->l_ip.num_wr_ports; 2530 iRRAT->rtp_stats.reset(); 2531 iRRAT->rtp_stats.readAc.access = core_stats.rename_writes; 2532 iRRAT->rtp_stats.writeAc.access = core_stats.rename_writes; 2533 iRRAT->power_t.reset(); 2534 iRRAT->power_t.readOp.dynamic += 2535 iRRAT->tdp_stats.readAc.access * iRRAT->power.readOp.dynamic + 2536 iRRAT->tdp_stats.writeAc.access * iRRAT->power.writeOp.dynamic; 2537 iRRAT->rt_power.reset(); 2538 iRRAT->rt_power.readOp.dynamic += 2539 iRRAT->rtp_stats.readAc.access * iRRAT->power.readOp.dynamic + 2540 iRRAT->rtp_stats.writeAc.access * iRRAT->power.writeOp.dynamic; 2541 iRRAT->power_t.readOp.leakage = 2542 iRRAT->power.readOp.leakage * core_params.num_hthreads; 2543 iRRAT->power_t.readOp.gate_leakage = 2544 iRRAT->power.readOp.gate_leakage * core_params.num_hthreads; 2545 } 2546 2547 if (ifreeL) { 2548 ifreeL->tdp_stats.reset(); 2549 ifreeL->tdp_stats.readAc.access = core_params.decodeW; 2550 ifreeL->tdp_stats.writeAc.access = core_params.decodeW; 2551 ifreeL->rtp_stats.reset(); 2552 if (core_params.scheu_ty == PhysicalRegFile) { 2553 ifreeL->rtp_stats.readAc.access = core_stats.rename_reads; 2554 ifreeL->rtp_stats.writeAc.access = 2 * core_stats.rename_writes; 2555 } else if (core_params.scheu_ty == ReservationStation) { 2556 ifreeL->rtp_stats.readAc.access = 2557 core_stats.rename_reads + core_stats.fp_rename_reads; 2558 ifreeL->rtp_stats.writeAc.access = 2559 2 * (core_stats.rename_writes + core_stats.fp_rename_writes); 2560 } 2561 ifreeL->power_t.reset(); 2562 ifreeL->power_t.readOp.dynamic += 2563 ifreeL->tdp_stats.readAc.access * ifreeL->power.readOp.dynamic + 2564 ifreeL->tdp_stats.writeAc.access * ifreeL->power.writeOp.dynamic; 2565 ifreeL->rt_power.reset(); 2566 ifreeL->rt_power.readOp.dynamic += 2567 ifreeL->rtp_stats.readAc.access * ifreeL->power.readOp.dynamic + 2568 ifreeL->rtp_stats.writeAc.access * ifreeL->power.writeOp.dynamic; 2569 ifreeL->power_t.readOp.leakage = 2570 ifreeL->power.readOp.leakage * core_params.num_hthreads; 2571 ifreeL->power_t.readOp.gate_leakage = 2572 ifreeL->power.readOp.gate_leakage * core_params.num_hthreads; 2573 } 2574 2575 if (fRRAT) { 2576 fRRAT->tdp_stats.reset(); 2577 fRRAT->tdp_stats.readAc.access = fRRAT->l_ip.num_rd_ports; 2578 fRRAT->tdp_stats.writeAc.access = fRRAT->l_ip.num_wr_ports; 2579 fRRAT->rtp_stats.reset(); 2580 fRRAT->rtp_stats.readAc.access = core_stats.fp_rename_writes; 2581 fRRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes; 2582 fRRAT->power_t.reset(); 2583 fRRAT->power_t.readOp.dynamic += 2584 fRRAT->tdp_stats.readAc.access * fRRAT->power.readOp.dynamic + 2585 fRRAT->tdp_stats.writeAc.access * fRRAT->power.writeOp.dynamic; 2586 fRRAT->rt_power.reset(); 2587 fRRAT->rt_power.readOp.dynamic += 2588 fRRAT->rtp_stats.readAc.access * fRRAT->power.readOp.dynamic + 2589 fRRAT->rtp_stats.writeAc.access * fRRAT->power.writeOp.dynamic; 2590 fRRAT->power_t.readOp.leakage = 2591 fRRAT->power.readOp.leakage * core_params.num_hthreads; 2592 fRRAT->power_t.readOp.gate_leakage = 2593 fRRAT->power.readOp.gate_leakage * core_params.num_hthreads; 2594 } 2595 2596 if (ffreeL) { 2597 ffreeL->tdp_stats.reset(); 2598 ffreeL->tdp_stats.readAc.access = core_params.decodeW; 2599 ffreeL->tdp_stats.writeAc.access = core_params.decodeW; 2600 ffreeL->rtp_stats.reset(); 2601 ffreeL->rtp_stats.readAc.access = core_stats.fp_rename_reads; 2602 ffreeL->rtp_stats.writeAc.access = 2 * core_stats.fp_rename_writes; 2603 ffreeL->power_t.reset(); 2604 ffreeL->power_t.readOp.dynamic += 2605 ffreeL->tdp_stats.readAc.access * ffreeL->power.readOp.dynamic + 2606 ffreeL->tdp_stats.writeAc.access * ffreeL->power.writeOp.dynamic; 2607 ffreeL->rt_power.reset(); 2608 ffreeL->rt_power.readOp.dynamic += 2609 ffreeL->rtp_stats.readAc.access * ffreeL->power.readOp.dynamic + 2610 ffreeL->rtp_stats.writeAc.access * ffreeL->power.writeOp.dynamic; 2611 ffreeL->power_t.readOp.leakage = 2612 ffreeL->power.readOp.leakage * core_params.num_hthreads; 2613 ffreeL->power_t.readOp.gate_leakage = 2614 ffreeL->power.readOp.gate_leakage * core_params.num_hthreads; 2615 } 2616 2617 if (iFRAT) { 2618 tdp_stats.reset(); 2619 if (core_params.rm_ty == RAMbased) { 2620 iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_rd_ports; 2621 iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports; 2622 iFRAT->tdp_stats.searchAc.access = iFRAT->l_ip.num_search_ports; 2623 } else if ((core_params.rm_ty == CAMbased)) { 2624 iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_search_ports; 2625 iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports; 2626 } 2627 rtp_stats.reset(); 2628 iFRAT->rtp_stats.readAc.access = core_stats.rename_reads; 2629 iFRAT->rtp_stats.writeAc.access = core_stats.rename_writes; 2630 if (core_params.scheu_ty == ReservationStation && 2631 core_params.rm_ty == RAMbased) { 2632 iFRAT->rtp_stats.searchAc.access = 2633 core_stats.committed_int_instructions; 2634 } 2635 iFRAT->power_t.reset(); 2636 iFRAT->power_t.readOp.dynamic += iFRAT->tdp_stats.readAc.access 2637 * (iFRAT->local_result.power.readOp.dynamic 2638 + idcl->power.readOp.dynamic) 2639 + iFRAT->tdp_stats.writeAc.access 2640 * iFRAT->local_result.power.writeOp.dynamic 2641 + iFRAT->tdp_stats.searchAc.access 2642 * iFRAT->local_result.power.searchOp.dynamic; 2643 iFRAT->power_t.readOp.leakage = 2644 iFRAT->power.readOp.leakage * core_params.num_hthreads; 2645 iFRAT->power_t.readOp.gate_leakage = 2646 iFRAT->power.readOp.gate_leakage * core_params.num_hthreads; 2647 iFRAT->rt_power.reset(); 2648 iFRAT->rt_power.readOp.dynamic += iFRAT->rtp_stats.readAc.access 2649 * (iFRAT->local_result.power.readOp.dynamic 2650 + idcl->power.readOp.dynamic) 2651 + iFRAT->rtp_stats.writeAc.access 2652 * iFRAT->local_result.power.writeOp.dynamic 2653 + iFRAT->rtp_stats.searchAc.access 2654 * iFRAT->local_result.power.searchOp.dynamic; 2655 } 2656 2657 if (fFRAT) { 2658 tdp_stats.reset(); 2659 fFRAT->tdp_stats.writeAc.access = fFRAT->l_ip.num_wr_ports; 2660 if ((core_params.rm_ty == CAMbased)) { 2661 fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_search_ports; 2662 } else if (core_params.rm_ty == RAMbased) { 2663 fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_rd_ports; 2664 if (core_params.scheu_ty == ReservationStation) { 2665 fFRAT->tdp_stats.searchAc.access = fFRAT->l_ip.num_search_ports; 2666 } 2667 } 2668 rtp_stats.reset(); 2669 fFRAT->rtp_stats.readAc.access = core_stats.fp_rename_reads; 2670 fFRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes; 2671 if (core_params.scheu_ty == ReservationStation && 2672 core_params.rm_ty == RAMbased) { 2673 fFRAT->rtp_stats.searchAc.access = 2674 core_stats.committed_fp_instructions; 2675 } 2676 fFRAT->power_t.reset(); 2677 fFRAT->power_t.readOp.dynamic += fFRAT->tdp_stats.readAc.access 2678 * (fFRAT->local_result.power.readOp.dynamic 2679 + fdcl->power.readOp.dynamic) 2680 + fFRAT->tdp_stats.writeAc.access 2681 * fFRAT->local_result.power.writeOp.dynamic 2682 + fFRAT->tdp_stats.searchAc.access 2683 * fFRAT->local_result.power.searchOp.dynamic; 2684 fFRAT->power_t.readOp.leakage = 2685 fFRAT->power.readOp.leakage * core_params.num_hthreads; 2686 fFRAT->power_t.readOp.gate_leakage = 2687 fFRAT->power.readOp.gate_leakage * core_params.num_hthreads; 2688 fFRAT->rt_power.reset(); 2689 fFRAT->rt_power.readOp.dynamic += fFRAT->rtp_stats.readAc.access 2690 * (fFRAT->local_result.power.readOp.dynamic 2691 + fdcl->power.readOp.dynamic) 2692 + fFRAT->rtp_stats.writeAc.access 2693 * fFRAT->local_result.power.writeOp.dynamic 2694 + fFRAT->rtp_stats.searchAc.access 2695 * fFRAT->local_result.power.searchOp.dynamic; 2696 } 2697 2698 output_data.reset(); 2699 if (iFRAT) { 2700 iFRAT->output_data.peak_dynamic_power = 2701 iFRAT->power_t.readOp.dynamic * clockRate; 2702 iFRAT->output_data.subthreshold_leakage_power = 2703 iFRAT->power_t.readOp.leakage; 2704 iFRAT->output_data.gate_leakage_power = 2705 iFRAT->power_t.readOp.gate_leakage; 2706 iFRAT->output_data.runtime_dynamic_energy = 2707 iFRAT->rt_power.readOp.dynamic; 2708 output_data += iFRAT->output_data; 2709 } 2710 if (fFRAT) { 2711 fFRAT->output_data.peak_dynamic_power = 2712 fFRAT->power_t.readOp.dynamic * clockRate; 2713 fFRAT->output_data.subthreshold_leakage_power = 2714 fFRAT->power_t.readOp.leakage; 2715 fFRAT->output_data.gate_leakage_power = 2716 fFRAT->power_t.readOp.gate_leakage; 2717 fFRAT->output_data.runtime_dynamic_energy = 2718 fFRAT->rt_power.readOp.dynamic; 2719 output_data += fFRAT->output_data; 2720 } 2721 if (iRRAT) { 2722 iRRAT->output_data.peak_dynamic_power = 2723 iRRAT->power_t.readOp.dynamic * clockRate; 2724 iRRAT->output_data.subthreshold_leakage_power = 2725 iRRAT->power_t.readOp.leakage; 2726 iRRAT->output_data.gate_leakage_power = 2727 iRRAT->power_t.readOp.gate_leakage; 2728 iRRAT->output_data.runtime_dynamic_energy = 2729 iRRAT->rt_power.readOp.dynamic; 2730 output_data += iRRAT->output_data; 2731 } 2732 if (fRRAT) { 2733 fRRAT->output_data.peak_dynamic_power = 2734 fRRAT->power_t.readOp.dynamic * clockRate; 2735 fRRAT->output_data.subthreshold_leakage_power = 2736 fRRAT->power_t.readOp.leakage; 2737 fRRAT->output_data.gate_leakage_power = 2738 fRRAT->power_t.readOp.gate_leakage; 2739 fRRAT->output_data.runtime_dynamic_energy = 2740 fRRAT->rt_power.readOp.dynamic; 2741 output_data += fRRAT->output_data; 2742 } 2743 if (ifreeL) { 2744 ifreeL->output_data.peak_dynamic_power = 2745 ifreeL->power_t.readOp.dynamic * clockRate; 2746 ifreeL->output_data.subthreshold_leakage_power = 2747 ifreeL->power_t.readOp.leakage; 2748 ifreeL->output_data.gate_leakage_power = 2749 ifreeL->power_t.readOp.gate_leakage; 2750 ifreeL->output_data.runtime_dynamic_energy = 2751 ifreeL->rt_power.readOp.dynamic; 2752 output_data += ifreeL->output_data; 2753 } 2754 if (ffreeL) { 2755 ffreeL->output_data.peak_dynamic_power = 2756 ffreeL->power_t.readOp.dynamic * clockRate; 2757 ffreeL->output_data.subthreshold_leakage_power = 2758 ffreeL->power_t.readOp.leakage; 2759 ffreeL->output_data.gate_leakage_power = 2760 ffreeL->power_t.readOp.gate_leakage; 2761 ffreeL->output_data.runtime_dynamic_energy = 2762 ffreeL->rt_power.readOp.dynamic; 2763 output_data += ffreeL->output_data; 2764 } 2765 if (idcl) { 2766 idcl->output_data.peak_dynamic_power = 2767 idcl->power_t.readOp.dynamic * clockRate; 2768 idcl->output_data.subthreshold_leakage_power = 2769 idcl->power_t.readOp.leakage; 2770 idcl->output_data.gate_leakage_power = 2771 idcl->power_t.readOp.gate_leakage; 2772 idcl->output_data.runtime_dynamic_energy = 2773 idcl->rt_power.readOp.dynamic; 2774 output_data += idcl->output_data; 2775 } 2776 if (fdcl) { 2777 fdcl->output_data.peak_dynamic_power = 2778 fdcl->power_t.readOp.dynamic * clockRate; 2779 fdcl->output_data.subthreshold_leakage_power = 2780 fdcl->power_t.readOp.leakage; 2781 fdcl->output_data.gate_leakage_power = 2782 fdcl->power_t.readOp.gate_leakage; 2783 fdcl->output_data.runtime_dynamic_energy = 2784 fdcl->rt_power.readOp.dynamic; 2785 output_data += fdcl->output_data; 2786 } 2787 if (RAHT) { 2788 output_data += RAHT->output_data; 2789 } 2790} 2791 2792void RENAMINGU::displayData(uint32_t indent, int plevel) { 2793 if (!exist) return; 2794 2795 McPATComponent::displayData(indent, plevel); 2796 2797 if (core_params.core_ty == OOO) { 2798 iFRAT->displayData(indent + 4, plevel); 2799 fFRAT->displayData(indent + 4, plevel); 2800 ifreeL->displayData(indent + 4, plevel); 2801 2802 if (core_params.scheu_ty == PhysicalRegFile) { 2803 iRRAT->displayData(indent + 4, plevel); 2804 fRRAT->displayData(indent + 4, plevel); 2805 ffreeL->displayData(indent + 4, plevel); 2806 } 2807 } 2808 idcl->displayData(indent + 4, plevel); 2809 fdcl->displayData(indent + 4, plevel); 2810} 2811 2812void SchedulerU::computeEnergy() { 2813 if (!exist) return; 2814 2815 double ROB_duty_cycle; 2816 ROB_duty_cycle = 1; 2817 2818 if (int_instruction_selection) { 2819 int_instruction_selection->computeEnergy(); 2820 } 2821 2822 if (fp_instruction_selection) { 2823 fp_instruction_selection->computeEnergy(); 2824 } 2825 2826 if (int_inst_window) { 2827 int_inst_window->tdp_stats.reset(); 2828 int_inst_window->rtp_stats.reset(); 2829 int_inst_window->power_t.reset(); 2830 int_inst_window->rt_power.reset(); 2831 if (core_params.core_ty == OOO) { 2832 int_inst_window->tdp_stats.readAc.access = 2833 core_params.issueW * core_params.num_pipelines; 2834 int_inst_window->tdp_stats.writeAc.access = 2835 core_params.issueW * core_params.num_pipelines; 2836 int_inst_window->tdp_stats.searchAc.access = 2837 core_params.issueW * core_params.num_pipelines; 2838 2839 int_inst_window->power_t.readOp.dynamic += 2840 int_inst_window->local_result.power.readOp.dynamic * 2841 int_inst_window->tdp_stats.readAc.access + 2842 int_inst_window->local_result.power.searchOp.dynamic * 2843 int_inst_window->tdp_stats.searchAc.access + 2844 int_inst_window->local_result.power.writeOp.dynamic * 2845 int_inst_window->tdp_stats.writeAc.access; 2846 2847 int_inst_window->rtp_stats.readAc.access = 2848 core_stats.inst_window_reads; 2849 int_inst_window->rtp_stats.writeAc.access = 2850 core_stats.inst_window_writes; 2851 int_inst_window->rtp_stats.searchAc.access = 2852 core_stats.inst_window_wakeup_accesses; 2853 2854 int_inst_window->rt_power.readOp.dynamic += 2855 int_inst_window->local_result.power.readOp.dynamic * 2856 int_inst_window->rtp_stats.readAc.access + 2857 int_inst_window->local_result.power.searchOp.dynamic * 2858 int_inst_window->rtp_stats.searchAc.access + 2859 int_inst_window->local_result.power.writeOp.dynamic * 2860 int_inst_window->rtp_stats.writeAc.access; 2861 } else if (core_params.multithreaded) { 2862 int_inst_window->tdp_stats.readAc.access = 2863 core_params.issueW * core_params.num_pipelines; 2864 int_inst_window->tdp_stats.writeAc.access = 2865 core_params.issueW * core_params.num_pipelines; 2866 int_inst_window->tdp_stats.searchAc.access = 2867 core_params.issueW * core_params.num_pipelines; 2868 2869 int_inst_window->power_t.readOp.dynamic += 2870 int_inst_window->local_result.power.readOp.dynamic * 2871 int_inst_window->tdp_stats.readAc.access + 2872 int_inst_window->local_result.power.searchOp.dynamic * 2873 int_inst_window->tdp_stats.searchAc.access + 2874 int_inst_window->local_result.power.writeOp.dynamic * 2875 int_inst_window->tdp_stats.writeAc.access; 2876 2877 int_inst_window->rtp_stats.readAc.access = 2878 core_stats.int_instructions + core_stats.fp_instructions; 2879 int_inst_window->rtp_stats.writeAc.access = 2880 core_stats.int_instructions + core_stats.fp_instructions; 2881 int_inst_window->rtp_stats.searchAc.access = 2882 2 * (core_stats.int_instructions + core_stats.fp_instructions); 2883 2884 int_inst_window->rt_power.readOp.dynamic += 2885 int_inst_window->local_result.power.readOp.dynamic * 2886 int_inst_window->rtp_stats.readAc.access + 2887 int_inst_window->local_result.power.searchOp.dynamic * 2888 int_inst_window->rtp_stats.searchAc.access + 2889 int_inst_window->local_result.power.writeOp.dynamic * 2890 int_inst_window->rtp_stats.writeAc.access; 2891 } 2892 } 2893 2894 if (fp_inst_window) { 2895 fp_inst_window->tdp_stats.reset(); 2896 fp_inst_window->tdp_stats.readAc.access = 2897 fp_inst_window->l_ip.num_rd_ports * core_params.num_fp_pipelines; 2898 fp_inst_window->tdp_stats.writeAc.access = 2899 fp_inst_window->l_ip.num_wr_ports * core_params.num_fp_pipelines; 2900 fp_inst_window->tdp_stats.searchAc.access = 2901 fp_inst_window->l_ip.num_search_ports * 2902 core_params.num_fp_pipelines; 2903 2904 fp_inst_window->rtp_stats.reset(); 2905 fp_inst_window->rtp_stats.readAc.access = 2906 core_stats.fp_inst_window_reads; 2907 fp_inst_window->rtp_stats.writeAc.access = 2908 core_stats.fp_inst_window_writes; 2909 fp_inst_window->rtp_stats.searchAc.access = 2910 core_stats.fp_inst_window_wakeup_accesses; 2911 2912 fp_inst_window->power_t.reset(); 2913 fp_inst_window->power_t.readOp.dynamic += 2914 fp_inst_window->power.readOp.dynamic * 2915 fp_inst_window->tdp_stats.readAc.access + 2916 fp_inst_window->power.searchOp.dynamic * 2917 fp_inst_window->tdp_stats.searchAc.access + 2918 fp_inst_window->power.writeOp.dynamic * 2919 fp_inst_window->tdp_stats.writeAc.access; 2920 2921 fp_inst_window->rt_power.reset(); 2922 fp_inst_window->rt_power.readOp.dynamic += 2923 fp_inst_window->power.readOp.dynamic * 2924 fp_inst_window->rtp_stats.readAc.access + 2925 fp_inst_window->power.searchOp.dynamic * 2926 fp_inst_window->rtp_stats.searchAc.access + 2927 fp_inst_window->power.writeOp.dynamic * 2928 fp_inst_window->rtp_stats.writeAc.access; 2929 } 2930 2931 if (ROB) { 2932 ROB->tdp_stats.reset(); 2933 ROB->tdp_stats.readAc.access = core_params.commitW * 2934 core_params.num_pipelines * ROB_duty_cycle; 2935 ROB->tdp_stats.writeAc.access = core_params.issueW * 2936 core_params.num_pipelines * ROB_duty_cycle; 2937 ROB->rtp_stats.reset(); 2938 ROB->rtp_stats.readAc.access = core_stats.ROB_reads; 2939 ROB->rtp_stats.writeAc.access = core_stats.ROB_writes; 2940 ROB->power_t.reset(); 2941 ROB->power_t.readOp.dynamic += 2942 ROB->local_result.power.readOp.dynamic * 2943 ROB->tdp_stats.readAc.access + 2944 ROB->local_result.power.writeOp.dynamic * 2945 ROB->tdp_stats.writeAc.access; 2946 ROB->rt_power.reset(); 2947 ROB->rt_power.readOp.dynamic += 2948 ROB->local_result.power.readOp.dynamic * 2949 ROB->rtp_stats.readAc.access + 2950 ROB->local_result.power.writeOp.dynamic * 2951 ROB->rtp_stats.writeAc.access; 2952 } 2953 2954 output_data.reset(); 2955 if (int_inst_window) { 2956 int_inst_window->output_data.subthreshold_leakage_power = 2957 int_inst_window->power_t.readOp.leakage; 2958 int_inst_window->output_data.gate_leakage_power = 2959 int_inst_window->power_t.readOp.gate_leakage; 2960 int_inst_window->output_data.peak_dynamic_power = 2961 int_inst_window->power_t.readOp.dynamic * clockRate; 2962 int_inst_window->output_data.runtime_dynamic_energy = 2963 int_inst_window->rt_power.readOp.dynamic; 2964 output_data += int_inst_window->output_data; 2965 } 2966 if (fp_inst_window) { 2967 fp_inst_window->output_data.subthreshold_leakage_power = 2968 fp_inst_window->power_t.readOp.leakage; 2969 fp_inst_window->output_data.gate_leakage_power = 2970 fp_inst_window->power_t.readOp.gate_leakage; 2971 fp_inst_window->output_data.peak_dynamic_power = 2972 fp_inst_window->power_t.readOp.dynamic * clockRate; 2973 fp_inst_window->output_data.runtime_dynamic_energy = 2974 fp_inst_window->rt_power.readOp.dynamic; 2975 output_data += fp_inst_window->output_data; 2976 } 2977 if (ROB) { 2978 ROB->output_data.peak_dynamic_power = 2979 ROB->power_t.readOp.dynamic * clockRate; 2980 ROB->output_data.runtime_dynamic_energy = 2981 ROB->rt_power.readOp.dynamic; 2982 output_data += ROB->output_data; 2983 } 2984 2985 // Integer and FP instruction selection logic is not included in the 2986 // roll-up due to the uninitialized area 2987 /* 2988 if (int_instruction_selection) { 2989 output_data += int_instruction_selection->output_data; 2990 } 2991 if (fp_instruction_selection) { 2992 output_data += fp_instruction_selection->output_data; 2993 } 2994 */ 2995} 2996 2997void SchedulerU::displayData(uint32_t indent, int plevel) { 2998 if (!exist) return; 2999 3000 McPATComponent::displayData(indent, plevel); 3001 3002 if (core_params.core_ty == OOO) { 3003 int_inst_window->displayData(indent + 4, plevel); 3004 fp_inst_window->displayData(indent + 4, plevel); 3005 if (core_params.ROB_size > 0) { 3006 ROB->displayData(indent + 4, plevel); 3007 } 3008 } else if (core_params.multithreaded) { 3009 int_inst_window->displayData(indent + 4, plevel); 3010 } 3011 3012 // Integer and FP instruction selection logic is not included in the 3013 // roll-up due to the uninitialized area 3014 /* 3015 if (int_instruction_selection) { 3016 int_instruction_selection->displayData(indent + 4, plevel); 3017 } 3018 if (fp_instruction_selection) { 3019 fp_instruction_selection->displayData(indent + 4, plevel); 3020 } 3021 */ 3022} 3023 3024void LoadStoreU::computeEnergy() { 3025 if (!exist) return; 3026 3027 LSQ->tdp_stats.reset(); 3028 LSQ->tdp_stats.readAc.access = LSQ->l_ip.num_search_ports * 3029 core_stats.LSU_duty_cycle; 3030 LSQ->tdp_stats.writeAc.access = LSQ->l_ip.num_search_ports * 3031 core_stats.LSU_duty_cycle; 3032 LSQ->rtp_stats.reset(); 3033 // Flush overhead conidered 3034 LSQ->rtp_stats.readAc.access = (core_stats.load_instructions + 3035 core_stats.store_instructions) * 2; 3036 LSQ->rtp_stats.writeAc.access = (core_stats.load_instructions + 3037 core_stats.store_instructions) * 2; 3038 LSQ->power_t.reset(); 3039 //every memory access invloves at least two operations on LSQ 3040 LSQ->power_t.readOp.dynamic += LSQ->tdp_stats.readAc.access * 3041 (LSQ->local_result.power.searchOp.dynamic + 3042 LSQ->local_result.power.readOp.dynamic) + 3043 LSQ->tdp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic; 3044 LSQ->rt_power.reset(); 3045 //every memory access invloves at least two operations on LSQ 3046 LSQ->rt_power.readOp.dynamic += LSQ->rtp_stats.readAc.access * 3047 (LSQ->local_result.power.searchOp.dynamic + 3048 LSQ->local_result.power.readOp.dynamic) + 3049 LSQ->rtp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic; 3050 3051 if (LoadQ) { 3052 LoadQ->tdp_stats.reset(); 3053 LoadQ->tdp_stats.readAc.access = LoadQ->l_ip.num_search_ports * 3054 core_stats.LSU_duty_cycle; 3055 LoadQ->tdp_stats.writeAc.access = LoadQ->l_ip.num_search_ports * 3056 core_stats.LSU_duty_cycle; 3057 LoadQ->rtp_stats.reset(); 3058 LoadQ->rtp_stats.readAc.access = core_stats.load_instructions + 3059 core_stats.store_instructions; 3060 LoadQ->rtp_stats.writeAc.access = core_stats.load_instructions + 3061 core_stats.store_instructions; 3062 LoadQ->power_t.reset(); 3063 //every memory access invloves at least two operations on LoadQ 3064 LoadQ->power_t.readOp.dynamic += 3065 LoadQ->tdp_stats.readAc.access * 3066 (LoadQ->local_result.power.searchOp.dynamic + 3067 LoadQ->local_result.power.readOp.dynamic) + 3068 LoadQ->tdp_stats.writeAc.access * 3069 LoadQ->local_result.power.writeOp.dynamic; 3070 LoadQ->rt_power.reset(); 3071 //every memory access invloves at least two operations on LoadQ 3072 LoadQ->rt_power.readOp.dynamic += LoadQ->rtp_stats.readAc.access * 3073 (LoadQ->local_result.power.searchOp.dynamic + 3074 LoadQ->local_result.power.readOp.dynamic) + 3075 LoadQ->rtp_stats.writeAc.access * 3076 LoadQ->local_result.power.writeOp.dynamic; 3077 } 3078 3079 McPATComponent::computeEnergy(); 3080 3081 output_data.reset(); 3082 if (dcache) { 3083 output_data += dcache->output_data; 3084 } 3085 if (LSQ) { 3086 LSQ->output_data.peak_dynamic_power = 3087 LSQ->power_t.readOp.dynamic * clockRate; 3088 LSQ->output_data.runtime_dynamic_energy = LSQ->rt_power.readOp.dynamic; 3089 output_data += LSQ->output_data; 3090 } 3091 if (LoadQ) { 3092 LoadQ->output_data.peak_dynamic_power = 3093 LoadQ->power_t.readOp.dynamic * clockRate; 3094 LoadQ->output_data.runtime_dynamic_energy = 3095 LoadQ->rt_power.readOp.dynamic; 3096 output_data += LoadQ->output_data; 3097 } 3098} 3099 3100void LoadStoreU::displayData(uint32_t indent, int plevel) { 3101 if (!exist) return; 3102 3103 McPATComponent::displayData(indent, plevel); 3104 3105 if (LoadQ) { 3106 LoadQ->displayData(indent + 4, plevel); 3107 } 3108 LSQ->displayData(indent + 4, plevel); 3109 3110} 3111 3112void MemManU::computeEnergy() { 3113 if (!exist) return; 3114 3115 itlb->tdp_stats.reset(); 3116 itlb->tdp_stats.readAc.access = itlb->l_ip.num_search_ports; 3117 itlb->tdp_stats.readAc.miss = 0; 3118 itlb->tdp_stats.readAc.hit = itlb->tdp_stats.readAc.access - 3119 itlb->tdp_stats.readAc.miss; 3120 itlb->rtp_stats.reset(); 3121 itlb->rtp_stats.readAc.access = mem_man_stats.itlb_total_accesses; 3122 itlb->rtp_stats.writeAc.access = mem_man_stats.itlb_total_misses; 3123 3124 itlb->power_t.reset(); 3125 //FA spent most power in tag, so use total access not hits 3126 itlb->power_t.readOp.dynamic += itlb->tdp_stats.readAc.access * 3127 itlb->local_result.power.searchOp.dynamic + 3128 itlb->tdp_stats.readAc.miss * 3129 itlb->local_result.power.writeOp.dynamic; 3130 itlb->rt_power.reset(); 3131 //FA spent most power in tag, so use total access not hits 3132 itlb->rt_power.readOp.dynamic += itlb->rtp_stats.readAc.access * 3133 itlb->local_result.power.searchOp.dynamic + 3134 itlb->rtp_stats.writeAc.access * 3135 itlb->local_result.power.writeOp.dynamic; 3136 3137 dtlb->tdp_stats.reset(); 3138 dtlb->tdp_stats.readAc.access = dtlb->l_ip.num_search_ports * 3139 core_stats.LSU_duty_cycle; 3140 dtlb->tdp_stats.readAc.miss = 0; 3141 dtlb->tdp_stats.readAc.hit = dtlb->tdp_stats.readAc.access - 3142 dtlb->tdp_stats.readAc.miss; 3143 dtlb->rtp_stats.reset(); 3144 dtlb->rtp_stats.readAc.access = mem_man_stats.dtlb_read_accesses + 3145 mem_man_stats.dtlb_write_misses; 3146 dtlb->rtp_stats.writeAc.access = mem_man_stats.dtlb_write_accesses + 3147 mem_man_stats.dtlb_read_misses; 3148 3149 dtlb->power_t.reset(); 3150 //FA spent most power in tag, so use total access not hits 3151 dtlb->power_t.readOp.dynamic += dtlb->tdp_stats.readAc.access * 3152 dtlb->local_result.power.searchOp.dynamic + 3153 dtlb->tdp_stats.readAc.miss * 3154 dtlb->local_result.power.writeOp.dynamic; 3155 dtlb->rt_power.reset(); 3156 //FA spent most power in tag, so use total access not hits 3157 dtlb->rt_power.readOp.dynamic += dtlb->rtp_stats.readAc.access * 3158 dtlb->local_result.power.searchOp.dynamic + 3159 dtlb->rtp_stats.writeAc.access * 3160 dtlb->local_result.power.writeOp.dynamic; 3161 3162 output_data.reset(); 3163 if (itlb) { 3164 itlb->output_data.peak_dynamic_power = itlb->power_t.readOp.dynamic * 3165 clockRate; 3166 itlb->output_data.runtime_dynamic_energy = 3167 itlb->rt_power.readOp.dynamic; 3168 output_data += itlb->output_data; 3169 } 3170 if (dtlb) { 3171 dtlb->output_data.peak_dynamic_power = 3172 dtlb->power_t.readOp.dynamic * clockRate; 3173 dtlb->output_data.runtime_dynamic_energy = 3174 dtlb->rt_power.readOp.dynamic; 3175 output_data += dtlb->output_data; 3176 } 3177} 3178 3179void MemManU::displayData(uint32_t indent, int plevel) { 3180 if (!exist) return; 3181 3182 McPATComponent::displayData(indent, plevel); 3183 3184 itlb->displayData(indent + 4, plevel); 3185 dtlb->displayData(indent + 4, plevel); 3186} 3187 3188void RegFU::computeEnergy() { 3189 /* 3190 * Architecture RF and physical RF cannot be present at the same time. 3191 * Therefore, the RF stats can only refer to either ARF or PRF; 3192 * And the same stats can be used for both. 3193 */ 3194 if (!exist) return; 3195 3196 IRF->tdp_stats.reset(); 3197 IRF->tdp_stats.readAc.access = 3198 core_params.issueW * NUM_INT_INST_SOURCE_OPERANDS * 3199 (core_stats.ALU_duty_cycle * 1.1 + 3200 (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) * 3201 core_params.num_pipelines; 3202 IRF->tdp_stats.writeAc.access = 3203 core_params.issueW * 3204 (core_stats.ALU_duty_cycle * 1.1 + 3205 (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) * 3206 core_params.num_pipelines; 3207 IRF->rtp_stats.reset(); 3208 IRF->rtp_stats.readAc.access = core_stats.int_regfile_reads; 3209 IRF->rtp_stats.writeAc.access = core_stats.int_regfile_writes; 3210 if (core_params.regWindowing) { 3211 IRF->rtp_stats.readAc.access += core_stats.function_calls * 3212 RFWIN_ACCESS_MULTIPLIER; 3213 IRF->rtp_stats.writeAc.access += core_stats.function_calls * 3214 RFWIN_ACCESS_MULTIPLIER; 3215 } 3216 IRF->power_t.reset(); 3217 IRF->power_t.readOp.dynamic += IRF->tdp_stats.readAc.access * 3218 IRF->local_result.power.readOp.dynamic + 3219 IRF->tdp_stats.writeAc.access * 3220 IRF->local_result.power.writeOp.dynamic; 3221 IRF->rt_power.reset(); 3222 IRF->rt_power.readOp.dynamic += 3223 IRF->rtp_stats.readAc.access * IRF->local_result.power.readOp.dynamic + 3224 IRF->rtp_stats.writeAc.access * IRF->local_result.power.writeOp.dynamic; 3225 3226 FRF->tdp_stats.reset(); 3227 FRF->tdp_stats.readAc.access = 3228 FRF->l_ip.num_rd_ports * core_stats.FPU_duty_cycle * 1.05 * 3229 core_params.num_fp_pipelines; 3230 FRF->tdp_stats.writeAc.access = 3231 FRF->l_ip.num_wr_ports * core_stats.FPU_duty_cycle * 1.05 * 3232 core_params.num_fp_pipelines; 3233 FRF->rtp_stats.reset(); 3234 FRF->rtp_stats.readAc.access = core_stats.float_regfile_reads; 3235 FRF->rtp_stats.writeAc.access = core_stats.float_regfile_writes; 3236 if (core_params.regWindowing) { 3237 FRF->rtp_stats.readAc.access += core_stats.function_calls * 3238 RFWIN_ACCESS_MULTIPLIER; 3239 FRF->rtp_stats.writeAc.access += core_stats.function_calls * 3240 RFWIN_ACCESS_MULTIPLIER; 3241 } 3242 FRF->power_t.reset(); 3243 FRF->power_t.readOp.dynamic += 3244 FRF->tdp_stats.readAc.access * FRF->local_result.power.readOp.dynamic + 3245 FRF->tdp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic; 3246 FRF->rt_power.reset(); 3247 FRF->rt_power.readOp.dynamic += 3248 FRF->rtp_stats.readAc.access * FRF->local_result.power.readOp.dynamic + 3249 FRF->rtp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic; 3250 3251 if (core_params.regWindowing) { 3252 RFWIN->tdp_stats.reset(); 3253 RFWIN->tdp_stats.readAc.access = 0; 3254 RFWIN->tdp_stats.writeAc.access = 0; 3255 RFWIN->rtp_stats.reset(); 3256 RFWIN->rtp_stats.readAc.access = 3257 core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; 3258 RFWIN->rtp_stats.writeAc.access = 3259 core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; 3260 RFWIN->power_t.reset(); 3261 RFWIN->power_t.readOp.dynamic += 3262 RFWIN->tdp_stats.readAc.access * 3263 RFWIN->local_result.power.readOp.dynamic + 3264 RFWIN->tdp_stats.writeAc.access * 3265 RFWIN->local_result.power.writeOp.dynamic; 3266 RFWIN->rt_power.reset(); 3267 RFWIN->rt_power.readOp.dynamic += 3268 RFWIN->rtp_stats.readAc.access * 3269 RFWIN->local_result.power.readOp.dynamic + 3270 RFWIN->rtp_stats.writeAc.access * 3271 RFWIN->local_result.power.writeOp.dynamic; 3272 } 3273 3274 output_data.reset(); 3275 if (IRF) { 3276 IRF->output_data.peak_dynamic_power = 3277 IRF->power_t.readOp.dynamic * clockRate; 3278 IRF->output_data.subthreshold_leakage_power *= 3279 core_params.num_hthreads; 3280 IRF->output_data.gate_leakage_power *= core_params.num_hthreads; 3281 IRF->output_data.runtime_dynamic_energy = IRF->rt_power.readOp.dynamic; 3282 output_data += IRF->output_data; 3283 } 3284 if (FRF) { 3285 FRF->output_data.peak_dynamic_power = 3286 FRF->power_t.readOp.dynamic * clockRate; 3287 FRF->output_data.subthreshold_leakage_power *= 3288 core_params.num_hthreads; 3289 FRF->output_data.gate_leakage_power *= core_params.num_hthreads; 3290 FRF->output_data.runtime_dynamic_energy = FRF->rt_power.readOp.dynamic; 3291 output_data += FRF->output_data; 3292 } 3293 if (RFWIN) { 3294 RFWIN->output_data.peak_dynamic_power = 3295 RFWIN->power_t.readOp.dynamic * clockRate; 3296 RFWIN->output_data.runtime_dynamic_energy = 3297 RFWIN->rt_power.readOp.dynamic; 3298 output_data += RFWIN->output_data; 3299 } 3300} 3301 3302void RegFU::displayData(uint32_t indent, int plevel) { 3303 if (!exist) return; 3304 3305 McPATComponent::displayData(indent, plevel); 3306 3307 IRF->displayData(indent + 4, plevel); 3308 FRF->displayData(indent + 4, plevel); 3309 if (core_params.regWindowing) { 3310 RFWIN->displayData(indent + 4, plevel); 3311 } 3312} 3313 3314void EXECU::computeEnergy() { 3315 if (!exist) return; 3316 3317 int_bypass->set_params_stats(core_params.execu_int_bypass_ports, 3318 core_stats.ALU_cdb_duty_cycle, 3319 core_stats.cdb_alu_accesses); 3320 3321 intTagBypass->set_params_stats(core_params.execu_int_bypass_ports, 3322 core_stats.ALU_cdb_duty_cycle, 3323 core_stats.cdb_alu_accesses); 3324 3325 if (core_params.num_muls > 0) { 3326 int_mul_bypass->set_params_stats(core_params.execu_mul_bypass_ports, 3327 core_stats.MUL_cdb_duty_cycle, 3328 core_stats.cdb_mul_accesses); 3329 3330 intTag_mul_Bypass->set_params_stats(core_params.execu_mul_bypass_ports, 3331 core_stats.MUL_cdb_duty_cycle, 3332 core_stats.cdb_mul_accesses); 3333 } 3334 3335 if (core_params.num_fpus > 0) { 3336 fp_bypass->set_params_stats(core_params.execu_fp_bypass_ports, 3337 core_stats.FPU_cdb_duty_cycle, 3338 core_stats.cdb_fpu_accesses); 3339 3340 fpTagBypass->set_params_stats(core_params.execu_fp_bypass_ports, 3341 core_stats.FPU_cdb_duty_cycle, 3342 core_stats.cdb_fpu_accesses); 3343 } 3344 3345 McPATComponent::computeEnergy(); 3346 3347 if (rfu) { 3348 rfu->computeEnergy(); 3349 output_data += rfu->output_data; 3350 } 3351 if (scheu) { 3352 scheu->computeEnergy(); 3353 output_data += scheu->output_data; 3354 } 3355 if (fp_u) { 3356 fp_u->computeEnergy(); 3357 output_data += fp_u->output_data; 3358 } 3359 if (exeu) { 3360 exeu->computeEnergy(); 3361 output_data += exeu->output_data; 3362 } 3363 if (mul) { 3364 mul->computeEnergy(); 3365 output_data += mul->output_data; 3366 } 3367} 3368 3369void EXECU::displayData(uint32_t indent, int plevel) { 3370 if (!exist) return; 3371 3372 McPATComponent::displayData(indent, plevel); 3373 3374 rfu->displayData(indent + 4, plevel); 3375 if (scheu) { 3376 scheu->displayData(indent + 4, plevel); 3377 } 3378 exeu->displayData(indent + 4, plevel); 3379 if (core_params.num_fpus > 0) { 3380 fp_u->displayData(indent + 4, plevel); 3381 } 3382 if (core_params.num_muls > 0) { 3383 mul->displayData(indent + 4, plevel); 3384 } 3385} 3386 3387void Core::computeEnergy() { 3388 ifu->computeEnergy(); 3389 lsu->computeEnergy(); 3390 mmu->computeEnergy(); 3391 exu->computeEnergy(); 3392 if (core_params.core_ty == OOO) { 3393 rnu->computeEnergy(); 3394 } 3395 3396 output_data.reset(); 3397 if (ifu) { 3398 output_data += ifu->output_data; 3399 } 3400 if (lsu) { 3401 output_data += lsu->output_data; 3402 } 3403 if (mmu) { 3404 output_data += mmu->output_data; 3405 } 3406 if (exu) { 3407 output_data += exu->output_data; 3408 } 3409 if (rnu) { 3410 output_data += rnu->output_data; 3411 } 3412 if (corepipe) { 3413 output_data += corepipe->output_data; 3414 } 3415 if (undiffCore) { 3416 output_data += undiffCore->output_data; 3417 } 3418 if (l2cache) { 3419 output_data += l2cache->output_data; 3420 } 3421} 3422 3423InstFetchU ::~InstFetchU() { 3424 3425 if (!exist) return; 3426 if (IB) { 3427 delete IB; 3428 IB = NULL; 3429 } 3430 if (ID_inst) { 3431 delete ID_inst; 3432 ID_inst = NULL; 3433 } 3434 if (ID_operand) { 3435 delete ID_operand; 3436 ID_operand = NULL; 3437 } 3438 if (ID_misc) { 3439 delete ID_misc; 3440 ID_misc = NULL; 3441 } 3442 if (core_params.predictionW > 0) { 3443 if (BTB) { 3444 delete BTB; 3445 BTB = NULL; 3446 } 3447 if (BPT) { 3448 delete BPT; 3449 BPT = NULL; 3450 } 3451 } 3452 if (icache) { 3453 delete icache; 3454 } 3455} 3456 3457BranchPredictor ::~BranchPredictor() { 3458 3459 if (!exist) return; 3460 if (globalBPT) { 3461 delete globalBPT; 3462 globalBPT = NULL; 3463 } 3464 if (localBPT) { 3465 delete localBPT; 3466 localBPT = NULL; 3467 } 3468 if (L1_localBPT) { 3469 delete L1_localBPT; 3470 L1_localBPT = NULL; 3471 } 3472 if (L2_localBPT) { 3473 delete L2_localBPT; 3474 L2_localBPT = NULL; 3475 } 3476 if (chooser) { 3477 delete chooser; 3478 chooser = NULL; 3479 } 3480 if (RAS) { 3481 delete RAS; 3482 RAS = NULL; 3483 } 3484} 3485 3486RENAMINGU ::~RENAMINGU() { 3487 3488 if (!exist) return; 3489 if (iFRAT) { 3490 delete iFRAT; 3491 iFRAT = NULL; 3492 } 3493 if (fFRAT) { 3494 delete fFRAT; 3495 fFRAT = NULL; 3496 } 3497 if (iRRAT) { 3498 delete iRRAT; 3499 iRRAT = NULL; 3500 } 3501 if (iFRAT) { 3502 delete iFRAT; 3503 iFRAT = NULL; 3504 } 3505 if (ifreeL) { 3506 delete ifreeL; 3507 ifreeL = NULL; 3508 } 3509 if (ffreeL) { 3510 delete ffreeL; 3511 ffreeL = NULL; 3512 } 3513 if (idcl) { 3514 delete idcl; 3515 idcl = NULL; 3516 } 3517 if (fdcl) { 3518 delete fdcl; 3519 fdcl = NULL; 3520 } 3521 if (RAHT) { 3522 delete RAHT; 3523 RAHT = NULL; 3524 } 3525} 3526 3527LoadStoreU ::~LoadStoreU() { 3528 3529 if (!exist) return; 3530 if (LSQ) { 3531 delete LSQ; 3532 LSQ = NULL; 3533 } 3534 if (dcache) { 3535 delete dcache; 3536 dcache = NULL; 3537 } 3538} 3539 3540MemManU ::~MemManU() { 3541 3542 if (!exist) return; 3543 if (itlb) { 3544 delete itlb; 3545 itlb = NULL; 3546 } 3547 if (dtlb) { 3548 delete dtlb; 3549 dtlb = NULL; 3550 } 3551} 3552 3553RegFU ::~RegFU() { 3554 3555 if (!exist) return; 3556 if (IRF) { 3557 delete IRF; 3558 IRF = NULL; 3559 } 3560 if (FRF) { 3561 delete FRF; 3562 FRF = NULL; 3563 } 3564 if (RFWIN) { 3565 delete RFWIN; 3566 RFWIN = NULL; 3567 } 3568} 3569 3570SchedulerU ::~SchedulerU() { 3571 3572 if (!exist) return; 3573 if (int_inst_window) { 3574 delete int_inst_window; 3575 int_inst_window = NULL; 3576 } 3577 if (fp_inst_window) { 3578 delete int_inst_window; 3579 int_inst_window = NULL; 3580 } 3581 if (ROB) { 3582 delete ROB; 3583 ROB = NULL; 3584 } 3585 if (int_instruction_selection) { 3586 delete int_instruction_selection; 3587 int_instruction_selection = NULL; 3588 } 3589 if (fp_instruction_selection) { 3590 delete fp_instruction_selection; 3591 fp_instruction_selection = NULL; 3592 } 3593} 3594 3595EXECU ::~EXECU() { 3596 3597 if (!exist) return; 3598 if (int_bypass) { 3599 delete int_bypass; 3600 int_bypass = NULL; 3601 } 3602 if (intTagBypass) { 3603 delete intTagBypass; 3604 intTagBypass = NULL; 3605 } 3606 if (int_mul_bypass) { 3607 delete int_mul_bypass; 3608 int_mul_bypass = NULL; 3609 } 3610 if (intTag_mul_Bypass) { 3611 delete intTag_mul_Bypass; 3612 intTag_mul_Bypass = NULL; 3613 } 3614 if (fp_bypass) { 3615 delete fp_bypass; 3616 fp_bypass = NULL; 3617 } 3618 if (fpTagBypass) { 3619 delete fpTagBypass; 3620 fpTagBypass = NULL; 3621 } 3622 if (fp_u) { 3623 delete fp_u; 3624 fp_u = NULL; 3625 } 3626 if (exeu) { 3627 delete exeu; 3628 exeu = NULL; 3629 } 3630 if (mul) { 3631 delete mul; 3632 mul = NULL; 3633 } 3634 if (rfu) { 3635 delete rfu; 3636 rfu = NULL; 3637 } 3638 if (scheu) { 3639 delete scheu; 3640 scheu = NULL; 3641 } 3642} 3643 3644Core::~Core() { 3645 3646 if (ifu) { 3647 delete ifu; 3648 ifu = NULL; 3649 } 3650 if (lsu) { 3651 delete lsu; 3652 lsu = NULL; 3653 } 3654 if (rnu) { 3655 delete rnu; 3656 rnu = NULL; 3657 } 3658 if (mmu) { 3659 delete mmu; 3660 mmu = NULL; 3661 } 3662 if (exu) { 3663 delete exu; 3664 exu = NULL; 3665 } 3666 if (corepipe) { 3667 delete corepipe; 3668 corepipe = NULL; 3669 } 3670 if (undiffCore) { 3671 delete undiffCore; 3672 undiffCore = NULL; 3673 } 3674 if (l2cache) { 3675 delete l2cache; 3676 l2cache = NULL; 3677 } 3678} 3679 3680void Core::initialize_params() { 3681 memset(&core_params, 0, sizeof(CoreParameters)); 3682 core_params.peak_issueW = -1; 3683 core_params.peak_commitW = -1; 3684} 3685 3686void Core::initialize_stats() { 3687 memset(&core_stats, 0, sizeof(CoreStatistics)); 3688 core_stats.IFU_duty_cycle = 1.0; 3689 core_stats.ALU_duty_cycle = 1.0; 3690 core_stats.FPU_duty_cycle = 1.0; 3691 core_stats.MUL_duty_cycle = 1.0; 3692 core_stats.ALU_cdb_duty_cycle = 1.0; 3693 core_stats.FPU_cdb_duty_cycle = 1.0; 3694 core_stats.MUL_cdb_duty_cycle = 1.0; 3695 core_stats.pipeline_duty_cycle = 1.0; 3696 core_stats.IFU_duty_cycle = 1.0; 3697 core_stats.LSU_duty_cycle = 1.0; 3698 core_stats.MemManU_D_duty_cycle = 1.0; 3699 core_stats.MemManU_I_duty_cycle = 1.0; 3700} 3701 3702void Core::set_core_param() { 3703 initialize_params(); 3704 initialize_stats(); 3705 3706 int num_children = xml_data->nChildNode("param"); 3707 int i; 3708 for (i = 0; i < num_children; i++) { 3709 XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); 3710 XMLCSTR node_name = paramNode->getAttribute("name"); 3711 XMLCSTR value = paramNode->getAttribute("value"); 3712 3713 if (!node_name) 3714 warnMissingParamName(paramNode->getAttribute("id")); 3715 3716 ASSIGN_STR_IF("name", name); 3717 ASSIGN_INT_IF("opt_local", core_params.opt_local); 3718 ASSIGN_FP_IF("clock_rate", core_params.clockRate); 3719 ASSIGN_INT_IF("instruction_length", core_params.instruction_length); 3720 ASSIGN_INT_IF("opcode_width", core_params.opcode_width); 3721 ASSIGN_INT_IF("x86", core_params.x86); 3722 ASSIGN_INT_IF("Embedded", core_params.Embedded); 3723 ASSIGN_ENUM_IF("machine_type", core_params.core_ty, Core_type); 3724 ASSIGN_INT_IF("micro_opcode_width", core_params.micro_opcode_length); 3725 ASSIGN_INT_IF("number_hardware_threads", core_params.num_hthreads); 3726 ASSIGN_INT_IF("fetch_width", core_params.fetchW); 3727 ASSIGN_INT_IF("decode_width", core_params.decodeW); 3728 ASSIGN_INT_IF("issue_width", core_params.issueW); 3729 ASSIGN_INT_IF("peak_issue_width", core_params.peak_issueW); 3730 ASSIGN_INT_IF("commit_width", core_params.commitW); 3731 ASSIGN_INT_IF("prediction_width", core_params.predictionW); 3732 ASSIGN_INT_IF("ALU_per_core", core_params.num_alus); 3733 ASSIGN_INT_IF("FPU_per_core", core_params.num_fpus); 3734 ASSIGN_INT_IF("MUL_per_core", core_params.num_muls); 3735 ASSIGN_INT_IF("fp_issue_width", core_params.fp_issueW); 3736 ASSIGN_ENUM_IF("instruction_window_scheme", core_params.scheu_ty, 3737 Scheduler_type); 3738 ASSIGN_ENUM_IF("rename_scheme", core_params.rm_ty, Renaming_type); 3739 ASSIGN_INT_IF("archi_Regs_IRF_size", core_params.archi_Regs_IRF_size); 3740 ASSIGN_INT_IF("archi_Regs_FRF_size", core_params.archi_Regs_FRF_size); 3741 ASSIGN_INT_IF("ROB_size", core_params.ROB_size); 3742 ASSIGN_INT_IF("ROB_assoc", core_params.ROB_assoc); 3743 ASSIGN_INT_IF("ROB_nbanks", core_params.ROB_nbanks); 3744 ASSIGN_INT_IF("ROB_tag_width", core_params.ROB_tag_width); 3745 ASSIGN_INT_IF("scheduler_assoc", core_params.scheduler_assoc); 3746 ASSIGN_INT_IF("scheduler_nbanks", core_params.scheduler_nbanks); 3747 ASSIGN_INT_IF("register_window_size", 3748 core_params.register_window_size); 3749 ASSIGN_INT_IF("register_window_throughput", 3750 core_params.register_window_throughput); 3751 ASSIGN_INT_IF("register_window_latency", 3752 core_params.register_window_latency); 3753 ASSIGN_INT_IF("register_window_assoc", 3754 core_params.register_window_assoc); 3755 ASSIGN_INT_IF("register_window_nbanks", 3756 core_params.register_window_nbanks); 3757 ASSIGN_INT_IF("register_window_tag_width", 3758 core_params.register_window_tag_width); 3759 ASSIGN_INT_IF("register_window_rw_ports", 3760 core_params.register_window_rw_ports); 3761 ASSIGN_INT_IF("phy_Regs_IRF_size", core_params.phy_Regs_IRF_size); 3762 ASSIGN_INT_IF("phy_Regs_IRF_assoc", core_params.phy_Regs_IRF_assoc); 3763 ASSIGN_INT_IF("phy_Regs_IRF_nbanks", core_params.phy_Regs_IRF_nbanks); 3764 ASSIGN_INT_IF("phy_Regs_IRF_tag_width", 3765 core_params.phy_Regs_IRF_tag_width); 3766 ASSIGN_INT_IF("phy_Regs_IRF_rd_ports", 3767 core_params.phy_Regs_IRF_rd_ports); 3768 ASSIGN_INT_IF("phy_Regs_IRF_wr_ports", 3769 core_params.phy_Regs_IRF_wr_ports); 3770 ASSIGN_INT_IF("phy_Regs_FRF_size", core_params.phy_Regs_FRF_size); 3771 ASSIGN_INT_IF("phy_Regs_FRF_assoc", core_params.phy_Regs_FRF_assoc); 3772 ASSIGN_INT_IF("phy_Regs_FRF_nbanks", core_params.phy_Regs_FRF_nbanks); 3773 ASSIGN_INT_IF("phy_Regs_FRF_tag_width", 3774 core_params.phy_Regs_FRF_tag_width); 3775 ASSIGN_INT_IF("phy_Regs_FRF_rd_ports", 3776 core_params.phy_Regs_FRF_rd_ports); 3777 ASSIGN_INT_IF("phy_Regs_FRF_wr_ports", 3778 core_params.phy_Regs_FRF_wr_ports); 3779 ASSIGN_INT_IF("front_rat_nbanks", core_params.front_rat_nbanks); 3780 ASSIGN_INT_IF("front_rat_rw_ports", core_params.front_rat_rw_ports); 3781 ASSIGN_INT_IF("retire_rat_nbanks", core_params.retire_rat_nbanks); 3782 ASSIGN_INT_IF("retire_rat_rw_ports", core_params.retire_rat_rw_ports); 3783 ASSIGN_INT_IF("freelist_nbanks", core_params.freelist_nbanks); 3784 ASSIGN_INT_IF("freelist_rw_ports", core_params.freelist_rw_ports); 3785 ASSIGN_INT_IF("memory_ports", core_params.memory_ports); 3786 ASSIGN_INT_IF("load_buffer_size", core_params.load_buffer_size); 3787 ASSIGN_INT_IF("load_buffer_assoc", core_params.load_buffer_assoc); 3788 ASSIGN_INT_IF("load_buffer_nbanks", core_params.load_buffer_nbanks); 3789 ASSIGN_INT_IF("store_buffer_size", core_params.store_buffer_size); 3790 ASSIGN_INT_IF("store_buffer_assoc", core_params.store_buffer_assoc); 3791 ASSIGN_INT_IF("store_buffer_nbanks", core_params.store_buffer_nbanks); 3792 ASSIGN_INT_IF("instruction_window_size", 3793 core_params.instruction_window_size); 3794 ASSIGN_INT_IF("fp_instruction_window_size", 3795 core_params.fp_instruction_window_size); 3796 ASSIGN_INT_IF("instruction_buffer_size", 3797 core_params.instruction_buffer_size); 3798 ASSIGN_INT_IF("instruction_buffer_assoc", 3799 core_params.instruction_buffer_assoc); 3800 ASSIGN_INT_IF("instruction_buffer_nbanks", 3801 core_params.instruction_buffer_nbanks); 3802 ASSIGN_INT_IF("instruction_buffer_tag_width", 3803 core_params.instruction_buffer_tag_width); 3804 ASSIGN_INT_IF("number_instruction_fetch_ports", 3805 core_params.number_instruction_fetch_ports); 3806 ASSIGN_INT_IF("RAS_size", core_params.RAS_size); 3807 ASSIGN_ENUM_IF("execu_broadcast_wt", core_params.execu_broadcast_wt, 3808 Wire_type); 3809 ASSIGN_INT_IF("execu_wire_mat_type", core_params.execu_wire_mat_type); 3810 ASSIGN_INT_IF("execu_int_bypass_ports", 3811 core_params.execu_int_bypass_ports); 3812 ASSIGN_INT_IF("execu_mul_bypass_ports", 3813 core_params.execu_mul_bypass_ports); 3814 ASSIGN_INT_IF("execu_fp_bypass_ports", 3815 core_params.execu_fp_bypass_ports); 3816 ASSIGN_ENUM_IF("execu_bypass_wire_type", 3817 core_params.execu_bypass_wire_type, Wire_type); 3818 ASSIGN_FP_IF("execu_bypass_base_width", 3819 core_params.execu_bypass_base_width); 3820 ASSIGN_FP_IF("execu_bypass_base_height", 3821 core_params.execu_bypass_base_height); 3822 ASSIGN_INT_IF("execu_bypass_start_wiring_level", 3823 core_params.execu_bypass_start_wiring_level); 3824 ASSIGN_FP_IF("execu_bypass_route_over_perc", 3825 core_params.execu_bypass_route_over_perc); 3826 ASSIGN_FP_IF("broadcast_numerator", core_params.broadcast_numerator); 3827 ASSIGN_INT_IF("int_pipeline_depth", core_params.pipeline_stages); 3828 ASSIGN_INT_IF("fp_pipeline_depth", core_params.fp_pipeline_stages); 3829 ASSIGN_INT_IF("int_pipelines", core_params.num_pipelines); 3830 ASSIGN_INT_IF("fp_pipelines", core_params.num_fp_pipelines); 3831 ASSIGN_INT_IF("globalCheckpoint", core_params.globalCheckpoint); 3832 ASSIGN_INT_IF("perThreadState", core_params.perThreadState); 3833 ASSIGN_INT_IF("instruction_length", core_params.instruction_length); 3834 3835 else { 3836 warnUnrecognizedParam(node_name); 3837 } 3838 } 3839 3840 // Change from MHz to Hz 3841 core_params.clockRate *= 1e6; 3842 clockRate = core_params.clockRate; 3843 3844 core_params.peak_commitW = core_params.peak_issueW; 3845 core_params.fp_decodeW = core_params.fp_issueW; 3846 3847 3848 num_children = xml_data->nChildNode("stat"); 3849 for (i = 0; i < num_children; i++) { 3850 XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); 3851 XMLCSTR node_name = statNode->getAttribute("name"); 3852 XMLCSTR value = statNode->getAttribute("value"); 3853 3854 if (!node_name) 3855 warnMissingStatName(statNode->getAttribute("id")); 3856 3857 ASSIGN_FP_IF("ALU_duty_cycle", core_stats.ALU_duty_cycle); 3858 ASSIGN_FP_IF("FPU_duty_cycle", core_stats.FPU_duty_cycle); 3859 ASSIGN_FP_IF("MUL_duty_cycle", core_stats.MUL_duty_cycle); 3860 ASSIGN_FP_IF("ALU_cdb_duty_cycle", core_stats.ALU_cdb_duty_cycle); 3861 ASSIGN_FP_IF("FPU_cdb_duty_cycle", core_stats.FPU_cdb_duty_cycle); 3862 ASSIGN_FP_IF("MUL_cdb_duty_cycle", core_stats.MUL_cdb_duty_cycle); 3863 ASSIGN_FP_IF("pipeline_duty_cycle", core_stats.pipeline_duty_cycle); 3864 ASSIGN_FP_IF("total_cycles", core_stats.total_cycles); 3865 ASSIGN_FP_IF("busy_cycles", core_stats.busy_cycles); 3866 ASSIGN_FP_IF("idle_cycles", core_stats.idle_cycles); 3867 ASSIGN_FP_IF("IFU_duty_cycle", core_stats.IFU_duty_cycle); 3868 ASSIGN_FP_IF("BR_duty_cycle", core_stats.BR_duty_cycle); 3869 ASSIGN_FP_IF("LSU_duty_cycle", core_stats.LSU_duty_cycle); 3870 ASSIGN_FP_IF("MemManU_D_duty_cycle", core_stats.MemManU_D_duty_cycle); 3871 ASSIGN_FP_IF("MemManU_I_duty_cycle", core_stats.MemManU_I_duty_cycle); 3872 ASSIGN_FP_IF("cdb_fpu_accesses", core_stats.cdb_fpu_accesses); 3873 ASSIGN_FP_IF("cdb_alu_accesses", core_stats.cdb_alu_accesses); 3874 ASSIGN_FP_IF("cdb_mul_accesses", core_stats.cdb_mul_accesses); 3875 ASSIGN_FP_IF("function_calls", core_stats.function_calls); 3876 ASSIGN_FP_IF("total_instructions", core_stats.total_instructions); 3877 ASSIGN_FP_IF("int_instructions", core_stats.int_instructions); 3878 ASSIGN_FP_IF("fp_instructions", core_stats.fp_instructions); 3879 ASSIGN_FP_IF("branch_instructions", core_stats.branch_instructions); 3880 ASSIGN_FP_IF("branch_mispredictions", 3881 core_stats.branch_mispredictions); 3882 ASSIGN_FP_IF("load_instructions", core_stats.load_instructions); 3883 ASSIGN_FP_IF("store_instructions", core_stats.store_instructions); 3884 ASSIGN_FP_IF("committed_instructions", 3885 core_stats.committed_instructions); 3886 ASSIGN_FP_IF("committed_int_instructions", 3887 core_stats.committed_int_instructions); 3888 ASSIGN_FP_IF("committed_fp_instructions", 3889 core_stats.committed_fp_instructions); 3890 ASSIGN_FP_IF("ROB_reads", core_stats.ROB_reads); 3891 ASSIGN_FP_IF("ROB_writes", core_stats.ROB_writes); 3892 ASSIGN_FP_IF("rename_reads", core_stats.rename_reads); 3893 ASSIGN_FP_IF("rename_writes", core_stats.rename_writes); 3894 ASSIGN_FP_IF("fp_rename_reads", core_stats.fp_rename_reads); 3895 ASSIGN_FP_IF("fp_rename_writes", core_stats.fp_rename_writes); 3896 ASSIGN_FP_IF("inst_window_reads", core_stats.inst_window_reads); 3897 ASSIGN_FP_IF("inst_window_writes", core_stats.inst_window_writes); 3898 ASSIGN_FP_IF("inst_window_wakeup_accesses", 3899 core_stats.inst_window_wakeup_accesses); 3900 ASSIGN_FP_IF("fp_inst_window_reads", core_stats.fp_inst_window_reads); 3901 ASSIGN_FP_IF("fp_inst_window_writes", 3902 core_stats.fp_inst_window_writes); 3903 ASSIGN_FP_IF("fp_inst_window_wakeup_accesses", 3904 core_stats.fp_inst_window_wakeup_accesses); 3905 ASSIGN_FP_IF("int_regfile_reads", core_stats.int_regfile_reads); 3906 ASSIGN_FP_IF("float_regfile_reads", core_stats.float_regfile_reads); 3907 ASSIGN_FP_IF("int_regfile_writes", core_stats.int_regfile_writes); 3908 ASSIGN_FP_IF("float_regfile_writes", core_stats.float_regfile_writes); 3909 ASSIGN_FP_IF("context_switches", core_stats.context_switches); 3910 ASSIGN_FP_IF("ialu_accesses", core_stats.ialu_accesses); 3911 ASSIGN_FP_IF("fpu_accesses", core_stats.fpu_accesses); 3912 ASSIGN_FP_IF("mul_accesses", core_stats.mul_accesses); 3913 3914 else { 3915 warnUnrecognizedStat(node_name); 3916 } 3917 } 3918 3919 // Initialize a few variables 3920 core_params.multithreaded = core_params.num_hthreads > 1 ? true : false; 3921 core_params.pc_width = virtual_address_width; 3922 core_params.v_address_width = virtual_address_width; 3923 core_params.p_address_width = physical_address_width; 3924 core_params.int_data_width = int(ceil(data_path_width / 32.0)) * 32; 3925 core_params.fp_data_width = core_params.int_data_width; 3926 core_params.arch_ireg_width = 3927 int(ceil(log2(core_params.archi_Regs_IRF_size))); 3928 core_params.arch_freg_width 3929 = int(ceil(log2(core_params.archi_Regs_FRF_size))); 3930 core_params.num_IRF_entry = core_params.archi_Regs_IRF_size; 3931 core_params.num_FRF_entry = core_params.archi_Regs_FRF_size; 3932 3933 if (core_params.instruction_length <= 0) { 3934 errorNonPositiveParam("instruction_length"); 3935 } 3936 3937 if (core_params.num_hthreads <= 0) { 3938 errorNonPositiveParam("number_hardware_threads"); 3939 } 3940 3941 if (core_params.opcode_width <= 0) { 3942 errorNonPositiveParam("opcode_width"); 3943 } 3944 3945 if (core_params.instruction_buffer_size <= 0) { 3946 errorNonPositiveParam("instruction_buffer_size"); 3947 } 3948 3949 if (core_params.number_instruction_fetch_ports <= 0) { 3950 errorNonPositiveParam("number_instruction_fetch_ports"); 3951 } 3952 3953 if (core_params.peak_issueW <= 0) { 3954 errorNonPositiveParam("peak_issue_width"); 3955 } else { 3956 assert(core_params.peak_commitW > 0); 3957 } 3958 3959 if (core_params.core_ty == OOO) { 3960 if (core_params.scheu_ty == PhysicalRegFile) { 3961 core_params.phy_ireg_width = 3962 int(ceil(log2(core_params.phy_Regs_IRF_size))); 3963 core_params.phy_freg_width = 3964 int(ceil(log2(core_params.phy_Regs_FRF_size))); 3965 core_params.num_ifreelist_entries = 3966 core_params.num_IRF_entry = core_params.phy_Regs_IRF_size; 3967 core_params.num_ffreelist_entries = 3968 core_params.num_FRF_entry = core_params.phy_Regs_FRF_size; 3969 } else if (core_params.scheu_ty == ReservationStation) { 3970 core_params.phy_ireg_width = int(ceil(log2(core_params.ROB_size))); 3971 core_params.phy_freg_width = int(ceil(log2(core_params.ROB_size))); 3972 core_params.num_ifreelist_entries = core_params.ROB_size; 3973 core_params.num_ffreelist_entries = core_params.ROB_size; 3974 } 3975 } 3976 3977 core_params.regWindowing = 3978 (core_params.register_window_size > 0 && 3979 core_params.core_ty == Inorder) ? true : false; 3980 3981 if (core_params.regWindowing) { 3982 if (core_params.register_window_throughput <= 0) { 3983 errorNonPositiveParam("register_window_throughput"); 3984 } else if (core_params.register_window_latency <= 0) { 3985 errorNonPositiveParam("register_window_latency"); 3986 } 3987 } 3988 3989 set_pppm(core_params.pppm_lkg_multhread, 0, core_params.num_hthreads, 3990 core_params.num_hthreads, 0); 3991 3992 if (!((core_params.core_ty == OOO) || (core_params.core_ty == Inorder))) { 3993 cout << "Invalid Core Type" << endl; 3994 exit(0); 3995 } 3996 3997 if (!((core_params.scheu_ty == PhysicalRegFile) || 3998 (core_params.scheu_ty == ReservationStation))) { 3999 cout << "Invalid OOO Scheduler Type" << endl; 4000 exit(0); 4001 } 4002 4003 if (!((core_params.rm_ty == RAMbased) || 4004 (core_params.rm_ty == CAMbased))) { 4005 cout << "Invalid OOO Renaming Type" << endl; 4006 exit(0); 4007 } 4008 4009} 4010