core.cc revision 10152:52c552138ba1
1/***************************************************************************** 2 * McPAT 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. 5 * All Rights Reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer; 11 * redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution; 14 * neither the name of the copyright holders nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” 29 * 30 ***************************************************************************/ 31 32#include <algorithm> 33#include <cassert> 34#include <cmath> 35#include <iostream> 36#include <string> 37 38#include "XML_Parse.h" 39#include "basic_circuit.h" 40#include "const.h" 41#include "core.h" 42#include "io.h" 43#include "parameter.h" 44//#include "globalvar.h" 45 46InstFetchU::InstFetchU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) 47:XML(XML_interface), 48 ithCore(ithCore_), 49 interface_ip(*interface_ip_), 50 coredynp(dyn_p_), 51 IB (0), 52 BTB (0), 53 ID_inst (0), 54 ID_operand (0), 55 ID_misc (0), 56 exist(exist_) 57{ 58 if (!exist) return; 59 int idx, tag, data, size, line, assoc, banks; 60 bool debug= false, is_default = true; 61 62 clockRate = coredynp.clockRate; 63 executionTime = coredynp.executionTime; 64 cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7]; 65 //Assuming all L1 caches are virtually idxed physically tagged. 66 //cache 67 68 size = (int)XML->sys.core[ithCore].icache.icache_config[0]; 69 line = (int)XML->sys.core[ithCore].icache.icache_config[1]; 70 assoc = (int)XML->sys.core[ithCore].icache.icache_config[2]; 71 banks = (int)XML->sys.core[ithCore].icache.icache_config[3]; 72 idx = debug?9:int(ceil(log2(size/line/assoc))); 73 tag = debug?51:(int)XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS; 74 interface_ip.specific_tag = 1; 75 interface_ip.tag_w = tag; 76 interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].icache.icache_config[0]; 77 interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].icache.icache_config[1]; 78 interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].icache.icache_config[2]; 79 interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].icache.icache_config[3]; 80 interface_ip.out_w = interface_ip.line_sz*8; 81 interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; 82 interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; 83 interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; 84 interface_ip.is_cache = true; 85 interface_ip.pure_cam = false; 86 interface_ip.pure_ram = false; 87 // interface_ip.obj_func_dyn_energy = 0; 88 // interface_ip.obj_func_dyn_power = 0; 89 // interface_ip.obj_func_leak_power = 0; 90 // interface_ip.obj_func_cycle_t = 1; 91 interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; 92 interface_ip.num_rd_ports = 0; 93 interface_ip.num_wr_ports = 0; 94 interface_ip.num_se_rd_ports = 0; 95 icache.caches = new ArrayST(&interface_ip, "icache", Core_device, coredynp.opt_local, coredynp.core_ty); 96 scktRatio = g_tp.sckt_co_eff; 97 chip_PR_overhead = g_tp.chip_layout_overhead; 98 macro_PR_overhead = g_tp.macro_layout_overhead; 99 icache.area.set_area(icache.area.get_area()+ icache.caches->local_result.area); 100 area.set_area(area.get_area()+ icache.caches->local_result.area); 101 //output_data_csv(icache.caches.local_result); 102 103 104 /* 105 *iCache controllers 106 *miss buffer Each MSHR contains enough state 107 *to handle one or more accesses of any type to a single memory line. 108 *Due to the generality of the MSHR mechanism, 109 *the amount of state involved is non-trivial: 110 *including the address, pointers to the cache entry and destination register, 111 *written data, and various other pieces of state. 112 */ 113 interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; 114 tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; 115 data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + icache.caches->l_ip.line_sz*8; 116 interface_ip.specific_tag = 1; 117 interface_ip.tag_w = tag; 118 interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); 119 interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[0]*interface_ip.line_sz; 120 interface_ip.assoc = 0; 121 interface_ip.nbanks = 1; 122 interface_ip.out_w = interface_ip.line_sz*8; 123 interface_ip.access_mode = 0; 124 interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;//means cycle time 125 interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;//means access time 126 interface_ip.obj_func_dyn_energy = 0; 127 interface_ip.obj_func_dyn_power = 0; 128 interface_ip.obj_func_leak_power = 0; 129 interface_ip.obj_func_cycle_t = 1; 130 interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; 131 interface_ip.num_rd_ports = 0; 132 interface_ip.num_wr_ports = 0; 133 interface_ip.num_se_rd_ports = 0; 134 interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; 135 icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); 136 icache.area.set_area(icache.area.get_area()+ icache.missb->local_result.area); 137 area.set_area(area.get_area()+ icache.missb->local_result.area); 138 //output_data_csv(icache.missb.local_result); 139 140 //fill buffer 141 tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; 142 data = icache.caches->l_ip.line_sz; 143 interface_ip.specific_tag = 1; 144 interface_ip.tag_w = tag; 145 interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); 146 interface_ip.cache_sz = data*XML->sys.core[ithCore].icache.buffer_sizes[1]; 147 interface_ip.assoc = 0; 148 interface_ip.nbanks = 1; 149 interface_ip.out_w = interface_ip.line_sz*8; 150 interface_ip.access_mode = 0; 151 interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; 152 interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; 153 interface_ip.obj_func_dyn_energy = 0; 154 interface_ip.obj_func_dyn_power = 0; 155 interface_ip.obj_func_leak_power = 0; 156 interface_ip.obj_func_cycle_t = 1; 157 interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; 158 interface_ip.num_rd_ports = 0; 159 interface_ip.num_wr_ports = 0; 160 interface_ip.num_se_rd_ports = 0; 161 interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; 162 icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); 163 icache.area.set_area(icache.area.get_area()+ icache.ifb->local_result.area); 164 area.set_area(area.get_area()+ icache.ifb->local_result.area); 165 //output_data_csv(icache.ifb.local_result); 166 167 //prefetch buffer 168 tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. 169 data = icache.caches->l_ip.line_sz;//separate queue to prevent from cache polution. 170 interface_ip.specific_tag = 1; 171 interface_ip.tag_w = tag; 172 interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); 173 interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[2]*interface_ip.line_sz; 174 interface_ip.assoc = 0; 175 interface_ip.nbanks = 1; 176 interface_ip.out_w = interface_ip.line_sz*8; 177 interface_ip.access_mode = 0; 178 interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; 179 interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; 180 interface_ip.obj_func_dyn_energy = 0; 181 interface_ip.obj_func_dyn_power = 0; 182 interface_ip.obj_func_leak_power = 0; 183 interface_ip.obj_func_cycle_t = 1; 184 interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; 185 interface_ip.num_rd_ports = 0; 186 interface_ip.num_wr_ports = 0; 187 interface_ip.num_se_rd_ports = 0; 188 interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; 189 icache.prefetchb = new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); 190 icache.area.set_area(icache.area.get_area()+ icache.prefetchb->local_result.area); 191 area.set_area(area.get_area()+ icache.prefetchb->local_result.area); 192 //output_data_csv(icache.prefetchb.local_result); 193 194 //Instruction buffer 195 data = XML->sys.core[ithCore].instruction_length*XML->sys.core[ithCore].peak_issue_width;//icache.caches.l_ip.line_sz; //multiple threads timing sharing the instruction buffer. 196 interface_ip.is_cache = false; 197 interface_ip.pure_ram = true; 198 interface_ip.pure_cam = false; 199 interface_ip.line_sz = int(ceil(data/8.0)); 200 interface_ip.cache_sz = XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz>64? 201 XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz:64; 202 interface_ip.assoc = 1; 203 interface_ip.nbanks = 1; 204 interface_ip.out_w = interface_ip.line_sz*8; 205 interface_ip.access_mode = 0; 206 interface_ip.throughput = 1.0/clockRate; 207 interface_ip.latency = 1.0/clockRate; 208 interface_ip.obj_func_dyn_energy = 0; 209 interface_ip.obj_func_dyn_power = 0; 210 interface_ip.obj_func_leak_power = 0; 211 interface_ip.obj_func_cycle_t = 1; 212 //NOTE: Assuming IB is time slice shared among threads, every fetch op will at least fetch "fetch width" instructions. 213 interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;//XML->sys.core[ithCore].fetch_width; 214 interface_ip.num_rd_ports = 0; 215 interface_ip.num_wr_ports = 0; 216 interface_ip.num_se_rd_ports = 0; 217 IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); 218 IB->area.set_area(IB->area.get_area()+ IB->local_result.area); 219 area.set_area(area.get_area()+ IB->local_result.area); 220 //output_data_csv(IB.IB.local_result); 221 222 // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width; 223 // inst_decoder.init_decoder(is_default, &interface_ip); 224 // inst_decoder.full_decoder_power(); 225 226 if (coredynp.predictionW>0) 227 { 228 /* 229 * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged 230 * It is only a cache without all the buffers in the cache controller since it is more like a 231 * look up table than a cache with cache controller. When access miss, no load from other places 232 * such as main memory (not actively fill the misses), it is passively updated under two circumstances: 233 * 1) when BPT@ID stage finds out current is a taken branch while BTB missed 234 * 2) When BPT@ID stage predicts differently than BTB 235 * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid) 236 * 4) when EXEU find out wrong target has been provided from BTB. 237 * 238 */ 239 size = XML->sys.core[ithCore].BTB.BTB_config[0]; 240 line = XML->sys.core[ithCore].BTB.BTB_config[1]; 241 assoc = XML->sys.core[ithCore].BTB.BTB_config[2]; 242 banks = XML->sys.core[ithCore].BTB.BTB_config[3]; 243 idx = debug?9:int(ceil(log2(size/line/assoc))); 244// tag = debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS; 245 tag = debug?51:XML->sys.virtual_address_width + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS; 246 interface_ip.is_cache = true; 247 interface_ip.pure_ram = false; 248 interface_ip.pure_cam = false; 249 interface_ip.specific_tag = 1; 250 interface_ip.tag_w = tag; 251 interface_ip.cache_sz = debug?32768:size; 252 interface_ip.line_sz = debug?64:line; 253 interface_ip.assoc = debug?8:assoc; 254 interface_ip.nbanks = debug?1:banks; 255 interface_ip.out_w = interface_ip.line_sz*8; 256 interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; 257 interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[4]/clockRate; 258 interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[5]/clockRate; 259 interface_ip.obj_func_dyn_energy = 0; 260 interface_ip.obj_func_dyn_power = 0; 261 interface_ip.obj_func_leak_power = 0; 262 interface_ip.obj_func_cycle_t = 1; 263 interface_ip.num_rw_ports = 1; 264 interface_ip.num_rd_ports = coredynp.predictionW; 265 interface_ip.num_wr_ports = coredynp.predictionW; 266 interface_ip.num_se_rd_ports = 0; 267 BTB = new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, coredynp.opt_local, coredynp.core_ty); 268 BTB->area.set_area(BTB->area.get_area()+ BTB->local_result.area); 269 area.set_area(area.get_area()+ BTB->local_result.area); 270 ///cout<<"area="<<area<<endl; 271 272 BPT = new BranchPredictor(XML, ithCore, &interface_ip,coredynp); 273 area.set_area(area.get_area()+ BPT->area.get_area()); 274 } 275 276 ID_inst = new inst_decoder(is_default, &interface_ip, 277 coredynp.opcode_length, 1/*Decoder should not know how many by itself*/, 278 coredynp.x86, 279 Core_device, coredynp.core_ty); 280 281 ID_operand = new inst_decoder(is_default, &interface_ip, 282 coredynp.arch_ireg_width, 1, 283 coredynp.x86, 284 Core_device, coredynp.core_ty); 285 286 ID_misc = new inst_decoder(is_default, &interface_ip, 287 8/* Prefix field etc upto 14B*/, 1, 288 coredynp.x86, 289 Core_device, coredynp.core_ty); 290 //TODO: X86 decoder should decode the inst in cyclic mode under the control of squencer. 291 //So the dynamic power should be multiplied by a few times. 292 area.set_area(area.get_area()+ (ID_inst->area.get_area() 293 +ID_operand->area.get_area() 294 +ID_misc->area.get_area())*coredynp.decodeW); 295 296} 297 298 299BranchPredictor::BranchPredictor(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) 300:XML(XML_interface), 301 ithCore(ithCore_), 302 interface_ip(*interface_ip_), 303 coredynp(dyn_p_), 304 globalBPT(0), 305 localBPT(0), 306 L1_localBPT(0), 307 L2_localBPT(0), 308 chooser(0), 309 RAS(0), 310 exist(exist_) 311{ 312 /* 313 * Branch Predictor, accessed during ID stage. 314 * McPAT's branch predictor model is the tournament branch predictor used in Alpha 21264, 315 * including global predictor, local two level predictor, and Chooser. 316 * The Branch predictor also includes a RAS (return address stack) for function calls 317 * Branch predictors are tagged by thread ID and modeled as 1-way associative $ 318 * However RAS return address stacks are duplicated for each thread. 319 * TODO:Data Width need to be computed more precisely * 320 */ 321 if (!exist) return; 322 int tag, data; 323 324 clockRate = coredynp.clockRate; 325 executionTime = coredynp.executionTime; 326 interface_ip.assoc = 1; 327 interface_ip.pure_cam = false; 328 if (coredynp.multithreaded) 329 { 330 331 tag = int(log2(coredynp.num_hthreads)+ EXTRA_TAG_BITS); 332 interface_ip.specific_tag = 1; 333 interface_ip.tag_w = tag; 334 335 interface_ip.is_cache = true; 336 interface_ip.pure_ram = false; 337 } 338 else 339 { 340 interface_ip.is_cache = false; 341 interface_ip.pure_ram = true; 342 343 } 344 //Global predictor 345 data = int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits/8.0)); 346 interface_ip.line_sz = data; 347 interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.global_predictor_entries; 348 interface_ip.nbanks = 1; 349 interface_ip.out_w = interface_ip.line_sz*8; 350 interface_ip.access_mode = 2; 351 interface_ip.throughput = 1.0/clockRate; 352 interface_ip.latency = 1.0/clockRate; 353 interface_ip.obj_func_dyn_energy = 0; 354 interface_ip.obj_func_dyn_power = 0; 355 interface_ip.obj_func_leak_power = 0; 356 interface_ip.obj_func_cycle_t = 1; 357 interface_ip.num_rw_ports = 0; 358 interface_ip.num_rd_ports = coredynp.predictionW; 359 interface_ip.num_wr_ports = coredynp.predictionW; 360 interface_ip.num_se_rd_ports = 0; 361 globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); 362 globalBPT->area.set_area(globalBPT->area.get_area()+ globalBPT->local_result.area); 363 area.set_area(area.get_area()+ globalBPT->local_result.area); 364 365 //Local BPT (Level 1) 366 data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0]/8.0)); 367 interface_ip.line_sz = data; 368 interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries; 369 interface_ip.nbanks = 1; 370 interface_ip.out_w = interface_ip.line_sz*8; 371 interface_ip.access_mode = 2; 372 interface_ip.throughput = 1.0/clockRate; 373 interface_ip.latency = 1.0/clockRate; 374 interface_ip.obj_func_dyn_energy = 0; 375 interface_ip.obj_func_dyn_power = 0; 376 interface_ip.obj_func_leak_power = 0; 377 interface_ip.obj_func_cycle_t = 1; 378 interface_ip.num_rw_ports = 0; 379 interface_ip.num_rd_ports = coredynp.predictionW; 380 interface_ip.num_wr_ports = coredynp.predictionW; 381 interface_ip.num_se_rd_ports = 0; 382 L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); 383 L1_localBPT->area.set_area(L1_localBPT->area.get_area()+ L1_localBPT->local_result.area); 384 area.set_area(area.get_area()+ L1_localBPT->local_result.area); 385 386 //Local BPT (Level 2) 387 data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1]/8.0)); 388 interface_ip.line_sz = data; 389 interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries; 390 interface_ip.nbanks = 1; 391 interface_ip.out_w = interface_ip.line_sz*8; 392 interface_ip.access_mode = 2; 393 interface_ip.throughput = 1.0/clockRate; 394 interface_ip.latency = 1.0/clockRate; 395 interface_ip.obj_func_dyn_energy = 0; 396 interface_ip.obj_func_dyn_power = 0; 397 interface_ip.obj_func_leak_power = 0; 398 interface_ip.obj_func_cycle_t = 1; 399 interface_ip.num_rw_ports = 0; 400 interface_ip.num_rd_ports = coredynp.predictionW; 401 interface_ip.num_wr_ports = coredynp.predictionW; 402 interface_ip.num_se_rd_ports = 0; 403 L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); 404 L2_localBPT->area.set_area(L2_localBPT->area.get_area()+ L2_localBPT->local_result.area); 405 area.set_area(area.get_area()+ L2_localBPT->local_result.area); 406 407 //Chooser 408 data = int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits/8.0)); 409 interface_ip.line_sz = data; 410 interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.chooser_predictor_entries; 411 interface_ip.nbanks = 1; 412 interface_ip.out_w = interface_ip.line_sz*8; 413 interface_ip.access_mode = 2; 414 interface_ip.throughput = 1.0/clockRate; 415 interface_ip.latency = 1.0/clockRate; 416 interface_ip.obj_func_dyn_energy = 0; 417 interface_ip.obj_func_dyn_power = 0; 418 interface_ip.obj_func_leak_power = 0; 419 interface_ip.obj_func_cycle_t = 1; 420 interface_ip.num_rw_ports = 0; 421 interface_ip.num_rd_ports = coredynp.predictionW; 422 interface_ip.num_wr_ports = coredynp.predictionW; 423 interface_ip.num_se_rd_ports = 0; 424 chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, coredynp.opt_local, coredynp.core_ty); 425 chooser->area.set_area(chooser->area.get_area()+ chooser->local_result.area); 426 area.set_area(area.get_area()+ chooser->local_result.area); 427 428 //RAS return address stacks are Duplicated for each thread. 429 interface_ip.is_cache = false; 430 interface_ip.pure_ram = true; 431 data = int(ceil(coredynp.pc_width/8.0)); 432 interface_ip.line_sz = data; 433 interface_ip.cache_sz = data*XML->sys.core[ithCore].RAS_size; 434 interface_ip.assoc = 1; 435 interface_ip.nbanks = 1; 436 interface_ip.out_w = interface_ip.line_sz*8; 437 interface_ip.access_mode = 2; 438 interface_ip.throughput = 1.0/clockRate; 439 interface_ip.latency = 1.0/clockRate; 440 interface_ip.obj_func_dyn_energy = 0; 441 interface_ip.obj_func_dyn_power = 0; 442 interface_ip.obj_func_leak_power = 0; 443 interface_ip.obj_func_cycle_t = 1; 444 interface_ip.num_rw_ports = 0; 445 interface_ip.num_rd_ports = coredynp.predictionW; 446 interface_ip.num_wr_ports = coredynp.predictionW; 447 interface_ip.num_se_rd_ports = 0; 448 RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty); 449 RAS->area.set_area(RAS->area.get_area()+ RAS->local_result.area*coredynp.num_hthreads); 450 area.set_area(area.get_area()+ RAS->local_result.area*coredynp.num_hthreads); 451 452} 453 454SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) 455:XML(XML_interface), 456 ithCore(ithCore_), 457 interface_ip(*interface_ip_), 458 coredynp(dyn_p_), 459 int_inst_window(0), 460 fp_inst_window(0), 461 ROB(0), 462 instruction_selection(0), 463 exist(exist_) 464 { 465 if (!exist) return; 466 int tag, data; 467 bool is_default=true; 468 string tmp_name; 469 470 clockRate = coredynp.clockRate; 471 executionTime = coredynp.executionTime; 472 if ((coredynp.core_ty==Inorder && coredynp.multithreaded)) 473 { 474 //Instruction issue queue, in-order multi-issue or multithreaded processor also has this structure. Unified window for Inorder processors 475 tag = int(log2(XML->sys.core[ithCore].number_hardware_threads)*coredynp.perThreadState);//This is the normal thread state bits based on Niagara Design 476 data = XML->sys.core[ithCore].instruction_length; 477 //NOTE: x86 inst can be very lengthy, up to 15B. Source: Intel® 64 and IA-32 Architectures 478 //Software Developer’s Manual 479 interface_ip.is_cache = true; 480 interface_ip.pure_cam = false; 481 interface_ip.pure_ram = false; 482 interface_ip.line_sz = int(ceil(data/8.0)); 483 interface_ip.specific_tag = 1; 484 interface_ip.tag_w = tag; 485 interface_ip.cache_sz = XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz>64?XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz:64; 486 interface_ip.assoc = 0; 487 interface_ip.nbanks = 1; 488 interface_ip.out_w = interface_ip.line_sz*8; 489 interface_ip.access_mode = 1; 490 interface_ip.throughput = 1.0/clockRate; 491 interface_ip.latency = 1.0/clockRate; 492 interface_ip.obj_func_dyn_energy = 0; 493 interface_ip.obj_func_dyn_power = 0; 494 interface_ip.obj_func_leak_power = 0; 495 interface_ip.obj_func_cycle_t = 1; 496 interface_ip.num_rw_ports = 0; 497 interface_ip.num_rd_ports = coredynp.peak_issueW; 498 interface_ip.num_wr_ports = coredynp.peak_issueW; 499 interface_ip.num_se_rd_ports = 0; 500 interface_ip.num_search_ports = coredynp.peak_issueW; 501 int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, coredynp.opt_local, coredynp.core_ty); 502 int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); 503 area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); 504 //output_data_csv(iRS.RS.local_result); 505 Iw_height =int_inst_window->local_result.cache_ht; 506 507 /* 508 * selection logic 509 * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up 510 * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who 511 * at the issue stage. 512 */ 513 514 instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size, 515 coredynp.peak_issueW*XML->sys.core[ithCore].number_hardware_threads, 516 &interface_ip, Core_device, coredynp.core_ty); 517 } 518 519 if (coredynp.core_ty==OOO) 520 { 521 /* 522 * CAM based instruction window 523 * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored 524 * For RS based OOO it is the Reservation station, where both tags and values of phy regs are stored 525 * It is written once and read twice(two operands) before an instruction can be issued. 526 * X86 instruction can be very long up to 15B. add instruction length in XML 527 */ 528 if(coredynp.scheu_ty==PhysicalRegFile) 529 { 530 tag = coredynp.phy_ireg_width; 531 // Each time only half of the tag is compared, but two tag should be stored. 532 // This underestimate the search power 533 data = int((ceil((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width))/2.0)/8.0)); 534 //Data width being divided by 2 means only after both operands available the whole data will be read out. 535 //This is modeled using two equivalent readouts with half of the data width 536 tmp_name = "InstIssueQueue"; 537 } 538 else 539 { 540 tag = coredynp.phy_ireg_width; 541 // Each time only half of the tag is compared, but two tag should be stored. 542 // This underestimate the search power 543 data = int(ceil(((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width)+ 544 2*coredynp.int_data_width)/2.0)/8.0)); 545 //Data width being divided by 2 means only after both operands available the whole data will be read out. 546 //This is modeled using two equivalent readouts with half of the data width 547 548 tmp_name = "IntReservationStation"; 549 } 550 interface_ip.is_cache = true; 551 interface_ip.pure_cam = false; 552 interface_ip.pure_ram = false; 553 interface_ip.line_sz = data; 554 interface_ip.cache_sz = data*XML->sys.core[ithCore].instruction_window_size; 555 interface_ip.assoc = 0; 556 interface_ip.nbanks = 1; 557 interface_ip.out_w = interface_ip.line_sz*8; 558 interface_ip.specific_tag = 1; 559 interface_ip.tag_w = tag; 560 interface_ip.access_mode = 0; 561 interface_ip.throughput = 2*1.0/clockRate; 562 interface_ip.latency = 2*1.0/clockRate; 563 interface_ip.obj_func_dyn_energy = 0; 564 interface_ip.obj_func_dyn_power = 0; 565 interface_ip.obj_func_leak_power = 0; 566 interface_ip.obj_func_cycle_t = 1; 567 interface_ip.num_rw_ports = 0; 568 interface_ip.num_rd_ports = coredynp.peak_issueW; 569 interface_ip.num_wr_ports = coredynp.peak_issueW; 570 interface_ip.num_se_rd_ports = 0; 571 interface_ip.num_search_ports = coredynp.peak_issueW; 572 int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty); 573 int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); 574 area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); 575 Iw_height =int_inst_window->local_result.cache_ht; 576 //FU inst window 577 if(coredynp.scheu_ty==PhysicalRegFile) 578 { 579 tag = 2*coredynp.phy_freg_width;// TODO: each time only half of the tag is compared 580 data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width))/8.0)); 581 tmp_name = "FPIssueQueue"; 582 } 583 else 584 { 585 tag = 2*coredynp.phy_ireg_width; 586 data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width)+ 587 2*coredynp.fp_data_width)/8.0)); 588 tmp_name = "FPReservationStation"; 589 } 590 interface_ip.is_cache = true; 591 interface_ip.pure_cam = false; 592 interface_ip.pure_ram = false; 593 interface_ip.line_sz = data; 594 interface_ip.cache_sz = data*XML->sys.core[ithCore].fp_instruction_window_size; 595 interface_ip.assoc = 0; 596 interface_ip.nbanks = 1; 597 interface_ip.out_w = interface_ip.line_sz*8; 598 interface_ip.specific_tag = 1; 599 interface_ip.tag_w = tag; 600 interface_ip.access_mode = 0; 601 interface_ip.throughput = 1.0/clockRate; 602 interface_ip.latency = 1.0/clockRate; 603 interface_ip.obj_func_dyn_energy = 0; 604 interface_ip.obj_func_dyn_power = 0; 605 interface_ip.obj_func_leak_power = 0; 606 interface_ip.obj_func_cycle_t = 1; 607 interface_ip.num_rw_ports = 0; 608 interface_ip.num_rd_ports = coredynp.fp_issueW; 609 interface_ip.num_wr_ports = coredynp.fp_issueW; 610 interface_ip.num_se_rd_ports = 0; 611 interface_ip.num_search_ports = coredynp.fp_issueW; 612 fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty); 613 fp_inst_window->area.set_area(fp_inst_window->area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines); 614 area.set_area(area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines); 615 fp_Iw_height =fp_inst_window->local_result.cache_ht; 616 617 if (XML->sys.core[ithCore].ROB_size >0) 618 { 619 /* 620 * if ROB_size = 0, then the target processor does not support hardware-based 621 * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which 622 * means branch must be resolved before instruction issued into instruction window, since 623 * there is no change to flush miss-predict branch path after instructions are issued in this situation. 624 * 625 * ROB.ROB size = inflight inst. ROB is unified for int and fp inst. 626 * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7. 627 * However, this approach is abandoned due to its high power and poor scalablility. 628 * McPAT uses current implementation of ROB as circular buffer. 629 * ROB is written once when instruction is issued and read once when the instruction is committed. * 630 */ 631 int robExtra = int(ceil(5 + log2(coredynp.num_hthreads))); 632 //5 bits are: busy, Issued, Finished, speculative, valid 633 if(coredynp.scheu_ty==PhysicalRegFile) 634 { 635 //PC is to id the instruction for recover exception. 636 //inst is used to map the renamed dest. registers.so that commit stage can know which reg/RRAT to update 637// data = int(ceil((robExtra+coredynp.pc_width + 638// coredynp.instruction_length + 2*coredynp.phy_ireg_width)/8.0)); 639 data = int(ceil((robExtra+coredynp.pc_width + 640 coredynp.phy_ireg_width)/8.0)); 641 } 642 else 643 { 644 //in RS based OOO, ROB also contains value of destination reg 645// data = int(ceil((robExtra+coredynp.pc_width + 646// coredynp.instruction_length + 2*coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0)); 647 data = int(ceil((robExtra + coredynp.pc_width + 648 coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0)); 649 } 650 interface_ip.is_cache = false; 651 interface_ip.pure_cam = false; 652 interface_ip.pure_ram = true; 653 interface_ip.line_sz = data; 654 interface_ip.cache_sz = data*XML->sys.core[ithCore].ROB_size;//The XML ROB size is for all threads 655 interface_ip.assoc = 1; 656 interface_ip.nbanks = 1; 657 interface_ip.out_w = interface_ip.line_sz*8; 658 interface_ip.access_mode = 1; 659 interface_ip.throughput = 1.0/clockRate; 660 interface_ip.latency = 1.0/clockRate; 661 interface_ip.obj_func_dyn_energy = 0; 662 interface_ip.obj_func_dyn_power = 0; 663 interface_ip.obj_func_leak_power = 0; 664 interface_ip.obj_func_cycle_t = 1; 665 interface_ip.num_rw_ports = 0; 666 interface_ip.num_rd_ports = coredynp.peak_commitW; 667 interface_ip.num_wr_ports = coredynp.peak_issueW; 668 interface_ip.num_se_rd_ports = 0; 669 interface_ip.num_search_ports = 0; 670 ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); 671 ROB->area.set_area(ROB->area.get_area()+ ROB->local_result.area*coredynp.num_pipelines); 672 area.set_area(area.get_area()+ ROB->local_result.area*coredynp.num_pipelines); 673 ROB_height =ROB->local_result.cache_ht; 674 } 675 676 instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size, 677 coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty); 678 } 679} 680 681LoadStoreU::LoadStoreU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) 682:XML(XML_interface), 683 ithCore(ithCore_), 684 interface_ip(*interface_ip_), 685 coredynp(dyn_p_), 686 LSQ(0), 687 exist(exist_) 688{ 689 if (!exist) return; 690 int idx, tag, data, size, line, assoc, banks; 691 bool debug= false; 692 int ldst_opcode = XML->sys.core[ithCore].opcode_width;//16; 693 694 clockRate = coredynp.clockRate; 695 executionTime = coredynp.executionTime; 696 cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7]; 697 698 interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; 699 interface_ip.is_cache = true; 700 interface_ip.pure_cam = false; 701 interface_ip.pure_ram = false; 702 //Dcache 703 size = (int)XML->sys.core[ithCore].dcache.dcache_config[0]; 704 line = (int)XML->sys.core[ithCore].dcache.dcache_config[1]; 705 assoc = (int)XML->sys.core[ithCore].dcache.dcache_config[2]; 706 banks = (int)XML->sys.core[ithCore].dcache.dcache_config[3]; 707 idx = debug?9:int(ceil(log2(size/line/assoc))); 708 tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS; 709 interface_ip.specific_tag = 1; 710 interface_ip.tag_w = tag; 711 interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].dcache.dcache_config[0]; 712 interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].dcache.dcache_config[1]; 713 interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].dcache.dcache_config[2]; 714 interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].dcache.dcache_config[3]; 715 interface_ip.out_w = interface_ip.line_sz*8; 716 interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; 717 interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; 718 interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; 719 interface_ip.is_cache = true; 720 interface_ip.obj_func_dyn_energy = 0; 721 interface_ip.obj_func_dyn_power = 0; 722 interface_ip.obj_func_leak_power = 0; 723 interface_ip.obj_func_cycle_t = 1; 724 interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least. 725 interface_ip.num_rd_ports = 0; 726 interface_ip.num_wr_ports = 0; 727 interface_ip.num_se_rd_ports = 0; 728 dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, coredynp.opt_local, coredynp.core_ty); 729 dcache.area.set_area(dcache.area.get_area()+ dcache.caches->local_result.area); 730 area.set_area(area.get_area()+ dcache.caches->local_result.area); 731 //output_data_csv(dcache.caches.local_result); 732 733 //dCache controllers 734 //miss buffer 735 tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; 736 data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + dcache.caches->l_ip.line_sz*8; 737 interface_ip.specific_tag = 1; 738 interface_ip.tag_w = tag; 739 interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); 740 interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[0]*interface_ip.line_sz; 741 interface_ip.assoc = 0; 742 interface_ip.nbanks = 1; 743 interface_ip.out_w = interface_ip.line_sz*8; 744 interface_ip.access_mode = 2; 745 interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; 746 interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; 747 interface_ip.obj_func_dyn_energy = 0; 748 interface_ip.obj_func_dyn_power = 0; 749 interface_ip.obj_func_leak_power = 0; 750 interface_ip.obj_func_cycle_t = 1; 751 interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; 752 interface_ip.num_rd_ports = 0; 753 interface_ip.num_wr_ports = 0; 754 interface_ip.num_se_rd_ports = 0; 755 dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); 756 dcache.area.set_area(dcache.area.get_area()+ dcache.missb->local_result.area); 757 area.set_area(area.get_area()+ dcache.missb->local_result.area); 758 //output_data_csv(dcache.missb.local_result); 759 760 //fill buffer 761 tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; 762 data = dcache.caches->l_ip.line_sz; 763 interface_ip.specific_tag = 1; 764 interface_ip.tag_w = tag; 765 interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); 766 interface_ip.cache_sz = data*XML->sys.core[ithCore].dcache.buffer_sizes[1]; 767 interface_ip.assoc = 0; 768 interface_ip.nbanks = 1; 769 interface_ip.out_w = interface_ip.line_sz*8; 770 interface_ip.access_mode = 2; 771 interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; 772 interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; 773 interface_ip.obj_func_dyn_energy = 0; 774 interface_ip.obj_func_dyn_power = 0; 775 interface_ip.obj_func_leak_power = 0; 776 interface_ip.obj_func_cycle_t = 1; 777 interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; 778 interface_ip.num_rd_ports = 0; 779 interface_ip.num_wr_ports = 0; 780 interface_ip.num_se_rd_ports = 0; 781 dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); 782 dcache.area.set_area(dcache.area.get_area()+ dcache.ifb->local_result.area); 783 area.set_area(area.get_area()+ dcache.ifb->local_result.area); 784 //output_data_csv(dcache.ifb.local_result); 785 786 //prefetch buffer 787 tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. 788 data = dcache.caches->l_ip.line_sz;//separate queue to prevent from cache polution. 789 interface_ip.specific_tag = 1; 790 interface_ip.tag_w = tag; 791 interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); 792 interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[2]*interface_ip.line_sz; 793 interface_ip.assoc = 0; 794 interface_ip.nbanks = 1; 795 interface_ip.out_w = interface_ip.line_sz*8; 796 interface_ip.access_mode = 2; 797 interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; 798 interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; 799 interface_ip.obj_func_dyn_energy = 0; 800 interface_ip.obj_func_dyn_power = 0; 801 interface_ip.obj_func_leak_power = 0; 802 interface_ip.obj_func_cycle_t = 1; 803 interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; 804 interface_ip.num_rd_ports = 0; 805 interface_ip.num_wr_ports = 0; 806 interface_ip.num_se_rd_ports = 0; 807 dcache.prefetchb = new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); 808 dcache.area.set_area(dcache.area.get_area()+ dcache.prefetchb->local_result.area); 809 area.set_area(area.get_area()+ dcache.prefetchb->local_result.area); 810 //output_data_csv(dcache.prefetchb.local_result); 811 812 //WBB 813 814 if (cache_p==Write_back) 815 { 816 tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; 817 data = dcache.caches->l_ip.line_sz; 818 interface_ip.specific_tag = 1; 819 interface_ip.tag_w = tag; 820 interface_ip.line_sz = data; 821 interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[3]*interface_ip.line_sz; 822 interface_ip.assoc = 0; 823 interface_ip.nbanks = 1; 824 interface_ip.out_w = interface_ip.line_sz*8; 825 interface_ip.access_mode = 2; 826 interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; 827 interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; 828 interface_ip.obj_func_dyn_energy = 0; 829 interface_ip.obj_func_dyn_power = 0; 830 interface_ip.obj_func_leak_power = 0; 831 interface_ip.obj_func_cycle_t = 1; 832 interface_ip.num_rw_ports = XML->sys.core[ithCore].memory_ports; 833 interface_ip.num_rd_ports = 0; 834 interface_ip.num_wr_ports = 0; 835 interface_ip.num_se_rd_ports = 0; 836 dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty); 837 dcache.area.set_area(dcache.area.get_area()+ dcache.wbb->local_result.area); 838 area.set_area(area.get_area()+ dcache.wbb->local_result.area); 839 //output_data_csv(dcache.wbb.local_result); 840 } 841 842 /* 843 * LSU--in-order processors do not have separate load queue: unified lsq 844 * partitioned among threads 845 * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ 846 */ 847 tag = ldst_opcode+XML->sys.virtual_address_width +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + EXTRA_TAG_BITS; 848 data = XML->sys.machine_bits; 849 interface_ip.is_cache = true; 850 interface_ip.line_sz = int(ceil(data/32.0))*4; 851 interface_ip.specific_tag = 1; 852 interface_ip.tag_w = tag; 853 interface_ip.cache_sz = XML->sys.core[ithCore].store_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads; 854 interface_ip.assoc = 0; 855 interface_ip.nbanks = 1; 856 interface_ip.out_w = interface_ip.line_sz*8; 857 interface_ip.access_mode = 1; 858 interface_ip.throughput = 1.0/clockRate; 859 interface_ip.latency = 1.0/clockRate; 860 interface_ip.obj_func_dyn_energy = 0; 861 interface_ip.obj_func_dyn_power = 0; 862 interface_ip.obj_func_leak_power = 0; 863 interface_ip.obj_func_cycle_t = 1; 864 interface_ip.num_rw_ports = 0; 865 interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; 866 interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; 867 interface_ip.num_se_rd_ports = 0; 868 interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports; 869 LSQ = new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, coredynp.opt_local, coredynp.core_ty); 870 LSQ->area.set_area(LSQ->area.get_area()+ LSQ->local_result.area); 871 area.set_area(area.get_area()+ LSQ->local_result.area); 872 area.set_area(area.get_area()*cdb_overhead); 873 //output_data_csv(LSQ.LSQ.local_result); 874 lsq_height=LSQ->local_result.cache_ht*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/ 875 876 if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) 877 { 878 interface_ip.line_sz = int(ceil(data/32.0))*4; 879 interface_ip.specific_tag = 1; 880 interface_ip.tag_w = tag; 881 interface_ip.cache_sz = XML->sys.core[ithCore].load_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads; 882 interface_ip.assoc = 0; 883 interface_ip.nbanks = 1; 884 interface_ip.out_w = interface_ip.line_sz*8; 885 interface_ip.access_mode = 1; 886 interface_ip.throughput = 1.0/clockRate; 887 interface_ip.latency = 1.0/clockRate; 888 interface_ip.obj_func_dyn_energy = 0; 889 interface_ip.obj_func_dyn_power = 0; 890 interface_ip.obj_func_leak_power = 0; 891 interface_ip.obj_func_cycle_t = 1; 892 interface_ip.num_rw_ports = 0; 893 interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; 894 interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; 895 interface_ip.num_se_rd_ports = 0; 896 interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports; 897 LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, coredynp.opt_local, coredynp.core_ty); 898 LoadQ->area.set_area(LoadQ->area.get_area()+ LoadQ->local_result.area); 899 area.set_area(area.get_area()+ LoadQ->local_result.area); 900 area.set_area(area.get_area()*cdb_overhead); 901 //output_data_csv(LoadQ.LoadQ.local_result); 902 lsq_height=(LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht)*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/ 903 } 904 905} 906 907MemManU::MemManU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) 908:XML(XML_interface), 909 ithCore(ithCore_), 910 interface_ip(*interface_ip_), 911 coredynp(dyn_p_), 912 itlb(0), 913 dtlb(0), 914 exist(exist_) 915{ 916 if (!exist) return; 917 int tag, data; 918 bool debug= false; 919 920 clockRate = coredynp.clockRate; 921 executionTime = coredynp.executionTime; 922 interface_ip.is_cache = true; 923 interface_ip.pure_cam = false; 924 interface_ip.pure_ram = false; 925 interface_ip.specific_tag = 1; 926 //Itlb TLBs are partioned among threads according to Nigara and Nehalem 927 tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS; 928 data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))); 929 interface_ip.tag_w = tag; 930 interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); 931 interface_ip.cache_sz = XML->sys.core[ithCore].itlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads; 932 interface_ip.assoc = 0; 933 interface_ip.nbanks = 1; 934 interface_ip.out_w = interface_ip.line_sz*8; 935 interface_ip.access_mode = 0; 936 interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; 937 interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; 938 interface_ip.obj_func_dyn_energy = 0; 939 interface_ip.obj_func_dyn_power = 0; 940 interface_ip.obj_func_leak_power = 0; 941 interface_ip.obj_func_cycle_t = 1; 942 interface_ip.num_rw_ports = 0; 943 interface_ip.num_rd_ports = 0; 944 interface_ip.num_wr_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; 945 interface_ip.num_se_rd_ports = 0; 946 interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; 947 itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty); 948 itlb->area.set_area(itlb->area.get_area()+ itlb->local_result.area); 949 area.set_area(area.get_area()+ itlb->local_result.area); 950 //output_data_csv(itlb.tlb.local_result); 951 952 //dtlb 953 tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS; 954 data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))); 955 interface_ip.specific_tag = 1; 956 interface_ip.tag_w = tag; 957 interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); 958 interface_ip.cache_sz = XML->sys.core[ithCore].dtlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads; 959 interface_ip.assoc = 0; 960 interface_ip.nbanks = 1; 961 interface_ip.out_w = interface_ip.line_sz*8; 962 interface_ip.access_mode = 0; 963 interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; 964 interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; 965 interface_ip.obj_func_dyn_energy = 0; 966 interface_ip.obj_func_dyn_power = 0; 967 interface_ip.obj_func_leak_power = 0; 968 interface_ip.obj_func_cycle_t = 1; 969 interface_ip.num_rw_ports = 0; 970 interface_ip.num_rd_ports = 0; 971 interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; 972 interface_ip.num_se_rd_ports = 0; 973 interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; 974 dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty); 975 dtlb->area.set_area(dtlb->area.get_area()+ dtlb->local_result.area); 976 area.set_area(area.get_area()+ dtlb->local_result.area); 977 //output_data_csv(dtlb.tlb.local_result); 978 979} 980 981RegFU::RegFU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) 982:XML(XML_interface), 983 ithCore(ithCore_), 984 interface_ip(*interface_ip_), 985 coredynp(dyn_p_), 986 IRF (0), 987 FRF (0), 988 RFWIN (0), 989 exist(exist_) 990 { 991 /* 992 * processors have separate architectural register files for each thread. 993 * therefore, the bypass buses need to travel across all the register files. 994 */ 995 if (!exist) return; 996 int data; 997 998 clockRate = coredynp.clockRate; 999 executionTime = coredynp.executionTime; 1000 //**********************************IRF*************************************** 1001 data = coredynp.int_data_width; 1002 interface_ip.is_cache = false; 1003 interface_ip.pure_cam = false; 1004 interface_ip.pure_ram = true; 1005 interface_ip.line_sz = int(ceil(data/32.0))*4; 1006 interface_ip.cache_sz = coredynp.num_IRF_entry*interface_ip.line_sz; 1007 interface_ip.assoc = 1; 1008 interface_ip.nbanks = 1; 1009 interface_ip.out_w = interface_ip.line_sz*8; 1010 interface_ip.access_mode = 1; 1011 interface_ip.throughput = 1.0/clockRate; 1012 interface_ip.latency = 1.0/clockRate; 1013 interface_ip.obj_func_dyn_energy = 0; 1014 interface_ip.obj_func_dyn_power = 0; 1015 interface_ip.obj_func_leak_power = 0; 1016 interface_ip.obj_func_cycle_t = 1; 1017 interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. 1018 interface_ip.num_rd_ports = 2*coredynp.peak_issueW; 1019 interface_ip.num_wr_ports = coredynp.peak_issueW; 1020 interface_ip.num_se_rd_ports = 0; 1021 IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, coredynp.opt_local, coredynp.core_ty); 1022 IRF->area.set_area(IRF->area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead); 1023 area.set_area(area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead); 1024 //area.set_area(area.get_area()*cdb_overhead); 1025 //output_data_csv(IRF.RF.local_result); 1026 1027 //**********************************FRF*************************************** 1028 data = coredynp.fp_data_width; 1029 interface_ip.is_cache = false; 1030 interface_ip.pure_cam = false; 1031 interface_ip.pure_ram = true; 1032 interface_ip.line_sz = int(ceil(data/32.0))*4; 1033 interface_ip.cache_sz = coredynp.num_FRF_entry*interface_ip.line_sz; 1034 interface_ip.assoc = 1; 1035 interface_ip.nbanks = 1; 1036 interface_ip.out_w = interface_ip.line_sz*8; 1037 interface_ip.access_mode = 1; 1038 interface_ip.throughput = 1.0/clockRate; 1039 interface_ip.latency = 1.0/clockRate; 1040 interface_ip.obj_func_dyn_energy = 0; 1041 interface_ip.obj_func_dyn_power = 0; 1042 interface_ip.obj_func_leak_power = 0; 1043 interface_ip.obj_func_cycle_t = 1; 1044 interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. 1045 interface_ip.num_rd_ports = 2*XML->sys.core[ithCore].issue_width; 1046 interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width; 1047 interface_ip.num_se_rd_ports = 0; 1048 FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, coredynp.opt_local, coredynp.core_ty); 1049 FRF->area.set_area(FRF->area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead); 1050 area.set_area(area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead); 1051 //area.set_area(area.get_area()*cdb_overhead); 1052 //output_data_csv(FRF.RF.local_result); 1053 int_regfile_height= IRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead); 1054 fp_regfile_height = FRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead); 1055 //since a EXU is associated with each pipeline, the cdb should not have longer length. 1056 if (coredynp.regWindowing) 1057 { 1058 //*********************************REG_WIN************************************ 1059 data = coredynp.int_data_width; //ECC, and usually 2 regs are transfered together during window shifting.Niagara Mega cell 1060 interface_ip.is_cache = false; 1061 interface_ip.pure_cam = false; 1062 interface_ip.pure_ram = true; 1063 interface_ip.line_sz = int(ceil(data/8.0)); 1064 interface_ip.cache_sz = XML->sys.core[ithCore].register_windows_size*IRF->l_ip.cache_sz*XML->sys.core[ithCore].number_hardware_threads; 1065 interface_ip.assoc = 1; 1066 interface_ip.nbanks = 1; 1067 interface_ip.out_w = interface_ip.line_sz*8; 1068 interface_ip.access_mode = 1; 1069 interface_ip.throughput = 4.0/clockRate; 1070 interface_ip.latency = 4.0/clockRate; 1071 interface_ip.obj_func_dyn_energy = 0; 1072 interface_ip.obj_func_dyn_power = 0; 1073 interface_ip.obj_func_leak_power = 0; 1074 interface_ip.obj_func_cycle_t = 1; 1075 interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. 1076 interface_ip.num_rd_ports = 0; 1077 interface_ip.num_wr_ports = 0; 1078 interface_ip.num_se_rd_ports = 0; 1079 RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, coredynp.opt_local, coredynp.core_ty); 1080 RFWIN->area.set_area(RFWIN->area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines); 1081 area.set_area(area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines); 1082 //output_data_csv(RFWIN.RF.local_result); 1083 } 1084 1085 1086 } 1087 1088EXECU::EXECU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_, const CoreDynParam & dyn_p_, bool exist_) 1089:XML(XML_interface), 1090 ithCore(ithCore_), 1091 interface_ip(*interface_ip_), 1092 lsq_height(lsq_height_), 1093 coredynp(dyn_p_), 1094 rfu(0), 1095 scheu(0), 1096 fp_u(0), 1097 exeu(0), 1098 mul(0), 1099 int_bypass(0), 1100 intTagBypass(0), 1101 int_mul_bypass(0), 1102 intTag_mul_Bypass(0), 1103 fp_bypass(0), 1104 fpTagBypass(0), 1105 exist(exist_) 1106{ 1107 if (!exist) return; 1108 double fu_height = 0.0; 1109 clockRate = coredynp.clockRate; 1110 executionTime = coredynp.executionTime; 1111 rfu = new RegFU(XML, ithCore, &interface_ip,coredynp); 1112 scheu = new SchedulerU(XML, ithCore, &interface_ip,coredynp); 1113 exeu = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, ALU); 1114 area.set_area(area.get_area()+ exeu->area.get_area() + rfu->area.get_area() +scheu->area.get_area() ); 1115 fu_height = exeu->FU_height; 1116 if (coredynp.num_fpus >0) 1117 { 1118 fp_u = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, FPU); 1119 area.set_area(area.get_area()+ fp_u->area.get_area()); 1120 } 1121 if (coredynp.num_muls >0) 1122 { 1123 mul = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, MUL); 1124 area.set_area(area.get_area()+ mul->area.get_area()); 1125 fu_height += mul->FU_height; 1126 } 1127 /* 1128 * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; fp_tag-broadcast 1129 * integer by pass has two paths and fp has 3 paths. 1130 * on the same bus there are multiple tri-state drivers and muxes that go to different components on the same bus 1131 */ 1132 if (XML->sys.Embedded) 1133 { 1134 interface_ip.wt =Global_30; 1135 interface_ip.wire_is_mat_type = 0; 1136 interface_ip.wire_os_mat_type = 0; 1137 interface_ip.throughput = 1.0/clockRate; 1138 interface_ip.latency = 1.0/clockRate; 1139 } 1140 else 1141 { 1142 interface_ip.wt =Global; 1143 interface_ip.wire_is_mat_type = 2;//start from semi-global since local wires are already used 1144 interface_ip.wire_os_mat_type = 2; 1145 interface_ip.throughput = 10.0/clockRate; //Do not care 1146 interface_ip.latency = 10.0/clockRate; 1147 } 1148 1149 if (coredynp.core_ty==Inorder) 1150 { 1151 int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32), 1152 rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3, 1153 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1154 bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area()); 1155 intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, 1156 rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, 1157 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1158 bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area()); 1159 1160 if (coredynp.num_muls>0) 1161 { 1162 int_mul_bypass = new interconnect("Mul Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5), 1163 rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3, 1164 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1165 bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); 1166 intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, 1167 rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, 1168 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1169 bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); 1170 } 1171 1172 if (coredynp.num_fpus>0) 1173 { 1174 fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5), 1175 rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3, 1176 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1177 bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); 1178 fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, 1179 rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, 1180 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1181 bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); 1182 } 1183 } 1184 else 1185 {//OOO 1186 if (coredynp.scheu_ty==PhysicalRegFile) 1187 { 1188 /* For physical register based OOO, 1189 * data broadcast interconnects cover across functional units, lsq, inst windows and register files, 1190 * while tag broadcast interconnects also cover across ROB 1191 */ 1192 int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), 1193 rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3, 1194 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1195 bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area()); 1196 intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, 1197 rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, 1198 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1199 1200 if (coredynp.num_muls>0) 1201 { 1202 int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), 1203 rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3, 1204 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1205 intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, 1206 rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, 1207 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1208 bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); 1209 bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); 1210 } 1211 1212 if (coredynp.num_fpus>0) 1213 { 1214 fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)), 1215 rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3, 1216 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1217 fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width, 1218 rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, 1219 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1220 bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); 1221 bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); 1222 } 1223 } 1224 else 1225 { 1226 /* 1227 * In RS based processor both data and tag are broadcast together, 1228 * covering functional units, lsq, nst windows, register files, and ROBs 1229 */ 1230 int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), 1231 rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, 1232 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1233 intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, 1234 rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, 1235 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1236 bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area()); 1237 bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area()); 1238 if (coredynp.num_muls>0) 1239 { 1240 int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), 1241 rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, 1242 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1243 intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, 1244 rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, 1245 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1246 bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); 1247 bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); 1248 } 1249 1250 if (coredynp.num_fpus>0) 1251 { 1252 fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)), 1253 rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, 1254 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1255 fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width, 1256 rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, 1257 false, 1.0, coredynp.opt_local, coredynp.core_ty); 1258 bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); 1259 bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); 1260 } 1261 } 1262 1263 1264 } 1265 area.set_area(area.get_area()+ bypass.area.get_area()); 1266} 1267 1268RENAMINGU::RENAMINGU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) 1269:XML(XML_interface), 1270 ithCore(ithCore_), 1271 interface_ip(*interface_ip_), 1272 coredynp(dyn_p_), 1273 iFRAT(0), 1274 fFRAT(0), 1275 iRRAT(0), 1276 fRRAT(0), 1277 ifreeL(0), 1278 ffreeL(0), 1279 idcl(0), 1280 fdcl(0), 1281 RAHT(0), 1282 exist(exist_) 1283 { 1284 /* 1285 * Although renaming logic maybe be used in in-order processors, 1286 * McPAT assumes no renaming logic is used since the performance gain is very limited and 1287 * the only major inorder processor with renaming logic is Itainium 1288 * that is a VLIW processor and different from current McPAT's model. 1289 * physical register base OOO must have Dual-RAT architecture or equivalent structure.FRAT:FrontRAT, RRAT:RetireRAT; 1290 * i,f prefix mean int and fp 1291 * RAT for all Renaming logic, random accessible checkpointing is used, but only update when instruction retires. 1292 * FRAT will be read twice and written once per instruction; 1293 * RRAT will be write once per instruction when committing and reads out all when context switch 1294 * checkpointing is implicit 1295 * Renaming logic is duplicated for each different hardware threads 1296 * 1297 * No Dual-RAT is needed in RS-based OOO processors, 1298 * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry, 1299 * to make sure all the renamings associated with the ROB to be released are updated at the same time. 1300 * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag, 1301 * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag, 1302 * 1303 * Both RAM and CAM have same DCL 1304 */ 1305 if (!exist) return; 1306 int tag, data, out_w; 1307// interface_ip.wire_is_mat_type = 0; 1308// interface_ip.wire_os_mat_type = 0; 1309// interface_ip.wt = Global_30; 1310 clockRate = coredynp.clockRate; 1311 executionTime = coredynp.executionTime; 1312 if (coredynp.core_ty==OOO) 1313 { 1314 //integer pipeline 1315 if (coredynp.scheu_ty==PhysicalRegFile) 1316 { 1317 if (coredynp.rm_ty ==RAMbased) 1318 { //FRAT with global checkpointing (GCs) please see paper tech report for detailed explaintions 1319 data = 33;//int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0)); 1320// data = int(ceil(coredynp.phy_ireg_width/8.0)); 1321 out_w = 1;//int(ceil(coredynp.phy_ireg_width/8.0)); 1322 interface_ip.is_cache = false; 1323 interface_ip.pure_cam = false; 1324 interface_ip.pure_ram = true; 1325 interface_ip.line_sz = data; 1326 interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size; 1327 interface_ip.assoc = 1; 1328 interface_ip.nbanks = 1; 1329 interface_ip.out_w = out_w*8; 1330 interface_ip.access_mode = 2; 1331 interface_ip.throughput = 1.0/clockRate; 1332 interface_ip.latency = 1.0/clockRate; 1333 interface_ip.obj_func_dyn_energy = 0; 1334 interface_ip.obj_func_dyn_power = 0; 1335 interface_ip.obj_func_leak_power = 0; 1336 interface_ip.obj_func_cycle_t = 1; 1337 interface_ip.num_rw_ports = 1;//the extra one port is for GCs 1338 interface_ip.num_rd_ports = 2*coredynp.decodeW; 1339 interface_ip.num_wr_ports = coredynp.decodeW; 1340 interface_ip.num_se_rd_ports = 0; 1341 iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); 1342 iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1343 area.set_area(area.get_area()+ iFRAT->area.get_area()); 1344 1345// //RAHT According to Intel, combine GC with FRAT is very costly. 1346// data = int(ceil(coredynp.phy_ireg_width/8.0)*coredynp.num_IRF_entry); 1347// out_w = data; 1348// interface_ip.is_cache = false; 1349// interface_ip.pure_cam = false; 1350// interface_ip.pure_ram = true; 1351// interface_ip.line_sz = data; 1352// interface_ip.cache_sz = data*coredynp.globalCheckpoint; 1353// interface_ip.assoc = 1; 1354// interface_ip.nbanks = 1; 1355// interface_ip.out_w = out_w*8; 1356// interface_ip.access_mode = 0; 1357// interface_ip.throughput = 1.0/clockRate; 1358// interface_ip.latency = 1.0/clockRate; 1359// interface_ip.obj_func_dyn_energy = 0; 1360// interface_ip.obj_func_dyn_power = 0; 1361// interface_ip.obj_func_leak_power = 0; 1362// interface_ip.obj_func_cycle_t = 1; 1363// interface_ip.num_rw_ports = 1;//the extra one port is for GCs 1364// interface_ip.num_rd_ports = 2*coredynp.decodeW; 1365// interface_ip.num_wr_ports = coredynp.decodeW; 1366// interface_ip.num_se_rd_ports = 0; 1367// iFRAT = new ArrayST(&interface_ip, "Int FrontRAT"); 1368// iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1369// area.set_area(area.get_area()+ iFRAT->area.get_area()); 1370 1371 //FRAT floating point 1372 data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0)); 1373 out_w = int(ceil(coredynp.phy_freg_width/8.0)); 1374 interface_ip.is_cache = false; 1375 interface_ip.pure_cam = false; 1376 interface_ip.pure_ram = true; 1377 interface_ip.line_sz = data; 1378 interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size; 1379 interface_ip.assoc = 1; 1380 interface_ip.nbanks = 1; 1381 interface_ip.out_w = out_w*8; 1382 interface_ip.access_mode = 2; 1383 interface_ip.throughput = 1.0/clockRate; 1384 interface_ip.latency = 1.0/clockRate; 1385 interface_ip.obj_func_dyn_energy = 0; 1386 interface_ip.obj_func_dyn_power = 0; 1387 interface_ip.obj_func_leak_power = 0; 1388 interface_ip.obj_func_cycle_t = 1; 1389 interface_ip.num_rw_ports = 1;//the extra one port is for GCs 1390 interface_ip.num_rd_ports = 2*coredynp.fp_decodeW; 1391 interface_ip.num_wr_ports = coredynp.fp_decodeW; 1392 interface_ip.num_se_rd_ports = 0; 1393 fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); 1394 fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1395 area.set_area(area.get_area()+ fFRAT->area.get_area()); 1396 1397 } 1398 else if ((coredynp.rm_ty ==CAMbased)) 1399 { 1400 //FRAT 1401 tag = coredynp.arch_ireg_width; 1402 data = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out 1403 out_w = int(ceil (coredynp.arch_ireg_width/8.0)); 1404 interface_ip.is_cache = true; 1405 interface_ip.pure_cam = false; 1406 interface_ip.pure_ram = false; 1407 interface_ip.line_sz = data; 1408 interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size; 1409 interface_ip.assoc = 0; 1410 interface_ip.nbanks = 1; 1411 interface_ip.out_w = out_w*8; 1412 interface_ip.specific_tag = 1; 1413 interface_ip.tag_w = tag; 1414 interface_ip.access_mode = 2; 1415 interface_ip.throughput = 1.0/clockRate; 1416 interface_ip.latency = 1.0/clockRate; 1417 interface_ip.obj_func_dyn_energy = 0; 1418 interface_ip.obj_func_dyn_power = 0; 1419 interface_ip.obj_func_leak_power = 0; 1420 interface_ip.obj_func_cycle_t = 1; 1421 interface_ip.num_rw_ports = 1;//for GCs 1422 interface_ip.num_rd_ports = coredynp.decodeW; 1423 interface_ip.num_wr_ports = coredynp.decodeW; 1424 interface_ip.num_se_rd_ports = 0; 1425 interface_ip.num_search_ports= 2*coredynp.decodeW; 1426 iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); 1427 iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1428 area.set_area(area.get_area()+ iFRAT->area.get_area()); 1429 1430 //FRAT for FP 1431 tag = coredynp.arch_freg_width; 1432 data = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out 1433 out_w = int(ceil (coredynp.arch_freg_width/8.0)); 1434 interface_ip.is_cache = true; 1435 interface_ip.pure_cam = false; 1436 interface_ip.pure_ram = false; 1437 interface_ip.line_sz = data; 1438 interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size; 1439 interface_ip.assoc = 0; 1440 interface_ip.nbanks = 1; 1441 interface_ip.out_w = out_w*8; 1442 interface_ip.specific_tag = 1; 1443 interface_ip.tag_w = tag; 1444 interface_ip.access_mode = 2; 1445 interface_ip.throughput = 1.0/clockRate; 1446 interface_ip.latency = 1.0/clockRate; 1447 interface_ip.obj_func_dyn_energy = 0; 1448 interface_ip.obj_func_dyn_power = 0; 1449 interface_ip.obj_func_leak_power = 0; 1450 interface_ip.obj_func_cycle_t = 1; 1451 interface_ip.num_rw_ports = 1;//for GCs 1452 interface_ip.num_rd_ports = coredynp.fp_decodeW; 1453 interface_ip.num_wr_ports = coredynp.fp_decodeW; 1454 interface_ip.num_se_rd_ports = 0; 1455 interface_ip.num_search_ports= 2*coredynp.fp_decodeW; 1456 fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); 1457 fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1458 area.set_area(area.get_area()+ fFRAT->area.get_area()); 1459 1460 } 1461 1462 //RRAT is always RAM based, does not have GCs, and is used only for record latest non-speculative mapping 1463 data = int(ceil(coredynp.phy_ireg_width/8.0)); 1464 interface_ip.is_cache = false; 1465 interface_ip.pure_cam = false; 1466 interface_ip.pure_ram = true; 1467 interface_ip.line_sz = data; 1468 interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*2;//HACK to make it as least 64B 1469 interface_ip.assoc = 1; 1470 interface_ip.nbanks = 1; 1471 interface_ip.out_w = interface_ip.line_sz*8; 1472 interface_ip.access_mode = 1; 1473 interface_ip.throughput = 1.0/clockRate; 1474 interface_ip.latency = 1.0/clockRate; 1475 interface_ip.obj_func_dyn_energy = 0; 1476 interface_ip.obj_func_dyn_power = 0; 1477 interface_ip.obj_func_leak_power = 0; 1478 interface_ip.obj_func_cycle_t = 1; 1479 interface_ip.num_rw_ports = 0; 1480 interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; 1481 interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; 1482 interface_ip.num_se_rd_ports = 0; 1483 iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); 1484 iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1485 area.set_area(area.get_area()+ iRRAT->area.get_area()); 1486 1487 //RRAT for FP 1488 data = int(ceil(coredynp.phy_freg_width/8.0)); 1489 interface_ip.is_cache = false; 1490 interface_ip.pure_cam = false; 1491 interface_ip.pure_ram = true; 1492 interface_ip.line_sz = data; 1493 interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2;//HACK to make it as least 64B 1494 interface_ip.assoc = 1; 1495 interface_ip.nbanks = 1; 1496 interface_ip.out_w = interface_ip.line_sz*8; 1497 interface_ip.access_mode = 1; 1498 interface_ip.throughput = 1.0/clockRate; 1499 interface_ip.latency = 1.0/clockRate; 1500 interface_ip.obj_func_dyn_energy = 0; 1501 interface_ip.obj_func_dyn_power = 0; 1502 interface_ip.obj_func_leak_power = 0; 1503 interface_ip.obj_func_cycle_t = 1; 1504 interface_ip.num_rw_ports = 0; 1505 interface_ip.num_rd_ports = coredynp.fp_decodeW; 1506 interface_ip.num_wr_ports = coredynp.fp_decodeW; 1507 interface_ip.num_se_rd_ports = 0; 1508 fRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); 1509 fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1510 area.set_area(area.get_area()+ fRRAT->area.get_area()); 1511 1512 //Freelist of renaming unit always RAM based 1513 //Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist 1514 // 2)When instruction commits the Phyregisters/ROB needed to be recycled. 1515 //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width 1516 data = int(ceil(coredynp.phy_ireg_width/8.0)); 1517 interface_ip.is_cache = false; 1518 interface_ip.pure_cam = false; 1519 interface_ip.pure_ram = true; 1520 interface_ip.line_sz = data; 1521 interface_ip.cache_sz = data*coredynp.num_ifreelist_entries; 1522 interface_ip.assoc = 1; 1523 interface_ip.nbanks = 1; 1524 interface_ip.out_w = interface_ip.line_sz*8; 1525 interface_ip.access_mode = 1; 1526 interface_ip.throughput = 1.0/clockRate; 1527 interface_ip.latency = 1.0/clockRate; 1528 interface_ip.obj_func_dyn_energy = 0; 1529 interface_ip.obj_func_dyn_power = 0; 1530 interface_ip.obj_func_leak_power = 0; 1531 interface_ip.obj_func_cycle_t = 1; 1532 interface_ip.num_rw_ports = 1;//TODO 1533 interface_ip.num_rd_ports = coredynp.decodeW; 1534 interface_ip.num_wr_ports = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width; 1535 //every cycle, (coredynp.decodeW -1) inst may need to send back it dest tags, committW insts needs to update freelist buffers 1536 interface_ip.num_se_rd_ports = 0; 1537 ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty); 1538 ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1539 area.set_area(area.get_area()+ ifreeL->area.get_area()); 1540 1541 //freelist for FP 1542 data = int(ceil(coredynp.phy_freg_width/8.0)); 1543 interface_ip.is_cache = false; 1544 interface_ip.pure_cam = false; 1545 interface_ip.pure_ram = true; 1546 interface_ip.line_sz = data; 1547 interface_ip.cache_sz = data*coredynp.num_ffreelist_entries; 1548 interface_ip.assoc = 1; 1549 interface_ip.nbanks = 1; 1550 interface_ip.out_w = interface_ip.line_sz*8; 1551 interface_ip.access_mode = 1; 1552 interface_ip.throughput = 1.0/clockRate; 1553 interface_ip.latency = 1.0/clockRate; 1554 interface_ip.obj_func_dyn_energy = 0; 1555 interface_ip.obj_func_dyn_power = 0; 1556 interface_ip.obj_func_leak_power = 0; 1557 interface_ip.obj_func_cycle_t = 1; 1558 interface_ip.num_rw_ports = 1; 1559 interface_ip.num_rd_ports = coredynp.fp_decodeW; 1560 interface_ip.num_wr_ports = coredynp.fp_decodeW -1 + XML->sys.core[ithCore].commit_width; 1561 interface_ip.num_se_rd_ports = 0; 1562 ffreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty); 1563 ffreeL->area.set_area(ffreeL->area.get_area()+ ffreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1564 area.set_area(area.get_area()+ ffreeL->area.get_area()); 1565 1566 idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR 1567 fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); 1568 1569 } 1570 else if (coredynp.scheu_ty==ReservationStation){ 1571 if (coredynp.rm_ty ==RAMbased){ 1572 /* 1573 * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry, 1574 * to make sure all the renamings associated with the ROB to be released are updated to ARF at the same time. 1575 * RAM based RAT for RS base OOO does not save the search operations. Its advantage is to have less entries than 1576 * CAM based RAT so that it is more scalable as number of ROB/physical regs increases. 1577 */ 1578 tag = coredynp.phy_ireg_width; 1579 data = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0)); 1580 out_w = int(ceil(coredynp.phy_ireg_width/8.0)); 1581 interface_ip.is_cache = true; 1582 interface_ip.pure_cam = false; 1583 interface_ip.pure_ram = false; 1584 interface_ip.line_sz = data; 1585 interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size; 1586 interface_ip.assoc = 0; 1587 interface_ip.nbanks = 1; 1588 interface_ip.out_w = out_w*8; 1589 interface_ip.access_mode = 2; 1590 interface_ip.throughput = 1.0/clockRate; 1591 interface_ip.latency = 1.0/clockRate; 1592 interface_ip.obj_func_dyn_energy = 0; 1593 interface_ip.obj_func_dyn_power = 0; 1594 interface_ip.obj_func_leak_power = 0; 1595 interface_ip.obj_func_cycle_t = 1; 1596 interface_ip.num_rw_ports = 1;//the extra one port is for GCs 1597 interface_ip.num_rd_ports = 2*coredynp.decodeW; 1598 interface_ip.num_wr_ports = coredynp.decodeW; 1599 interface_ip.num_se_rd_ports = 0; 1600 interface_ip.num_search_ports= coredynp.commitW;//TODO 1601 iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); 1602 iFRAT->local_result.adjust_area(); 1603 iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1604 area.set_area(area.get_area()+ iFRAT->area.get_area()); 1605 1606 //FP 1607 tag = coredynp.phy_freg_width; 1608 data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0)); 1609 out_w = int(ceil(coredynp.phy_freg_width/8.0)); 1610 interface_ip.is_cache = true; 1611 interface_ip.pure_cam = false; 1612 interface_ip.pure_ram = false; 1613 interface_ip.line_sz = data; 1614 interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size; 1615 interface_ip.assoc = 0; 1616 interface_ip.nbanks = 1; 1617 interface_ip.out_w = out_w*8; 1618 interface_ip.access_mode = 2; 1619 interface_ip.throughput = 1.0/clockRate; 1620 interface_ip.latency = 1.0/clockRate; 1621 interface_ip.obj_func_dyn_energy = 0; 1622 interface_ip.obj_func_dyn_power = 0; 1623 interface_ip.obj_func_leak_power = 0; 1624 interface_ip.obj_func_cycle_t = 1; 1625 interface_ip.num_rw_ports = 1;//the extra one port is for GCs 1626 interface_ip.num_rd_ports = 2*coredynp.fp_decodeW; 1627 interface_ip.num_wr_ports = coredynp.fp_decodeW; 1628 interface_ip.num_se_rd_ports = 0; 1629 interface_ip.num_search_ports= coredynp.fp_decodeW;//actually is fp commit width 1630 fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); 1631 fFRAT->local_result.adjust_area(); 1632 fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1633 area.set_area(area.get_area()+ fFRAT->area.get_area()); 1634 1635 } 1636 else if ((coredynp.rm_ty ==CAMbased)) 1637 { 1638 //FRAT 1639 tag = coredynp.arch_ireg_width; 1640 data = int(ceil (coredynp.arch_ireg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out 1641 out_w = int(ceil (coredynp.arch_ireg_width/8.0)); 1642 interface_ip.is_cache = true; 1643 interface_ip.pure_cam = false; 1644 interface_ip.pure_ram = false; 1645 interface_ip.line_sz = data; 1646 interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size; 1647 interface_ip.assoc = 0; 1648 interface_ip.nbanks = 1; 1649 interface_ip.out_w = out_w*8; 1650 interface_ip.specific_tag = 1; 1651 interface_ip.tag_w = tag; 1652 interface_ip.access_mode = 2; 1653 interface_ip.throughput = 1.0/clockRate; 1654 interface_ip.latency = 1.0/clockRate; 1655 interface_ip.obj_func_dyn_energy = 0; 1656 interface_ip.obj_func_dyn_power = 0; 1657 interface_ip.obj_func_leak_power = 0; 1658 interface_ip.obj_func_cycle_t = 1; 1659 interface_ip.num_rw_ports = 1;//for GCs 1660 interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO 1661 interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width; 1662 interface_ip.num_se_rd_ports = 0; 1663 interface_ip.num_search_ports= 2*XML->sys.core[ithCore].decode_width; 1664 iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); 1665 iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1666 area.set_area(area.get_area()+ iFRAT->area.get_area()); 1667 1668 //FRAT 1669 tag = coredynp.arch_freg_width; 1670 data = int(ceil (coredynp.arch_freg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out 1671 out_w = int(ceil (coredynp.arch_freg_width/8.0)); 1672 interface_ip.is_cache = true; 1673 interface_ip.pure_cam = false; 1674 interface_ip.pure_ram = false; 1675 interface_ip.line_sz = data; 1676 interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size; 1677 interface_ip.assoc = 0; 1678 interface_ip.nbanks = 1; 1679 interface_ip.out_w = out_w*8; 1680 interface_ip.specific_tag = 1; 1681 interface_ip.tag_w = tag; 1682 interface_ip.access_mode = 2; 1683 interface_ip.throughput = 1.0/clockRate; 1684 interface_ip.latency = 1.0/clockRate; 1685 interface_ip.obj_func_dyn_energy = 0; 1686 interface_ip.obj_func_dyn_power = 0; 1687 interface_ip.obj_func_leak_power = 0; 1688 interface_ip.obj_func_cycle_t = 1; 1689 interface_ip.num_rw_ports = 1;//for GCs 1690 interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO; 1691 interface_ip.num_wr_ports = coredynp.fp_decodeW; 1692 interface_ip.num_se_rd_ports = 0; 1693 interface_ip.num_search_ports= 2*coredynp.fp_decodeW; 1694 fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); 1695 fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1696 area.set_area(area.get_area()+ fFRAT->area.get_area()); 1697 1698 } 1699 //No RRAT for RS based OOO 1700 //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified 1701 data = int(ceil(coredynp.phy_ireg_width/8.0)); 1702 interface_ip.is_cache = false; 1703 interface_ip.pure_cam = false; 1704 interface_ip.pure_ram = true; 1705 interface_ip.line_sz = data; 1706 interface_ip.cache_sz = data*coredynp.num_ifreelist_entries; 1707 interface_ip.assoc = 1; 1708 interface_ip.nbanks = 1; 1709 interface_ip.out_w = interface_ip.line_sz*8; 1710 interface_ip.access_mode = 1; 1711 interface_ip.throughput = 1.0/clockRate; 1712 interface_ip.latency = 1.0/clockRate; 1713 interface_ip.obj_func_dyn_energy = 0; 1714 interface_ip.obj_func_dyn_power = 0; 1715 interface_ip.obj_func_leak_power = 0; 1716 interface_ip.obj_func_cycle_t = 1; 1717 interface_ip.num_rw_ports = 1;//TODO 1718 interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width; 1719 interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width -1 + XML->sys.core[ithCore].commit_width; 1720 interface_ip.num_se_rd_ports = 0; 1721 ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, coredynp.opt_local, coredynp.core_ty); 1722 ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); 1723 area.set_area(area.get_area()+ ifreeL->area.get_area()); 1724 1725 idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR 1726 fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); 1727 } 1728 1729} 1730 if (coredynp.core_ty==Inorder&& coredynp.issueW>1) 1731 { 1732 /* Dependency check logic will only present when decode(issue) width>1. 1733 * Multiple issue in order processor can do without renaming, but dcl is a must. 1734 */ 1735 idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR 1736 fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); 1737 } 1738} 1739 1740Core::Core(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_) 1741:XML(XML_interface), 1742 ithCore(ithCore_), 1743 interface_ip(*interface_ip_), 1744 ifu (0), 1745 lsu (0), 1746 mmu (0), 1747 exu (0), 1748 rnu (0), 1749 corepipe (0), 1750 undiffCore (0), 1751 l2cache (0) 1752{ 1753 /* 1754 * initialize, compute and optimize individual components. 1755 */ 1756 1757 double pipeline_area_per_unit; 1758 if (XML->sys.Private_L2) 1759 { 1760 l2cache = new SharedCache(XML,ithCore, &interface_ip); 1761 1762 } 1763// interface_ip.wire_is_mat_type = 2; 1764// interface_ip.wire_os_mat_type = 2; 1765// interface_ip.wt =Global_30; 1766 set_core_param(); 1767 clockRate = coredynp.clockRate; 1768 executionTime = coredynp.executionTime; 1769 ifu = new InstFetchU(XML, ithCore, &interface_ip,coredynp); 1770 lsu = new LoadStoreU(XML, ithCore, &interface_ip,coredynp); 1771 mmu = new MemManU (XML, ithCore, &interface_ip,coredynp); 1772 exu = new EXECU (XML, ithCore, &interface_ip,lsu->lsq_height, coredynp); 1773 undiffCore = new UndiffCore(XML, ithCore, &interface_ip,coredynp); 1774 if (coredynp.core_ty==OOO) 1775 { 1776 rnu = new RENAMINGU(XML, ithCore, &interface_ip,coredynp); 1777 } 1778 corepipe = new Pipeline(&interface_ip,coredynp); 1779 1780 if (coredynp.core_ty==OOO) 1781 { 1782 pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/5.0; 1783 if (rnu->exist) 1784 { 1785 rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); 1786 } 1787 } 1788 else { 1789 pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/4.0; 1790 } 1791 1792 //area.set_area(area.get_area()+ corepipe->area.get_area()); 1793 if (ifu->exist) 1794 { 1795 ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); 1796 area.set_area(area.get_area() + ifu->area.get_area()); 1797 } 1798 if (lsu->exist) 1799 { 1800 lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); 1801 area.set_area(area.get_area() + lsu->area.get_area()); 1802 } 1803 if (exu->exist) 1804 { 1805 exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); 1806 area.set_area(area.get_area()+exu->area.get_area()); 1807 } 1808 if (mmu->exist) 1809 { 1810 mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); 1811 area.set_area(area.get_area()+mmu->area.get_area()); 1812 } 1813 1814 if (coredynp.core_ty==OOO) 1815 { 1816 if (rnu->exist) 1817 { 1818 1819 area.set_area(area.get_area() + rnu->area.get_area()); 1820 } 1821 } 1822 1823 if (undiffCore->exist) 1824 { 1825 area.set_area(area.get_area() + undiffCore->area.get_area()); 1826 } 1827 1828 if (XML->sys.Private_L2) 1829 { 1830 area.set_area(area.get_area() + l2cache->area.get_area()); 1831 1832 } 1833// //clock power 1834// clockNetwork.init_wire_external(is_default, &interface_ip); 1835// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb 1836// clockNetwork.end_wiring_level =5;//toplevel metal 1837// clockNetwork.start_wiring_level =5;//toplevel metal 1838// clockNetwork.num_regs = corepipe.tot_stage_vector; 1839// clockNetwork.optimize_wire(); 1840} 1841 1842 1843void BranchPredictor::computeEnergy(bool is_tdp) 1844{ 1845 if (!exist) return; 1846 double r_access; 1847 double w_access; 1848 if (is_tdp) 1849 { 1850 r_access = coredynp.predictionW*coredynp.BR_duty_cycle; 1851 w_access = 0*coredynp.BR_duty_cycle; 1852 globalBPT->stats_t.readAc.access = r_access; 1853 globalBPT->stats_t.writeAc.access = w_access; 1854 globalBPT->tdp_stats = globalBPT->stats_t; 1855 1856 L1_localBPT->stats_t.readAc.access = r_access; 1857 L1_localBPT->stats_t.writeAc.access = w_access; 1858 L1_localBPT->tdp_stats = L1_localBPT->stats_t; 1859 1860 L2_localBPT->stats_t.readAc.access = r_access; 1861 L2_localBPT->stats_t.writeAc.access = w_access; 1862 L2_localBPT->tdp_stats = L2_localBPT->stats_t; 1863 1864 chooser->stats_t.readAc.access = r_access; 1865 chooser->stats_t.writeAc.access = w_access; 1866 chooser->tdp_stats = chooser->stats_t; 1867 1868 RAS->stats_t.readAc.access = r_access; 1869 RAS->stats_t.writeAc.access = w_access; 1870 RAS->tdp_stats = RAS->stats_t; 1871 } 1872 else 1873 { 1874 //The resolution of BPT accesses is coarse, but this is 1875 //because most simulators cannot track finer grained details 1876 r_access = XML->sys.core[ithCore].branch_instructions; 1877 w_access = XML->sys.core[ithCore].branch_mispredictions + 0.1*XML->sys.core[ithCore].branch_instructions;//10% of BR will flip internal bits//0 1878 globalBPT->stats_t.readAc.access = r_access; 1879 globalBPT->stats_t.writeAc.access = w_access; 1880 globalBPT->rtp_stats = globalBPT->stats_t; 1881 1882 L1_localBPT->stats_t.readAc.access = r_access; 1883 L1_localBPT->stats_t.writeAc.access = w_access; 1884 L1_localBPT->rtp_stats = L1_localBPT->stats_t; 1885 1886 L2_localBPT->stats_t.readAc.access = r_access; 1887 L2_localBPT->stats_t.writeAc.access = w_access; 1888 L2_localBPT->rtp_stats = L2_localBPT->stats_t; 1889 1890 chooser->stats_t.readAc.access = r_access; 1891 chooser->stats_t.writeAc.access = w_access; 1892 chooser->rtp_stats = chooser->stats_t; 1893 1894 RAS->stats_t.readAc.access = XML->sys.core[ithCore].function_calls; 1895 RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls; 1896 RAS->rtp_stats = RAS->stats_t; 1897 } 1898 1899 globalBPT->power_t.reset(); 1900 L1_localBPT->power_t.reset(); 1901 L2_localBPT->power_t.reset(); 1902 chooser->power_t.reset(); 1903 RAS->power_t.reset(); 1904 1905 globalBPT->power_t.readOp.dynamic += globalBPT->local_result.power.readOp.dynamic*globalBPT->stats_t.readAc.access + 1906 globalBPT->stats_t.writeAc.access*globalBPT->local_result.power.writeOp.dynamic; 1907 L1_localBPT->power_t.readOp.dynamic += L1_localBPT->local_result.power.readOp.dynamic*L1_localBPT->stats_t.readAc.access + 1908 L1_localBPT->stats_t.writeAc.access*L1_localBPT->local_result.power.writeOp.dynamic; 1909 1910 L2_localBPT->power_t.readOp.dynamic += L2_localBPT->local_result.power.readOp.dynamic*L2_localBPT->stats_t.readAc.access + 1911 L2_localBPT->stats_t.writeAc.access*L2_localBPT->local_result.power.writeOp.dynamic; 1912 1913 chooser->power_t.readOp.dynamic += chooser->local_result.power.readOp.dynamic*chooser->stats_t.readAc.access + 1914 chooser->stats_t.writeAc.access*chooser->local_result.power.writeOp.dynamic; 1915 RAS->power_t.readOp.dynamic += RAS->local_result.power.readOp.dynamic*RAS->stats_t.readAc.access + 1916 RAS->stats_t.writeAc.access*RAS->local_result.power.writeOp.dynamic; 1917 1918 if (is_tdp) 1919 { 1920 globalBPT->power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg; 1921 L1_localBPT->power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg; 1922 L2_localBPT->power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg; 1923 chooser->power = chooser->power_t + chooser->local_result.power*pppm_lkg; 1924 RAS->power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread; 1925 1926 power = power + globalBPT->power + L1_localBPT->power + chooser->power + RAS->power; 1927 } 1928 else 1929 { 1930 globalBPT->rt_power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg; 1931 L1_localBPT->rt_power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg; 1932 L2_localBPT->rt_power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg; 1933 chooser->rt_power = chooser->power_t + chooser->local_result.power*pppm_lkg; 1934 RAS->rt_power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread; 1935 rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + chooser->rt_power + RAS->rt_power; 1936 } 1937} 1938 1939void BranchPredictor::displayEnergy(uint32_t indent,int plevel,bool is_tdp) 1940{ 1941 if (!exist) return; 1942 string indent_str(indent, ' '); 1943 string indent_str_next(indent+2, ' '); 1944 bool long_channel = XML->sys.longer_channel_device; 1945 if (is_tdp) 1946 { 1947 cout << indent_str<< "Global Predictor:" << endl; 1948 cout << indent_str_next << "Area = " << globalBPT->area.get_area()*1e-6<< " mm^2" << endl; 1949 cout << indent_str_next << "Peak Dynamic = " << globalBPT->power.readOp.dynamic*clockRate << " W" << endl; 1950 cout << indent_str_next << "Subthreshold Leakage = " 1951 << (long_channel? globalBPT->power.readOp.longer_channel_leakage:globalBPT->power.readOp.leakage) <<" W" << endl; 1952 cout << indent_str_next << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" << endl; 1953 cout << indent_str_next << "Runtime Dynamic = " << globalBPT->rt_power.readOp.dynamic/executionTime << " W" << endl; 1954 cout <<endl; 1955 cout << indent_str << "Local Predictor:" << endl; 1956 cout << indent_str << "L1_Local Predictor:" << endl; 1957 cout << indent_str_next << "Area = " << L1_localBPT->area.get_area() *1e-6 << " mm^2" << endl; 1958 cout << indent_str_next << "Peak Dynamic = " << L1_localBPT->power.readOp.dynamic*clockRate << " W" << endl; 1959 cout << indent_str_next << "Subthreshold Leakage = " 1960 << (long_channel? L1_localBPT->power.readOp.longer_channel_leakage:L1_localBPT->power.readOp.leakage) << " W" << endl; 1961 cout << indent_str_next << "Gate Leakage = " << L1_localBPT->power.readOp.gate_leakage << " W" << endl; 1962 cout << indent_str_next << "Runtime Dynamic = " << L1_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl; 1963 cout <<endl; 1964 cout << indent_str << "L2_Local Predictor:" << endl; 1965 cout << indent_str_next << "Area = " << L2_localBPT->area.get_area() *1e-6 << " mm^2" << endl; 1966 cout << indent_str_next << "Peak Dynamic = " << L2_localBPT->power.readOp.dynamic*clockRate << " W" << endl; 1967 cout << indent_str_next << "Subthreshold Leakage = " 1968 << (long_channel? L2_localBPT->power.readOp.longer_channel_leakage:L2_localBPT->power.readOp.leakage) << " W" << endl; 1969 cout << indent_str_next << "Gate Leakage = " << L2_localBPT->power.readOp.gate_leakage << " W" << endl; 1970 cout << indent_str_next << "Runtime Dynamic = " << L2_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl; 1971 cout <<endl; 1972 1973 cout << indent_str << "Chooser:" << endl; 1974 cout << indent_str_next << "Area = " << chooser->area.get_area() *1e-6 << " mm^2" << endl; 1975 cout << indent_str_next << "Peak Dynamic = " << chooser->power.readOp.dynamic*clockRate << " W" << endl; 1976 cout << indent_str_next << "Subthreshold Leakage = " 1977 << (long_channel? chooser->power.readOp.longer_channel_leakage:chooser->power.readOp.leakage) << " W" << endl; 1978 cout << indent_str_next << "Gate Leakage = " << chooser->power.readOp.gate_leakage << " W" << endl; 1979 cout << indent_str_next << "Runtime Dynamic = " << chooser->rt_power.readOp.dynamic/executionTime << " W" << endl; 1980 cout <<endl; 1981 cout << indent_str << "RAS:" << endl; 1982 cout << indent_str_next << "Area = " << RAS->area.get_area() *1e-6 << " mm^2" << endl; 1983 cout << indent_str_next << "Peak Dynamic = " << RAS->power.readOp.dynamic*clockRate << " W" << endl; 1984 cout << indent_str_next << "Subthreshold Leakage = " 1985 << (long_channel? RAS->power.readOp.longer_channel_leakage:RAS->power.readOp.leakage) << " W" << endl; 1986 cout << indent_str_next << "Gate Leakage = " << RAS->power.readOp.gate_leakage << " W" << endl; 1987 cout << indent_str_next << "Runtime Dynamic = " << RAS->rt_power.readOp.dynamic/executionTime << " W" << endl; 1988 cout <<endl; 1989 } 1990 else 1991 { 1992// cout << indent_str_next << "Global Predictor Peak Dynamic = " << globalBPT->rt_power.readOp.dynamic*clockRate << " W" << endl; 1993// cout << indent_str_next << "Global Predictor Subthreshold Leakage = " << globalBPT->rt_power.readOp.leakage <<" W" << endl; 1994// cout << indent_str_next << "Global Predictor Gate Leakage = " << globalBPT->rt_power.readOp.gate_leakage << " W" << endl; 1995// cout << indent_str_next << "Local Predictor Peak Dynamic = " << L1_localBPT->rt_power.readOp.dynamic*clockRate << " W" << endl; 1996// cout << indent_str_next << "Local Predictor Subthreshold Leakage = " << L1_localBPT->rt_power.readOp.leakage << " W" << endl; 1997// cout << indent_str_next << "Local Predictor Gate Leakage = " << L1_localBPT->rt_power.readOp.gate_leakage << " W" << endl; 1998// cout << indent_str_next << "Chooser Peak Dynamic = " << chooser->rt_power.readOp.dynamic*clockRate << " W" << endl; 1999// cout << indent_str_next << "Chooser Subthreshold Leakage = " << chooser->rt_power.readOp.leakage << " W" << endl; 2000// cout << indent_str_next << "Chooser Gate Leakage = " << chooser->rt_power.readOp.gate_leakage << " W" << endl; 2001// cout << indent_str_next << "RAS Peak Dynamic = " << RAS->rt_power.readOp.dynamic*clockRate << " W" << endl; 2002// cout << indent_str_next << "RAS Subthreshold Leakage = " << RAS->rt_power.readOp.leakage << " W" << endl; 2003// cout << indent_str_next << "RAS Gate Leakage = " << RAS->rt_power.readOp.gate_leakage << " W" << endl; 2004 } 2005 2006} 2007 2008void InstFetchU::computeEnergy(bool is_tdp) 2009{ 2010 if (!exist) return; 2011 if (is_tdp) 2012 { 2013 //init stats for Peak 2014 icache.caches->stats_t.readAc.access = icache.caches->l_ip.num_rw_ports*coredynp.IFU_duty_cycle; 2015 icache.caches->stats_t.readAc.miss = 0; 2016 icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss; 2017 icache.caches->tdp_stats = icache.caches->stats_t; 2018 2019 icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit= icache.missb->l_ip.num_search_ports; 2020 icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit= icache.missb->l_ip.num_search_ports; 2021 icache.missb->tdp_stats = icache.missb->stats_t; 2022 2023 icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit= icache.ifb->l_ip.num_search_ports; 2024 icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports; 2025 icache.ifb->tdp_stats = icache.ifb->stats_t; 2026 2027 icache.prefetchb->stats_t.readAc.access = icache.prefetchb->stats_t.readAc.hit= icache.prefetchb->l_ip.num_search_ports; 2028 icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports; 2029 icache.prefetchb->tdp_stats = icache.prefetchb->stats_t; 2030 2031 IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width; 2032 IB->tdp_stats = IB->stats_t; 2033 2034 if (coredynp.predictionW>0) 2035 { 2036 BTB->stats_t.readAc.access = coredynp.predictionW;//XML->sys.core[ithCore].BTB.read_accesses; 2037 BTB->stats_t.writeAc.access = 0;//XML->sys.core[ithCore].BTB.write_accesses; 2038 } 2039 2040 ID_inst->stats_t.readAc.access = coredynp.decodeW; 2041 ID_operand->stats_t.readAc.access = coredynp.decodeW; 2042 ID_misc->stats_t.readAc.access = coredynp.decodeW; 2043 ID_inst->tdp_stats = ID_inst->stats_t; 2044 ID_operand->tdp_stats = ID_operand->stats_t; 2045 ID_misc->tdp_stats = ID_misc->stats_t; 2046 2047 2048 } 2049 else 2050 { 2051 //init stats for Runtime Dynamic (RTP) 2052 icache.caches->stats_t.readAc.access = XML->sys.core[ithCore].icache.read_accesses; 2053 icache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].icache.read_misses; 2054 icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss; 2055 icache.caches->rtp_stats = icache.caches->stats_t; 2056 2057 icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; 2058 icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; 2059 icache.missb->rtp_stats = icache.missb->stats_t; 2060 2061 icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; 2062 icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; 2063 icache.ifb->rtp_stats = icache.ifb->stats_t; 2064 2065 icache.prefetchb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; 2066 icache.prefetchb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; 2067 icache.prefetchb->rtp_stats = icache.prefetchb->stats_t; 2068 2069 IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions; 2070 IB->rtp_stats = IB->stats_t; 2071 2072 if (coredynp.predictionW>0) 2073 { 2074 BTB->stats_t.readAc.access = XML->sys.core[ithCore].BTB.read_accesses;//XML->sys.core[ithCore].branch_instructions; 2075 BTB->stats_t.writeAc.access = XML->sys.core[ithCore].BTB.write_accesses;//XML->sys.core[ithCore].branch_mispredictions; 2076 BTB->rtp_stats = BTB->stats_t; 2077 } 2078 2079 ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; 2080 ID_operand->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; 2081 ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; 2082 ID_inst->rtp_stats = ID_inst->stats_t; 2083 ID_operand->rtp_stats = ID_operand->stats_t; 2084 ID_misc->rtp_stats = ID_misc->stats_t; 2085 2086 } 2087 2088 icache.power_t.reset(); 2089 IB->power_t.reset(); 2090// ID_inst->power_t.reset(); 2091// ID_operand->power_t.reset(); 2092// ID_misc->power_t.reset(); 2093 if (coredynp.predictionW>0) 2094 { 2095 BTB->power_t.reset(); 2096 } 2097 2098 icache.power_t.readOp.dynamic += (icache.caches->stats_t.readAc.hit*icache.caches->local_result.power.readOp.dynamic+ 2099 //icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+ 2100 icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.readOp.dynamic+ //assume tag data accessed in parallel 2101 icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.writeOp.dynamic); //read miss in Icache cause a write to Icache 2102 icache.power_t.readOp.dynamic += icache.missb->stats_t.readAc.access*icache.missb->local_result.power.searchOp.dynamic + 2103 icache.missb->stats_t.writeAc.access*icache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write 2104 icache.power_t.readOp.dynamic += icache.ifb->stats_t.readAc.access*icache.ifb->local_result.power.searchOp.dynamic + 2105 icache.ifb->stats_t.writeAc.access*icache.ifb->local_result.power.writeOp.dynamic; 2106 icache.power_t.readOp.dynamic += icache.prefetchb->stats_t.readAc.access*icache.prefetchb->local_result.power.searchOp.dynamic + 2107 icache.prefetchb->stats_t.writeAc.access*icache.prefetchb->local_result.power.writeOp.dynamic; 2108 2109 IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic*IB->stats_t.readAc.access + 2110 IB->stats_t.writeAc.access*IB->local_result.power.writeOp.dynamic; 2111 2112 if (coredynp.predictionW>0) 2113 { 2114 BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic*BTB->stats_t.readAc.access + 2115 BTB->stats_t.writeAc.access*BTB->local_result.power.writeOp.dynamic; 2116 2117 BPT->computeEnergy(is_tdp); 2118 } 2119 2120 if (is_tdp) 2121 { 2122// icache.power = icache.power_t + 2123// (icache.caches->local_result.power)*pppm_lkg + 2124// (icache.missb->local_result.power + 2125// icache.ifb->local_result.power + 2126// icache.prefetchb->local_result.power)*pppm_Isub; 2127 icache.power = icache.power_t + 2128 (icache.caches->local_result.power + 2129 icache.missb->local_result.power + 2130 icache.ifb->local_result.power + 2131 icache.prefetchb->local_result.power)*pppm_lkg; 2132 2133 IB->power = IB->power_t + IB->local_result.power*pppm_lkg; 2134 power = power + icache.power + IB->power; 2135 if (coredynp.predictionW>0) 2136 { 2137 BTB->power = BTB->power_t + BTB->local_result.power*pppm_lkg; 2138 power = power + BTB->power + BPT->power; 2139 } 2140 2141 ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic; 2142 ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic; 2143 ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic; 2144 2145 ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access; 2146 ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access; 2147 ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access; 2148 2149 power = power + (ID_inst->power + 2150 ID_operand->power + 2151 ID_misc->power); 2152 } 2153 else 2154 { 2155// icache.rt_power = icache.power_t + 2156// (icache.caches->local_result.power)*pppm_lkg + 2157// (icache.missb->local_result.power + 2158// icache.ifb->local_result.power + 2159// icache.prefetchb->local_result.power)*pppm_Isub; 2160 2161 icache.rt_power = icache.power_t + 2162 (icache.caches->local_result.power + 2163 icache.missb->local_result.power + 2164 icache.ifb->local_result.power + 2165 icache.prefetchb->local_result.power)*pppm_lkg; 2166 2167 IB->rt_power = IB->power_t + IB->local_result.power*pppm_lkg; 2168 rt_power = rt_power + icache.rt_power + IB->rt_power; 2169 if (coredynp.predictionW>0) 2170 { 2171 BTB->rt_power = BTB->power_t + BTB->local_result.power*pppm_lkg; 2172 rt_power = rt_power + BTB->rt_power + BPT->rt_power; 2173 } 2174 2175 ID_inst->rt_power.readOp.dynamic = ID_inst->power_t.readOp.dynamic*ID_inst->rtp_stats.readAc.access; 2176 ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * ID_operand->rtp_stats.readAc.access; 2177 ID_misc->rt_power.readOp.dynamic = ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access; 2178 2179 rt_power = rt_power + (ID_inst->rt_power + 2180 ID_operand->rt_power + 2181 ID_misc->rt_power); 2182 } 2183} 2184 2185void InstFetchU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) 2186{ 2187 if (!exist) return; 2188 string indent_str(indent, ' '); 2189 string indent_str_next(indent+2, ' '); 2190 bool long_channel = XML->sys.longer_channel_device; 2191 2192 2193 if (is_tdp) 2194 { 2195 2196 cout << indent_str<< "Instruction Cache:" << endl; 2197 cout << indent_str_next << "Area = " << icache.area.get_area()*1e-6<< " mm^2" << endl; 2198 cout << indent_str_next << "Peak Dynamic = " << icache.power.readOp.dynamic*clockRate << " W" << endl; 2199 cout << indent_str_next << "Subthreshold Leakage = " 2200 << (long_channel? icache.power.readOp.longer_channel_leakage:icache.power.readOp.leakage) <<" W" << endl; 2201 cout << indent_str_next << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" << endl; 2202 cout << indent_str_next << "Runtime Dynamic = " << icache.rt_power.readOp.dynamic/executionTime << " W" << endl; 2203 cout <<endl; 2204 if (coredynp.predictionW>0) 2205 { 2206 cout << indent_str<< "Branch Target Buffer:" << endl; 2207 cout << indent_str_next << "Area = " << BTB->area.get_area() *1e-6 << " mm^2" << endl; 2208 cout << indent_str_next << "Peak Dynamic = " << BTB->power.readOp.dynamic*clockRate << " W" << endl; 2209 cout << indent_str_next << "Subthreshold Leakage = " 2210 << (long_channel? BTB->power.readOp.longer_channel_leakage:BTB->power.readOp.leakage) << " W" << endl; 2211 cout << indent_str_next << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" << endl; 2212 cout << indent_str_next << "Runtime Dynamic = " << BTB->rt_power.readOp.dynamic/executionTime << " W" << endl; 2213 cout <<endl; 2214 if (BPT->exist) 2215 { 2216 cout << indent_str<< "Branch Predictor:" << endl; 2217 cout << indent_str_next << "Area = " << BPT->area.get_area() *1e-6<< " mm^2" << endl; 2218 cout << indent_str_next << "Peak Dynamic = " << BPT->power.readOp.dynamic*clockRate << " W" << endl; 2219 cout << indent_str_next << "Subthreshold Leakage = " 2220 << (long_channel? BPT->power.readOp.longer_channel_leakage:BPT->power.readOp.leakage) << " W" << endl; 2221 cout << indent_str_next << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" << endl; 2222 cout << indent_str_next << "Runtime Dynamic = " << BPT->rt_power.readOp.dynamic/executionTime << " W" << endl; 2223 cout <<endl; 2224 if (plevel>3) 2225 { 2226 BPT->displayEnergy(indent+4, plevel, is_tdp); 2227 } 2228 } 2229 } 2230 cout << indent_str<< "Instruction Buffer:" << endl; 2231 cout << indent_str_next << "Area = " << IB->area.get_area()*1e-6 << " mm^2" << endl; 2232 cout << indent_str_next << "Peak Dynamic = " << IB->power.readOp.dynamic*clockRate << " W" << endl; 2233 cout << indent_str_next << "Subthreshold Leakage = " 2234 << (long_channel? IB->power.readOp.longer_channel_leakage:IB->power.readOp.leakage) << " W" << endl; 2235 cout << indent_str_next << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl; 2236 cout << indent_str_next << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic/executionTime << " W" << endl; 2237 cout <<endl; 2238 cout << indent_str<< "Instruction Decoder:" << endl; 2239 cout << indent_str_next << "Area = " << (ID_inst->area.get_area() + 2240 ID_operand->area.get_area() + 2241 ID_misc->area.get_area())*coredynp.decodeW*1e-6 << " mm^2" << endl; 2242 cout << indent_str_next << "Peak Dynamic = " << (ID_inst->power.readOp.dynamic + 2243 ID_operand->power.readOp.dynamic + 2244 ID_misc->power.readOp.dynamic)*clockRate << " W" << endl; 2245 cout << indent_str_next << "Subthreshold Leakage = " 2246 << (long_channel? (ID_inst->power.readOp.longer_channel_leakage + 2247 ID_operand->power.readOp.longer_channel_leakage + 2248 ID_misc->power.readOp.longer_channel_leakage): 2249 (ID_inst->power.readOp.leakage + 2250 ID_operand->power.readOp.leakage + 2251 ID_misc->power.readOp.leakage)) << " W" << endl; 2252 cout << indent_str_next << "Gate Leakage = " << (ID_inst->power.readOp.gate_leakage + 2253 ID_operand->power.readOp.gate_leakage + 2254 ID_misc->power.readOp.gate_leakage) << " W" << endl; 2255 cout << indent_str_next << "Runtime Dynamic = " << (ID_inst->rt_power.readOp.dynamic + 2256 ID_operand->rt_power.readOp.dynamic + 2257 ID_misc->rt_power.readOp.dynamic)/executionTime << " W" << endl; 2258 cout <<endl; 2259 } 2260 else 2261 { 2262// cout << indent_str_next << "Instruction Cache Peak Dynamic = " << icache.rt_power.readOp.dynamic*clockRate << " W" << endl; 2263// cout << indent_str_next << "Instruction Cache Subthreshold Leakage = " << icache.rt_power.readOp.leakage <<" W" << endl; 2264// cout << indent_str_next << "Instruction Cache Gate Leakage = " << icache.rt_power.readOp.gate_leakage << " W" << endl; 2265// cout << indent_str_next << "Instruction Buffer Peak Dynamic = " << IB->rt_power.readOp.dynamic*clockRate << " W" << endl; 2266// cout << indent_str_next << "Instruction Buffer Subthreshold Leakage = " << IB->rt_power.readOp.leakage << " W" << endl; 2267// cout << indent_str_next << "Instruction Buffer Gate Leakage = " << IB->rt_power.readOp.gate_leakage << " W" << endl; 2268// cout << indent_str_next << "Branch Target Buffer Peak Dynamic = " << BTB->rt_power.readOp.dynamic*clockRate << " W" << endl; 2269// cout << indent_str_next << "Branch Target Buffer Subthreshold Leakage = " << BTB->rt_power.readOp.leakage << " W" << endl; 2270// cout << indent_str_next << "Branch Target Buffer Gate Leakage = " << BTB->rt_power.readOp.gate_leakage << " W" << endl; 2271// cout << indent_str_next << "Branch Predictor Peak Dynamic = " << BPT->rt_power.readOp.dynamic*clockRate << " W" << endl; 2272// cout << indent_str_next << "Branch Predictor Subthreshold Leakage = " << BPT->rt_power.readOp.leakage << " W" << endl; 2273// cout << indent_str_next << "Branch Predictor Gate Leakage = " << BPT->rt_power.readOp.gate_leakage << " W" << endl; 2274 } 2275 2276} 2277 2278void RENAMINGU::computeEnergy(bool is_tdp) 2279{ 2280 if (!exist) return; 2281 double pppm_t[4] = {1,1,1,1}; 2282 if (is_tdp) 2283 {//init stats for Peak 2284 if (coredynp.core_ty==OOO){ 2285 if (coredynp.scheu_ty==PhysicalRegFile) 2286 { 2287 if (coredynp.rm_ty ==RAMbased) 2288 { 2289 iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; 2290 iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; 2291 iFRAT->tdp_stats = iFRAT->stats_t; 2292 2293 fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; 2294 fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; 2295 fFRAT->tdp_stats = fFRAT->stats_t; 2296 2297 } 2298 else if ((coredynp.rm_ty ==CAMbased)) 2299 { 2300 iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; 2301 iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; 2302 iFRAT->tdp_stats = iFRAT->stats_t; 2303 2304 fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; 2305 fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; 2306 fFRAT->tdp_stats = fFRAT->stats_t; 2307 } 2308 2309 iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; 2310 iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; 2311 iRRAT->tdp_stats = iRRAT->stats_t; 2312 2313 fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; 2314 fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; 2315 fRRAT->tdp_stats = fRRAT->stats_t; 2316 2317 ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;; 2318 ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports; 2319 ifreeL->tdp_stats = ifreeL->stats_t; 2320 2321 ffreeL->stats_t.readAc.access = coredynp.decodeW;//ffreeL->l_ip.num_rd_ports; 2322 ffreeL->stats_t.writeAc.access = coredynp.decodeW;//ffreeL->l_ip.num_wr_ports; 2323 ffreeL->tdp_stats = ffreeL->stats_t; 2324 } 2325 else if (coredynp.scheu_ty==ReservationStation){ 2326 if (coredynp.rm_ty ==RAMbased) 2327 { 2328 iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; 2329 iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; 2330 iFRAT->stats_t.searchAc.access = iFRAT->l_ip.num_search_ports; 2331 iFRAT->tdp_stats = iFRAT->stats_t; 2332 2333 fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; 2334 fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; 2335 fFRAT->stats_t.searchAc.access = fFRAT->l_ip.num_search_ports; 2336 fFRAT->tdp_stats = fFRAT->stats_t; 2337 2338 } 2339 else if ((coredynp.rm_ty ==CAMbased)) 2340 { 2341 iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; 2342 iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; 2343 iFRAT->tdp_stats = iFRAT->stats_t; 2344 2345 fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; 2346 fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; 2347 fFRAT->tdp_stats = fFRAT->stats_t; 2348 } 2349 //Unified free list for both int and fp 2350 ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports; 2351 ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports; 2352 ifreeL->tdp_stats = ifreeL->stats_t; 2353 } 2354 idcl->stats_t.readAc.access = coredynp.decodeW; 2355 fdcl->stats_t.readAc.access = coredynp.decodeW; 2356 idcl->tdp_stats = idcl->stats_t; 2357 fdcl->tdp_stats = fdcl->stats_t; 2358 } 2359 else 2360 { 2361 if (coredynp.issueW>1) 2362 { 2363 idcl->stats_t.readAc.access = coredynp.decodeW; 2364 fdcl->stats_t.readAc.access = coredynp.decodeW; 2365 idcl->tdp_stats = idcl->stats_t; 2366 fdcl->tdp_stats = fdcl->stats_t; 2367 } 2368 } 2369 2370 } 2371 else 2372 {//init stats for Runtime Dynamic (RTP) 2373 if (coredynp.core_ty==OOO){ 2374 if (coredynp.scheu_ty==PhysicalRegFile) 2375 { 2376 if (coredynp.rm_ty ==RAMbased) 2377 { 2378 iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; 2379 iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; 2380 iFRAT->rtp_stats = iFRAT->stats_t; 2381 2382 fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; 2383 fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; 2384 fFRAT->rtp_stats = fFRAT->stats_t; 2385 } 2386 else if ((coredynp.rm_ty ==CAMbased)) 2387 { 2388 iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; 2389 iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; 2390 iFRAT->rtp_stats = iFRAT->stats_t; 2391 2392 fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; 2393 fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; 2394 fFRAT->rtp_stats = fFRAT->stats_t; 2395 } 2396 2397 iRRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_writes;//Hack, should be (context switch + branch mispredictions)*16 2398 iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; 2399 iRRAT->rtp_stats = iRRAT->stats_t; 2400 2401 fRRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_writes;//Hack, should be (context switch + branch mispredictions)*16 2402 fRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; 2403 fRRAT->rtp_stats = fRRAT->stats_t; 2404 2405 ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; 2406 ifreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].rename_writes; 2407 ifreeL->rtp_stats = ifreeL->stats_t; 2408 2409 ffreeL->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; 2410 ffreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].fp_rename_writes; 2411 ffreeL->rtp_stats = ffreeL->stats_t; 2412 } 2413 else if (coredynp.scheu_ty==ReservationStation){ 2414 if (coredynp.rm_ty ==RAMbased) 2415 { 2416 iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; 2417 iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; 2418 iFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_int_instructions;//hack: not all committed instructions use regs. 2419 iFRAT->rtp_stats = iFRAT->stats_t; 2420 2421 fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; 2422 fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; 2423 fFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_fp_instructions; 2424 fFRAT->rtp_stats = fFRAT->stats_t; 2425 } 2426 else if ((coredynp.rm_ty ==CAMbased)) 2427 { 2428 iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; 2429 iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; 2430 iFRAT->rtp_stats = iFRAT->stats_t; 2431 2432 fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; 2433 fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; 2434 fFRAT->rtp_stats = fFRAT->stats_t; 2435 } 2436 //Unified free list for both int and fp since the ROB act as physcial registers 2437 ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads + 2438 XML->sys.core[ithCore].fp_rename_reads; 2439 ifreeL->stats_t.writeAc.access = 2*(XML->sys.core[ithCore].rename_writes + 2440 XML->sys.core[ithCore].fp_rename_writes);//HACK: 2-> since some of renaming in the same group 2441 //are terminated early 2442 ifreeL->rtp_stats = ifreeL->stats_t; 2443 } 2444 idcl->stats_t.readAc.access = 3*coredynp.decodeW*coredynp.decodeW*XML->sys.core[ithCore].rename_reads; 2445 fdcl->stats_t.readAc.access = 3*coredynp.fp_issueW*coredynp.fp_issueW*XML->sys.core[ithCore].fp_rename_writes; 2446 idcl->rtp_stats = idcl->stats_t; 2447 fdcl->rtp_stats = fdcl->stats_t; 2448 } 2449 else 2450 { 2451 if (coredynp.issueW>1) 2452 { 2453 idcl->stats_t.readAc.access = 2*XML->sys.core[ithCore].int_instructions; 2454 fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions; 2455 idcl->rtp_stats = idcl->stats_t; 2456 fdcl->rtp_stats = fdcl->stats_t; 2457 } 2458 } 2459 2460 } 2461 /* Compute engine */ 2462 if (coredynp.core_ty==OOO) 2463 { 2464 if (coredynp.scheu_ty==PhysicalRegFile) 2465 { 2466 if (coredynp.rm_ty ==RAMbased) 2467 { 2468 iFRAT->power_t.reset(); 2469 fFRAT->power_t.reset(); 2470 2471 iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access 2472 *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) 2473 +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); 2474 fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access 2475 *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) 2476 +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); 2477 } 2478 else if ((coredynp.rm_ty ==CAMbased)) 2479 { 2480 iFRAT->power_t.reset(); 2481 fFRAT->power_t.reset(); 2482 iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access 2483 *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) 2484 +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); 2485 fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access 2486 *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) 2487 +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); 2488 } 2489 2490 iRRAT->power_t.reset(); 2491 fRRAT->power_t.reset(); 2492 ifreeL->power_t.reset(); 2493 ffreeL->power_t.reset(); 2494 2495 iRRAT->power_t.readOp.dynamic += (iRRAT->stats_t.readAc.access*iRRAT->local_result.power.readOp.dynamic 2496 +iRRAT->stats_t.writeAc.access*iRRAT->local_result.power.writeOp.dynamic); 2497 fRRAT->power_t.readOp.dynamic += (fRRAT->stats_t.readAc.access*fRRAT->local_result.power.readOp.dynamic 2498 +fRRAT->stats_t.writeAc.access*fRRAT->local_result.power.writeOp.dynamic); 2499 ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic 2500 +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic); 2501 ffreeL->power_t.readOp.dynamic += (ffreeL->stats_t.readAc.access*ffreeL->local_result.power.readOp.dynamic 2502 +ffreeL->stats_t.writeAc.access*ffreeL->local_result.power.writeOp.dynamic); 2503 2504 } 2505 else if (coredynp.scheu_ty==ReservationStation) 2506 { 2507 if (coredynp.rm_ty ==RAMbased) 2508 { 2509 iFRAT->power_t.reset(); 2510 fFRAT->power_t.reset(); 2511 2512 iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access 2513 *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) 2514 +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic 2515 +iFRAT->stats_t.searchAc.access*iFRAT->local_result.power.searchOp.dynamic); 2516 fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access 2517 *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) 2518 +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic 2519 +fFRAT->stats_t.searchAc.access*fFRAT->local_result.power.searchOp.dynamic); 2520 } 2521 else if ((coredynp.rm_ty ==CAMbased)) 2522 { 2523 iFRAT->power_t.reset(); 2524 fFRAT->power_t.reset(); 2525 iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access 2526 *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) 2527 +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); 2528 fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access 2529 *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) 2530 +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); 2531 } 2532 ifreeL->power_t.reset(); 2533 ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic 2534 +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic); 2535 } 2536 2537 } 2538 else 2539 { 2540 if (coredynp.issueW>1) 2541 { 2542 idcl->power_t.reset(); 2543 fdcl->power_t.reset(); 2544 set_pppm(pppm_t, idcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access); 2545 idcl->power_t = idcl->power * pppm_t; 2546 set_pppm(pppm_t, fdcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access); 2547 fdcl->power_t = fdcl->power * pppm_t; 2548 } 2549 2550 } 2551 2552 //assign value to tpd and rtp 2553 if (is_tdp) 2554 { 2555 if (coredynp.core_ty==OOO) 2556 { 2557 if (coredynp.scheu_ty==PhysicalRegFile) 2558 { 2559 iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; 2560 fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; 2561 iRRAT->power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread; 2562 fRRAT->power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread; 2563 ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; 2564 ffreeL->power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread; 2565 power = power + (iFRAT->power + fFRAT->power) 2566 + (iRRAT->power + fRRAT->power) 2567 + (ifreeL->power + ffreeL->power); 2568 } 2569 else if (coredynp.scheu_ty==ReservationStation) 2570 { 2571 iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; 2572 fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; 2573 ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; 2574 power = power + (iFRAT->power + fFRAT->power) 2575 + ifreeL->power; 2576 } 2577 } 2578 else 2579 { 2580 power = power + idcl->power_t + fdcl->power_t; 2581 } 2582 2583 } 2584 else 2585 { 2586 if (coredynp.core_ty==OOO) 2587 { 2588 if (coredynp.scheu_ty==PhysicalRegFile) 2589 { 2590 iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; 2591 fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; 2592 iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread; 2593 fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread; 2594 ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; 2595 ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread; 2596 rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power) 2597 + (iRRAT->rt_power + fRRAT->rt_power) 2598 + (ifreeL->rt_power + ffreeL->rt_power); 2599 } 2600 else if (coredynp.scheu_ty==ReservationStation) 2601 { 2602 iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; 2603 fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; 2604 ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; 2605 rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power) 2606 + ifreeL->rt_power; 2607 } 2608 } 2609 else 2610 { 2611 rt_power = rt_power + idcl->power_t + fdcl->power_t; 2612 } 2613 2614 } 2615} 2616 2617void RENAMINGU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) 2618{ 2619 if (!exist) return; 2620 string indent_str(indent, ' '); 2621 string indent_str_next(indent+2, ' '); 2622 bool long_channel = XML->sys.longer_channel_device; 2623 2624 2625 if (is_tdp) 2626 { 2627 2628 if (coredynp.core_ty==OOO) 2629 { 2630 cout << indent_str<< "Int Front End RAT:" << endl; 2631 cout << indent_str_next << "Area = " << iFRAT->area.get_area()*1e-6<< " mm^2" << endl; 2632 cout << indent_str_next << "Peak Dynamic = " << iFRAT->power.readOp.dynamic*clockRate << " W" << endl; 2633 cout << indent_str_next << "Subthreshold Leakage = " 2634 << (long_channel? iFRAT->power.readOp.longer_channel_leakage:iFRAT->power.readOp.leakage) <<" W" << endl; 2635 cout << indent_str_next << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" << endl; 2636 cout << indent_str_next << "Runtime Dynamic = " << iFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl; 2637 cout <<endl; 2638 cout << indent_str<< "FP Front End RAT:" << endl; 2639 cout << indent_str_next << "Area = " << fFRAT->area.get_area()*1e-6 << " mm^2" << endl; 2640 cout << indent_str_next << "Peak Dynamic = " << fFRAT->power.readOp.dynamic*clockRate << " W" << endl; 2641 cout << indent_str_next << "Subthreshold Leakage = " 2642 << (long_channel? fFRAT->power.readOp.longer_channel_leakage:fFRAT->power.readOp.leakage) << " W" << endl; 2643 cout << indent_str_next << "Gate Leakage = " << fFRAT->power.readOp.gate_leakage << " W" << endl; 2644 cout << indent_str_next << "Runtime Dynamic = " << fFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl; 2645 cout <<endl; 2646 cout << indent_str<<"Free List:" << endl; 2647 cout << indent_str_next << "Area = " << ifreeL->area.get_area()*1e-6 << " mm^2" << endl; 2648 cout << indent_str_next << "Peak Dynamic = " << ifreeL->power.readOp.dynamic*clockRate << " W" << endl; 2649 cout << indent_str_next << "Subthreshold Leakage = " 2650 << (long_channel? ifreeL->power.readOp.longer_channel_leakage:ifreeL->power.readOp.leakage) << " W" << endl; 2651 cout << indent_str_next << "Gate Leakage = " << ifreeL->power.readOp.gate_leakage << " W" << endl; 2652 cout << indent_str_next << "Runtime Dynamic = " << ifreeL->rt_power.readOp.dynamic/executionTime << " W" << endl; 2653 cout <<endl; 2654 2655 if (coredynp.scheu_ty==PhysicalRegFile) 2656 { 2657 cout << indent_str<< "Int Retire RAT: " << endl; 2658 cout << indent_str_next << "Area = " << iRRAT->area.get_area() *1e-6 << " mm^2" << endl; 2659 cout << indent_str_next << "Peak Dynamic = " << iRRAT->power.readOp.dynamic*clockRate << " W" << endl; 2660 cout << indent_str_next << "Subthreshold Leakage = " 2661 << (long_channel? iRRAT->power.readOp.longer_channel_leakage:iRRAT->power.readOp.leakage) << " W" << endl; 2662 cout << indent_str_next << "Gate Leakage = " << iRRAT->power.readOp.gate_leakage << " W" << endl; 2663 cout << indent_str_next << "Runtime Dynamic = " << iRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl; 2664 cout <<endl; 2665 cout << indent_str<< "FP Retire RAT:" << endl; 2666 cout << indent_str_next << "Area = " << fRRAT->area.get_area() *1e-6<< " mm^2" << endl; 2667 cout << indent_str_next << "Peak Dynamic = " << fRRAT->power.readOp.dynamic*clockRate << " W" << endl; 2668 cout << indent_str_next << "Subthreshold Leakage = " 2669 << (long_channel? fRRAT->power.readOp.longer_channel_leakage:fRRAT->power.readOp.leakage) << " W" << endl; 2670 cout << indent_str_next << "Gate Leakage = " << fRRAT->power.readOp.gate_leakage << " W" << endl; 2671 cout << indent_str_next << "Runtime Dynamic = " << fRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl; 2672 cout <<endl; 2673 cout << indent_str<< "FP Free List:" << endl; 2674 cout << indent_str_next << "Area = " << ffreeL->area.get_area()*1e-6 << " mm^2" << endl; 2675 cout << indent_str_next << "Peak Dynamic = " << ffreeL->power.readOp.dynamic*clockRate << " W" << endl; 2676 cout << indent_str_next << "Subthreshold Leakage = " 2677 << (long_channel? ffreeL->power.readOp.longer_channel_leakage:ffreeL->power.readOp.leakage) << " W" << endl; 2678 cout << indent_str_next << "Gate Leakage = " << ffreeL->power.readOp.gate_leakage << " W" << endl; 2679 cout << indent_str_next << "Runtime Dynamic = " << ffreeL->rt_power.readOp.dynamic/executionTime << " W" << endl; 2680 cout <<endl; 2681 } 2682 } 2683 else 2684 { 2685 cout << indent_str<< "Int DCL:" << endl; 2686 cout << indent_str_next << "Peak Dynamic = " << idcl->power.readOp.dynamic*clockRate << " W" << endl; 2687 cout << indent_str_next << "Subthreshold Leakage = " 2688 << (long_channel? idcl->power.readOp.longer_channel_leakage:idcl->power.readOp.leakage) << " W" << endl; 2689 cout << indent_str_next << "Gate Leakage = " << idcl->power.readOp.gate_leakage << " W" << endl; 2690 cout << indent_str_next << "Runtime Dynamic = " << idcl->rt_power.readOp.dynamic/executionTime << " W" << endl; 2691 cout << indent_str<<"FP DCL:" << endl; 2692 cout << indent_str_next << "Peak Dynamic = " << fdcl->power.readOp.dynamic*clockRate << " W" << endl; 2693 cout << indent_str_next << "Subthreshold Leakage = " 2694 << (long_channel? fdcl->power.readOp.longer_channel_leakage:fdcl->power.readOp.leakage) << " W" << endl; 2695 cout << indent_str_next << "Gate Leakage = " << fdcl->power.readOp.gate_leakage << " W" << endl; 2696 cout << indent_str_next << "Runtime Dynamic = " << fdcl->rt_power.readOp.dynamic/executionTime << " W" << endl; 2697 } 2698 } 2699 else 2700 { 2701 if (coredynp.core_ty==OOO) 2702 { 2703 cout << indent_str_next << "Int Front End RAT Peak Dynamic = " << iFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl; 2704 cout << indent_str_next << "Int Front End RAT Subthreshold Leakage = " << iFRAT->rt_power.readOp.leakage <<" W" << endl; 2705 cout << indent_str_next << "Int Front End RAT Gate Leakage = " << iFRAT->rt_power.readOp.gate_leakage << " W" << endl; 2706 cout << indent_str_next << "FP Front End RAT Peak Dynamic = " << fFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl; 2707 cout << indent_str_next << "FP Front End RAT Subthreshold Leakage = " << fFRAT->rt_power.readOp.leakage << " W" << endl; 2708 cout << indent_str_next << "FP Front End RAT Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; 2709 cout << indent_str_next << "Free List Peak Dynamic = " << ifreeL->rt_power.readOp.dynamic*clockRate << " W" << endl; 2710 cout << indent_str_next << "Free List Subthreshold Leakage = " << ifreeL->rt_power.readOp.leakage << " W" << endl; 2711 cout << indent_str_next << "Free List Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; 2712 if (coredynp.scheu_ty==PhysicalRegFile) 2713 { 2714 cout << indent_str_next << "Int Retire RAT Peak Dynamic = " << iRRAT->rt_power.readOp.dynamic*clockRate << " W" << endl; 2715 cout << indent_str_next << "Int Retire RAT Subthreshold Leakage = " << iRRAT->rt_power.readOp.leakage << " W" << endl; 2716 cout << indent_str_next << "Int Retire RAT Gate Leakage = " << iRRAT->rt_power.readOp.gate_leakage << " W" << endl; 2717 cout << indent_str_next << "FP Retire RAT Peak Dynamic = " << fRRAT->rt_power.readOp.dynamic*clockRate << " W" << endl; 2718 cout << indent_str_next << "FP Retire RAT Subthreshold Leakage = " << fRRAT->rt_power.readOp.leakage << " W" << endl; 2719 cout << indent_str_next << "FP Retire RAT Gate Leakage = " << fRRAT->rt_power.readOp.gate_leakage << " W" << endl; 2720 cout << indent_str_next << "FP Free List Peak Dynamic = " << ffreeL->rt_power.readOp.dynamic*clockRate << " W" << endl; 2721 cout << indent_str_next << "FP Free List Subthreshold Leakage = " << ffreeL->rt_power.readOp.leakage << " W" << endl; 2722 cout << indent_str_next << "FP Free List Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; 2723 } 2724 } 2725 else 2726 { 2727 cout << indent_str_next << "Int DCL Peak Dynamic = " << idcl->rt_power.readOp.dynamic*clockRate << " W" << endl; 2728 cout << indent_str_next << "Int DCL Subthreshold Leakage = " << idcl->rt_power.readOp.leakage << " W" << endl; 2729 cout << indent_str_next << "Int DCL Gate Leakage = " << idcl->rt_power.readOp.gate_leakage << " W" << endl; 2730 cout << indent_str_next << "FP DCL Peak Dynamic = " << fdcl->rt_power.readOp.dynamic*clockRate << " W" << endl; 2731 cout << indent_str_next << "FP DCL Subthreshold Leakage = " << fdcl->rt_power.readOp.leakage << " W" << endl; 2732 cout << indent_str_next << "FP DCL Gate Leakage = " << fdcl->rt_power.readOp.gate_leakage << " W" << endl; 2733 } 2734 } 2735 2736} 2737 2738 2739void SchedulerU::computeEnergy(bool is_tdp) 2740{ 2741 if (!exist) return; 2742 double ROB_duty_cycle; 2743// ROB_duty_cycle = ((coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 2744// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 ? (coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 2745// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1; 2746 ROB_duty_cycle = 1; 2747 //init stats 2748 if (is_tdp) 2749 { 2750 if (coredynp.core_ty==OOO) 2751 { 2752 int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports; 2753 int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports; 2754 int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines; 2755 int_inst_window->tdp_stats = int_inst_window->stats_t; 2756 fp_inst_window->stats_t.readAc.access = fp_inst_window->l_ip.num_rd_ports*coredynp.num_fp_pipelines; 2757 fp_inst_window->stats_t.writeAc.access = fp_inst_window->l_ip.num_wr_ports*coredynp.num_fp_pipelines; 2758 fp_inst_window->stats_t.searchAc.access = fp_inst_window->l_ip.num_search_ports*coredynp.num_fp_pipelines; 2759 fp_inst_window->tdp_stats = fp_inst_window->stats_t; 2760 2761 if (XML->sys.core[ithCore].ROB_size >0) 2762 { 2763 ROB->stats_t.readAc.access = coredynp.commitW*coredynp.num_pipelines*ROB_duty_cycle; 2764 ROB->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines*ROB_duty_cycle; 2765 ROB->tdp_stats = ROB->stats_t; 2766 2767 /* 2768 * When inst commits, ROB must be read. 2769 * Because for Physcial register based cores, physical register tag in ROB 2770 * need to be read out and write into RRAT/CAM based RAT. 2771 * For RS based cores, register content that stored in ROB must be 2772 * read out and stored in architectural registers. 2773 * 2774 * if no-register is involved, the ROB read out operation when instruction commits can be ignored. 2775 * assuming 20% insts. belong this type. 2776 * TODO: ROB duty_cycle need to be revisited 2777 */ 2778 } 2779 2780 } 2781 else if (coredynp.multithreaded) 2782 { 2783 int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports; 2784 int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports; 2785 int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines; 2786 int_inst_window->tdp_stats = int_inst_window->stats_t; 2787 } 2788 2789 } 2790 else 2791 {//rtp 2792 if (coredynp.core_ty==OOO) 2793 { 2794 int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].inst_window_reads; 2795 int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].inst_window_writes; 2796 int_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].inst_window_wakeup_accesses; 2797 int_inst_window->rtp_stats = int_inst_window->stats_t; 2798 fp_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].fp_inst_window_reads; 2799 fp_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].fp_inst_window_writes; 2800 fp_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].fp_inst_window_wakeup_accesses; 2801 fp_inst_window->rtp_stats = fp_inst_window->stats_t; 2802 2803 if (XML->sys.core[ithCore].ROB_size >0) 2804 { 2805 2806 ROB->stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads; 2807 ROB->stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes; 2808 /* ROB need to be updated in RS based OOO when new values are produced, 2809 * this update may happen before the commit stage when ROB entry is released 2810 * 1. ROB write at instruction inserted in 2811 * 2. ROB write as results produced (for RS based OOO only) 2812 * 3. ROB read as instruction committed. For RS based OOO, data values are read out and sent to ARF 2813 * For Physical reg based OOO, no data stored in ROB, but register tags need to be 2814 * read out and used to set the RRAT and to recycle the register tag to free list buffer 2815 */ 2816 ROB->rtp_stats = ROB->stats_t; 2817 } 2818 2819 } 2820 else if (coredynp.multithreaded) 2821 { 2822 int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions; 2823 int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions; 2824 int_inst_window->stats_t.searchAc.access = 2*(XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions); 2825 int_inst_window->rtp_stats = int_inst_window->stats_t; 2826 } 2827 } 2828 2829 //computation engine 2830 if (coredynp.core_ty==OOO) 2831 { 2832 int_inst_window->power_t.reset(); 2833 fp_inst_window->power_t.reset(); 2834 2835 /* each instruction needs to write to scheduler, read out when all resources and source operands are ready 2836 * two search ops with one for each source operand 2837 * 2838 */ 2839 int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access 2840 + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access 2841 + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access 2842 + int_inst_window->stats_t.readAc.access * instruction_selection->power.readOp.dynamic; 2843 2844 fp_inst_window->power_t.readOp.dynamic += fp_inst_window->local_result.power.readOp.dynamic * fp_inst_window->stats_t.readAc.access 2845 + fp_inst_window->local_result.power.searchOp.dynamic * fp_inst_window->stats_t.searchAc.access 2846 + fp_inst_window->local_result.power.writeOp.dynamic * fp_inst_window->stats_t.writeAc.access 2847 + fp_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic; 2848 2849 if (XML->sys.core[ithCore].ROB_size >0) 2850 { 2851 ROB->power_t.reset(); 2852 ROB->power_t.readOp.dynamic += ROB->local_result.power.readOp.dynamic*ROB->stats_t.readAc.access + 2853 ROB->stats_t.writeAc.access*ROB->local_result.power.writeOp.dynamic; 2854 } 2855 2856 2857 2858 2859 } 2860 else if (coredynp.multithreaded) 2861 { 2862 int_inst_window->power_t.reset(); 2863 int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access 2864 + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access 2865 + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access 2866 + int_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic; 2867 } 2868 2869 //assign values 2870 if (is_tdp) 2871 { 2872 if (coredynp.core_ty==OOO) 2873 { 2874 int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; 2875 fp_inst_window->power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; 2876 power = power + int_inst_window->power + fp_inst_window->power; 2877 if (XML->sys.core[ithCore].ROB_size >0) 2878 { 2879 ROB->power = ROB->power_t + ROB->local_result.power*pppm_lkg; 2880 power = power + ROB->power; 2881 } 2882 2883 } 2884 else if (coredynp.multithreaded) 2885 { 2886 // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); 2887 int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; 2888 power = power + int_inst_window->power; 2889 } 2890 2891 } 2892 else 2893 {//rtp 2894 if (coredynp.core_ty==OOO) 2895 { 2896 int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; 2897 fp_inst_window->rt_power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; 2898 rt_power = rt_power + int_inst_window->rt_power + fp_inst_window->rt_power; 2899 if (XML->sys.core[ithCore].ROB_size >0) 2900 { 2901 ROB->rt_power = ROB->power_t + ROB->local_result.power*pppm_lkg; 2902 rt_power = rt_power + ROB->rt_power; 2903 } 2904 2905 } 2906 else if (coredynp.multithreaded) 2907 { 2908 // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); 2909 int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; 2910 rt_power = rt_power + int_inst_window->rt_power; 2911 } 2912 } 2913// set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); 2914// cout<<"Scheduler power="<<power.readOp.dynamic<<"leakage="<<power.readOp.leakage<<endl; 2915// cout<<"IW="<<int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.readAc.access + 2916// + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access<<"leakage="<<int_inst_window->local_result.power.readOp.leakage<<endl; 2917// cout<<"selection"<<instruction_selection->power.readOp.dynamic<<"leakage"<<instruction_selection->power.readOp.leakage<<endl; 2918} 2919 2920void SchedulerU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) 2921{ 2922 if (!exist) return; 2923 string indent_str(indent, ' '); 2924 string indent_str_next(indent+2, ' '); 2925 bool long_channel = XML->sys.longer_channel_device; 2926 2927 2928 if (is_tdp) 2929 { 2930 if (coredynp.core_ty==OOO) 2931 { 2932 cout << indent_str << "Instruction Window:" << endl; 2933 cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl; 2934 cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl; 2935 cout << indent_str_next << "Subthreshold Leakage = " 2936 << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl; 2937 cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl; 2938 cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl; 2939 cout <<endl; 2940 cout << indent_str << "FP Instruction Window:" << endl; 2941 cout << indent_str_next << "Area = " << fp_inst_window->area.get_area()*1e-6 << " mm^2" << endl; 2942 cout << indent_str_next << "Peak Dynamic = " << fp_inst_window->power.readOp.dynamic*clockRate << " W" << endl; 2943 cout << indent_str_next << "Subthreshold Leakage = " 2944 << (long_channel? fp_inst_window->power.readOp.longer_channel_leakage:fp_inst_window->power.readOp.leakage ) << " W" << endl; 2945 cout << indent_str_next << "Gate Leakage = " << fp_inst_window->power.readOp.gate_leakage << " W" << endl; 2946 cout << indent_str_next << "Runtime Dynamic = " << fp_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl; 2947 cout <<endl; 2948 if (XML->sys.core[ithCore].ROB_size >0) 2949 { 2950 cout << indent_str<<"ROB:" << endl; 2951 cout << indent_str_next << "Area = " << ROB->area.get_area() *1e-6 << " mm^2" << endl; 2952 cout << indent_str_next << "Peak Dynamic = " << ROB->power.readOp.dynamic*clockRate << " W" << endl; 2953 cout << indent_str_next << "Subthreshold Leakage = " 2954 << (long_channel? ROB->power.readOp.longer_channel_leakage:ROB->power.readOp.leakage) << " W" << endl; 2955 cout << indent_str_next << "Gate Leakage = " << ROB->power.readOp.gate_leakage << " W" << endl; 2956 cout << indent_str_next << "Runtime Dynamic = " << ROB->rt_power.readOp.dynamic/executionTime << " W" << endl; 2957 cout <<endl; 2958 } 2959 } 2960 else if (coredynp.multithreaded) 2961 { 2962 cout << indent_str << "Instruction Window:" << endl; 2963 cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl; 2964 cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl; 2965 cout << indent_str_next << "Subthreshold Leakage = " 2966 << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl; 2967 cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl; 2968 cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl; 2969 cout <<endl; 2970 } 2971 } 2972 else 2973 { 2974 if (coredynp.core_ty==OOO) 2975 { 2976 cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl; 2977 cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl; 2978 cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; 2979 cout << indent_str_next << "FP Instruction Window Peak Dynamic = " << fp_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl; 2980 cout << indent_str_next << "FP Instruction Window Subthreshold Leakage = " << fp_inst_window->rt_power.readOp.leakage << " W" << endl; 2981 cout << indent_str_next << "FP Instruction Window Gate Leakage = " << fp_inst_window->rt_power.readOp.gate_leakage << " W" << endl; 2982 if (XML->sys.core[ithCore].ROB_size >0) 2983 { 2984 cout << indent_str_next << "ROB Peak Dynamic = " << ROB->rt_power.readOp.dynamic*clockRate << " W" << endl; 2985 cout << indent_str_next << "ROB Subthreshold Leakage = " << ROB->rt_power.readOp.leakage << " W" << endl; 2986 cout << indent_str_next << "ROB Gate Leakage = " << ROB->rt_power.readOp.gate_leakage << " W" << endl; 2987 } 2988 } 2989 else if (coredynp.multithreaded) 2990 { 2991 cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl; 2992 cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl; 2993 cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; 2994 } 2995 } 2996 2997} 2998 2999void LoadStoreU::computeEnergy(bool is_tdp) 3000{ 3001 if (!exist) return; 3002 if (is_tdp) 3003 { 3004 //init stats for Peak 3005 dcache.caches->stats_t.readAc.access = 0.67*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle; 3006 dcache.caches->stats_t.readAc.miss = 0; 3007 dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss; 3008 dcache.caches->stats_t.writeAc.access = 0.33*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle; 3009 dcache.caches->stats_t.writeAc.miss = 0; 3010 dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss; 3011 dcache.caches->tdp_stats = dcache.caches->stats_t; 3012 3013 dcache.missb->stats_t.readAc.access = dcache.missb->l_ip.num_search_ports; 3014 dcache.missb->stats_t.writeAc.access = dcache.missb->l_ip.num_search_ports; 3015 dcache.missb->tdp_stats = dcache.missb->stats_t; 3016 3017 dcache.ifb->stats_t.readAc.access = dcache.ifb->l_ip.num_search_ports; 3018 dcache.ifb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports; 3019 dcache.ifb->tdp_stats = dcache.ifb->stats_t; 3020 3021 dcache.prefetchb->stats_t.readAc.access = dcache.prefetchb->l_ip.num_search_ports; 3022 dcache.prefetchb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports; 3023 dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t; 3024 if (cache_p==Write_back) 3025 { 3026 dcache.wbb->stats_t.readAc.access = dcache.wbb->l_ip.num_search_ports; 3027 dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports; 3028 dcache.wbb->tdp_stats = dcache.wbb->stats_t; 3029 } 3030 3031 LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = LSQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle; 3032 LSQ->tdp_stats = LSQ->stats_t; 3033 if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) 3034 { 3035 LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = LoadQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle; 3036 LoadQ->tdp_stats = LoadQ->stats_t; 3037 } 3038 } 3039 else 3040 { 3041 //init stats for Runtime Dynamic (RTP) 3042 dcache.caches->stats_t.readAc.access = XML->sys.core[ithCore].dcache.read_accesses; 3043 dcache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].dcache.read_misses; 3044 dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss; 3045 dcache.caches->stats_t.writeAc.access = XML->sys.core[ithCore].dcache.write_accesses; 3046 dcache.caches->stats_t.writeAc.miss = XML->sys.core[ithCore].dcache.write_misses; 3047 dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss; 3048 dcache.caches->rtp_stats = dcache.caches->stats_t; 3049 3050 if (cache_p==Write_back) 3051 { 3052 dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; 3053 dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; 3054 dcache.missb->rtp_stats = dcache.missb->stats_t; 3055 3056 dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; 3057 dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; 3058 dcache.ifb->rtp_stats = dcache.ifb->stats_t; 3059 3060 dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; 3061 dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; 3062 dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; 3063 3064 dcache.wbb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; 3065 dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; 3066 dcache.wbb->rtp_stats = dcache.wbb->stats_t; 3067 } 3068 else 3069 { 3070 dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; 3071 dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; 3072 dcache.missb->rtp_stats = dcache.missb->stats_t; 3073 3074 dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; 3075 dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; 3076 dcache.ifb->rtp_stats = dcache.ifb->stats_t; 3077 3078 dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; 3079 dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; 3080 dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; 3081 } 3082 3083 LSQ->stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;//flush overhead considered 3084 LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2; 3085 LSQ->rtp_stats = LSQ->stats_t; 3086 3087 if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) 3088 { 3089 LoadQ->stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions; 3090 LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions; 3091 LoadQ->rtp_stats = LoadQ->stats_t; 3092 } 3093 3094 } 3095 3096 dcache.power_t.reset(); 3097 LSQ->power_t.reset(); 3098 dcache.power_t.readOp.dynamic += (dcache.caches->stats_t.readAc.hit*dcache.caches->local_result.power.readOp.dynamic+ 3099 dcache.caches->stats_t.readAc.miss*dcache.caches->local_result.power.readOp.dynamic+ 3100 dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.tag_array2->power.readOp.dynamic+ 3101 dcache.caches->stats_t.writeAc.access*dcache.caches->local_result.power.writeOp.dynamic); 3102 3103 if (cache_p==Write_back) 3104 {//write miss will generate a write later 3105 dcache.power_t.readOp.dynamic += dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.power.writeOp.dynamic; 3106 } 3107 3108 dcache.power_t.readOp.dynamic += dcache.missb->stats_t.readAc.access*dcache.missb->local_result.power.searchOp.dynamic + 3109 dcache.missb->stats_t.writeAc.access*dcache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write 3110 dcache.power_t.readOp.dynamic += dcache.ifb->stats_t.readAc.access*dcache.ifb->local_result.power.searchOp.dynamic + 3111 dcache.ifb->stats_t.writeAc.access*dcache.ifb->local_result.power.writeOp.dynamic; 3112 dcache.power_t.readOp.dynamic += dcache.prefetchb->stats_t.readAc.access*dcache.prefetchb->local_result.power.searchOp.dynamic + 3113 dcache.prefetchb->stats_t.writeAc.access*dcache.prefetchb->local_result.power.writeOp.dynamic; 3114 if (cache_p==Write_back) 3115 { 3116 dcache.power_t.readOp.dynamic += dcache.wbb->stats_t.readAc.access*dcache.wbb->local_result.power.searchOp.dynamic 3117 + dcache.wbb->stats_t.writeAc.access*dcache.wbb->local_result.power.writeOp.dynamic; 3118 } 3119 3120 if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) 3121 { 3122 LoadQ->power_t.reset(); 3123 LoadQ->power_t.readOp.dynamic += LoadQ->stats_t.readAc.access*(LoadQ->local_result.power.searchOp.dynamic+ LoadQ->local_result.power.readOp.dynamic)+ 3124 LoadQ->stats_t.writeAc.access*LoadQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LoadQ 3125 3126 LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic) 3127 + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ 3128 3129 } 3130 else 3131 { 3132 LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic) 3133 + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ 3134 3135 } 3136 3137 if (is_tdp) 3138 { 3139// dcache.power = dcache.power_t + (dcache.caches->local_result.power)*pppm_lkg + 3140// (dcache.missb->local_result.power + 3141// dcache.ifb->local_result.power + 3142// dcache.prefetchb->local_result.power + 3143// dcache.wbb->local_result.power)*pppm_Isub; 3144 dcache.power = dcache.power_t + (dcache.caches->local_result.power + 3145 dcache.missb->local_result.power + 3146 dcache.ifb->local_result.power + 3147 dcache.prefetchb->local_result.power) *pppm_lkg; 3148 if (cache_p==Write_back) 3149 { 3150 dcache.power = dcache.power + dcache.wbb->local_result.power*pppm_lkg; 3151 } 3152 3153 LSQ->power = LSQ->power_t + LSQ->local_result.power *pppm_lkg; 3154 power = power + dcache.power + LSQ->power; 3155 3156 if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) 3157 { 3158 LoadQ->power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg; 3159 power = power + LoadQ->power; 3160 } 3161 } 3162 else 3163 { 3164// dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + 3165// dcache.missb->local_result.power + 3166// dcache.ifb->local_result.power + 3167// dcache.prefetchb->local_result.power + 3168// dcache.wbb->local_result.power)*pppm_lkg; 3169 dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + 3170 dcache.missb->local_result.power + 3171 dcache.ifb->local_result.power + 3172 dcache.prefetchb->local_result.power )*pppm_lkg; 3173 3174 if (cache_p==Write_back) 3175 { 3176 dcache.rt_power = dcache.rt_power + dcache.wbb->local_result.power*pppm_lkg; 3177 } 3178 3179 LSQ->rt_power = LSQ->power_t + LSQ->local_result.power *pppm_lkg; 3180 rt_power = rt_power + dcache.rt_power + LSQ->rt_power; 3181 3182 if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) 3183 { 3184 LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg; 3185 rt_power = rt_power + LoadQ->rt_power; 3186 } 3187 } 3188} 3189 3190 3191void LoadStoreU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) 3192{ 3193 if (!exist) return; 3194 string indent_str(indent, ' '); 3195 string indent_str_next(indent+2, ' '); 3196 bool long_channel = XML->sys.longer_channel_device; 3197 3198 3199 if (is_tdp) 3200 { 3201 cout << indent_str << "Data Cache:" << endl; 3202 cout << indent_str_next << "Area = " << dcache.area.get_area()*1e-6<< " mm^2" << endl; 3203 cout << indent_str_next << "Peak Dynamic = " << dcache.power.readOp.dynamic*clockRate << " W" << endl; 3204 cout << indent_str_next << "Subthreshold Leakage = " 3205 << (long_channel? dcache.power.readOp.longer_channel_leakage:dcache.power.readOp.leakage )<<" W" << endl; 3206 cout << indent_str_next << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" << endl; 3207 cout << indent_str_next << "Runtime Dynamic = " << dcache.rt_power.readOp.dynamic/executionTime << " W" << endl; 3208 cout <<endl; 3209 if (coredynp.core_ty==Inorder) 3210 { 3211 cout << indent_str << "Load/Store Queue:" << endl; 3212 cout << indent_str_next << "Area = " << LSQ->area.get_area()*1e-6 << " mm^2" << endl; 3213 cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate << " W" << endl; 3214 cout << indent_str_next << "Subthreshold Leakage = " 3215 << (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage) << " W" << endl; 3216 cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" << endl; 3217 cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime << " W" << endl; 3218 cout <<endl; 3219 } 3220 else 3221 3222 { 3223 if (XML->sys.core[ithCore].load_buffer_size >0) 3224 { 3225 cout << indent_str << "LoadQ:" << endl; 3226 cout << indent_str_next << "Area = " << LoadQ->area.get_area() *1e-6 << " mm^2" << endl; 3227 cout << indent_str_next << "Peak Dynamic = " << LoadQ->power.readOp.dynamic*clockRate << " W" << endl; 3228 cout << indent_str_next << "Subthreshold Leakage = " 3229 << (long_channel? LoadQ->power.readOp.longer_channel_leakage:LoadQ->power.readOp.leakage) << " W" << endl; 3230 cout << indent_str_next << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage << " W" << endl; 3231 cout << indent_str_next << "Runtime Dynamic = " << LoadQ->rt_power.readOp.dynamic/executionTime << " W" << endl; 3232 cout <<endl; 3233 } 3234 cout << indent_str<< "StoreQ:" << endl; 3235 cout << indent_str_next << "Area = " << LSQ->area.get_area() *1e-6<< " mm^2" << endl; 3236 cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate << " W" << endl; 3237 cout << indent_str_next << "Subthreshold Leakage = " 3238 << (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage) << " W" << endl; 3239 cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" << endl; 3240 cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime<< " W" << endl; 3241 cout <<endl; 3242 } 3243 } 3244 else 3245 { 3246 cout << indent_str_next << "Data Cache Peak Dynamic = " << dcache.rt_power.readOp.dynamic*clockRate << " W" << endl; 3247 cout << indent_str_next << "Data Cache Subthreshold Leakage = " << dcache.rt_power.readOp.leakage <<" W" << endl; 3248 cout << indent_str_next << "Data Cache Gate Leakage = " << dcache.rt_power.readOp.gate_leakage << " W" << endl; 3249 if (coredynp.core_ty==Inorder) 3250 { 3251 cout << indent_str_next << "Load/Store Queue Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate << " W" << endl; 3252 cout << indent_str_next << "Load/Store Queue Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage << " W" << endl; 3253 cout << indent_str_next << "Load/Store Queue Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage << " W" << endl; 3254 } 3255 else 3256 { 3257 cout << indent_str_next << "LoadQ Peak Dynamic = " << LoadQ->rt_power.readOp.dynamic*clockRate << " W" << endl; 3258 cout << indent_str_next << "LoadQ Subthreshold Leakage = " << LoadQ->rt_power.readOp.leakage << " W" << endl; 3259 cout << indent_str_next << "LoadQ Gate Leakage = " << LoadQ->rt_power.readOp.gate_leakage << " W" << endl; 3260 cout << indent_str_next << "StoreQ Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate << " W" << endl; 3261 cout << indent_str_next << "StoreQ Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage << " W" << endl; 3262 cout << indent_str_next << "StoreQ Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage << " W" << endl; 3263 } 3264 } 3265 3266} 3267 3268void MemManU::computeEnergy(bool is_tdp) 3269{ 3270 3271 if (!exist) return; 3272 if (is_tdp) 3273 { 3274 //init stats for Peak 3275 itlb->stats_t.readAc.access = itlb->l_ip.num_search_ports; 3276 itlb->stats_t.readAc.miss = 0; 3277 itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; 3278 itlb->tdp_stats = itlb->stats_t; 3279 3280 dtlb->stats_t.readAc.access = dtlb->l_ip.num_search_ports*coredynp.LSU_duty_cycle; 3281 dtlb->stats_t.readAc.miss = 0; 3282 dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; 3283 dtlb->tdp_stats = dtlb->stats_t; 3284 } 3285 else 3286 { 3287 //init stats for Runtime Dynamic (RTP) 3288 itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses; 3289 itlb->stats_t.readAc.miss = XML->sys.core[ithCore].itlb.total_misses; 3290 itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; 3291 itlb->rtp_stats = itlb->stats_t; 3292 3293 dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses; 3294 dtlb->stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses; 3295 dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; 3296 dtlb->rtp_stats = dtlb->stats_t; 3297 } 3298 3299 itlb->power_t.reset(); 3300 dtlb->power_t.reset(); 3301 itlb->power_t.readOp.dynamic += itlb->stats_t.readAc.access*itlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits 3302 +itlb->stats_t.readAc.miss*itlb->local_result.power.writeOp.dynamic; 3303 dtlb->power_t.readOp.dynamic += dtlb->stats_t.readAc.access*dtlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits 3304 +dtlb->stats_t.readAc.miss*dtlb->local_result.power.writeOp.dynamic; 3305 3306 if (is_tdp) 3307 { 3308 itlb->power = itlb->power_t + itlb->local_result.power *pppm_lkg; 3309 dtlb->power = dtlb->power_t + dtlb->local_result.power *pppm_lkg; 3310 power = power + itlb->power + dtlb->power; 3311 } 3312 else 3313 { 3314 itlb->rt_power = itlb->power_t + itlb->local_result.power *pppm_lkg; 3315 dtlb->rt_power = dtlb->power_t + dtlb->local_result.power *pppm_lkg; 3316 rt_power = rt_power + itlb->rt_power + dtlb->rt_power; 3317 } 3318} 3319 3320void MemManU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) 3321{ 3322 if (!exist) return; 3323 string indent_str(indent, ' '); 3324 string indent_str_next(indent+2, ' '); 3325 bool long_channel = XML->sys.longer_channel_device; 3326 3327 3328 3329 3330 if (is_tdp) 3331 { 3332 cout << indent_str << "Itlb:" << endl; 3333 cout << indent_str_next << "Area = " << itlb->area.get_area()*1e-6<< " mm^2" << endl; 3334 cout << indent_str_next << "Peak Dynamic = " << itlb->power.readOp.dynamic*clockRate << " W" << endl; 3335 cout << indent_str_next << "Subthreshold Leakage = " 3336 << (long_channel? itlb->power.readOp.longer_channel_leakage:itlb->power.readOp.leakage) <<" W" << endl; 3337 cout << indent_str_next << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" << endl; 3338 cout << indent_str_next << "Runtime Dynamic = " << itlb->rt_power.readOp.dynamic/executionTime << " W" << endl; 3339 cout <<endl; 3340 cout << indent_str<< "Dtlb:" << endl; 3341 cout << indent_str_next << "Area = " << dtlb->area.get_area()*1e-6 << " mm^2" << endl; 3342 cout << indent_str_next << "Peak Dynamic = " << dtlb->power.readOp.dynamic*clockRate << " W" << endl; 3343 cout << indent_str_next << "Subthreshold Leakage = " 3344 << (long_channel? dtlb->power.readOp.longer_channel_leakage:dtlb->power.readOp.leakage) << " W" << endl; 3345 cout << indent_str_next << "Gate Leakage = " << dtlb->power.readOp.gate_leakage << " W" << endl; 3346 cout << indent_str_next << "Runtime Dynamic = " << dtlb->rt_power.readOp.dynamic/executionTime << " W" << endl; 3347 cout <<endl; 3348 } 3349 else 3350 { 3351 cout << indent_str_next << "Itlb Peak Dynamic = " << itlb->rt_power.readOp.dynamic*clockRate << " W" << endl; 3352 cout << indent_str_next << "Itlb Subthreshold Leakage = " << itlb->rt_power.readOp.leakage <<" W" << endl; 3353 cout << indent_str_next << "Itlb Gate Leakage = " << itlb->rt_power.readOp.gate_leakage << " W" << endl; 3354 cout << indent_str_next << "Dtlb Peak Dynamic = " << dtlb->rt_power.readOp.dynamic*clockRate << " W" << endl; 3355 cout << indent_str_next << "Dtlb Subthreshold Leakage = " << dtlb->rt_power.readOp.leakage << " W" << endl; 3356 cout << indent_str_next << "Dtlb Gate Leakage = " << dtlb->rt_power.readOp.gate_leakage << " W" << endl; 3357 } 3358 3359} 3360 3361void RegFU::computeEnergy(bool is_tdp) 3362{ 3363/* 3364 * Architecture RF and physical RF cannot be present at the same time. 3365 * Therefore, the RF stats can only refer to either ARF or PRF; 3366 * And the same stats can be used for both. 3367 */ 3368 if (!exist) return; 3369 if (is_tdp) 3370 { 3371 //init stats for Peak 3372 IRF->stats_t.readAc.access = coredynp.issueW*2*(coredynp.ALU_duty_cycle*1.1+ 3373 (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines; 3374 IRF->stats_t.writeAc.access = coredynp.issueW*(coredynp.ALU_duty_cycle*1.1+ 3375 (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines; 3376 //Rule of Thumb: about 10% RF related instructions do not need to access ALUs 3377 IRF->tdp_stats = IRF->stats_t; 3378 3379 FRF->stats_t.readAc.access = FRF->l_ip.num_rd_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines; 3380 FRF->stats_t.writeAc.access = FRF->l_ip.num_wr_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines; 3381 FRF->tdp_stats = FRF->stats_t; 3382 if (coredynp.regWindowing) 3383 { 3384 RFWIN->stats_t.readAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports; 3385 RFWIN->stats_t.writeAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports; 3386 RFWIN->tdp_stats = RFWIN->stats_t; 3387 } 3388 } 3389 else 3390 { 3391 //init stats for Runtime Dynamic (RTP) 3392 IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads;//TODO: no diff on archi and phy 3393 IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes; 3394 IRF->rtp_stats = IRF->stats_t; 3395 3396 FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads; 3397 FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes; 3398 FRF->rtp_stats = FRF->stats_t; 3399 if (coredynp.regWindowing) 3400 { 3401 RFWIN->stats_t.readAc.access = XML->sys.core[ithCore].function_calls*16; 3402 RFWIN->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls*16; 3403 RFWIN->rtp_stats = RFWIN->stats_t; 3404 3405 IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads + 3406 XML->sys.core[ithCore].function_calls*16; 3407 IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes + 3408 XML->sys.core[ithCore].function_calls*16; 3409 IRF->rtp_stats = IRF->stats_t; 3410 3411 FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads + 3412 XML->sys.core[ithCore].function_calls*16;; 3413 FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes+ 3414 XML->sys.core[ithCore].function_calls*16;; 3415 FRF->rtp_stats = FRF->stats_t; 3416 } 3417 } 3418 IRF->power_t.reset(); 3419 FRF->power_t.reset(); 3420 IRF->power_t.readOp.dynamic += (IRF->stats_t.readAc.access*IRF->local_result.power.readOp.dynamic 3421 +IRF->stats_t.writeAc.access*IRF->local_result.power.writeOp.dynamic); 3422 FRF->power_t.readOp.dynamic += (FRF->stats_t.readAc.access*FRF->local_result.power.readOp.dynamic 3423 +FRF->stats_t.writeAc.access*FRF->local_result.power.writeOp.dynamic); 3424 if (coredynp.regWindowing) 3425 { 3426 RFWIN->power_t.reset(); 3427 RFWIN->power_t.readOp.dynamic += (RFWIN->stats_t.readAc.access*RFWIN->local_result.power.readOp.dynamic + 3428 RFWIN->stats_t.writeAc.access*RFWIN->local_result.power.writeOp.dynamic); 3429 } 3430 3431 if (is_tdp) 3432 { 3433 IRF->power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread; 3434 FRF->power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread; 3435 power = power + (IRF->power + FRF->power); 3436 if (coredynp.regWindowing) 3437 { 3438 RFWIN->power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg; 3439 power = power + RFWIN->power; 3440 } 3441 } 3442 else 3443 { 3444 IRF->rt_power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread; 3445 FRF->rt_power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread; 3446 rt_power = rt_power + (IRF->power_t + FRF->power_t); 3447 if (coredynp.regWindowing) 3448 { 3449 RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg; 3450 rt_power = rt_power + RFWIN->rt_power; 3451 } 3452 } 3453} 3454 3455 3456void RegFU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) 3457{ 3458 if (!exist) return; 3459 string indent_str(indent, ' '); 3460 string indent_str_next(indent+2, ' '); 3461 bool long_channel = XML->sys.longer_channel_device; 3462 3463 if (is_tdp) 3464 { cout << indent_str << "Integer RF:" << endl; 3465 cout << indent_str_next << "Area = " << IRF->area.get_area()*1e-6<< " mm^2" << endl; 3466 cout << indent_str_next << "Peak Dynamic = " << IRF->power.readOp.dynamic*clockRate << " W" << endl; 3467 cout << indent_str_next << "Subthreshold Leakage = " 3468 << (long_channel? IRF->power.readOp.longer_channel_leakage:IRF->power.readOp.leakage) <<" W" << endl; 3469 cout << indent_str_next << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl; 3470 cout << indent_str_next << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic/executionTime << " W" << endl; 3471 cout <<endl; 3472 cout << indent_str<< "Floating Point RF:" << endl; 3473 cout << indent_str_next << "Area = " << FRF->area.get_area()*1e-6 << " mm^2" << endl; 3474 cout << indent_str_next << "Peak Dynamic = " << FRF->power.readOp.dynamic*clockRate << " W" << endl; 3475 cout << indent_str_next << "Subthreshold Leakage = " 3476 << (long_channel? FRF->power.readOp.longer_channel_leakage:FRF->power.readOp.leakage) << " W" << endl; 3477 cout << indent_str_next << "Gate Leakage = " << FRF->power.readOp.gate_leakage << " W" << endl; 3478 cout << indent_str_next << "Runtime Dynamic = " << FRF->rt_power.readOp.dynamic/executionTime << " W" << endl; 3479 cout <<endl; 3480 if (coredynp.regWindowing) 3481 { 3482 cout << indent_str << "Register Windows:" << endl; 3483 cout << indent_str_next << "Area = " << RFWIN->area.get_area() *1e-6 << " mm^2" << endl; 3484 cout << indent_str_next << "Peak Dynamic = " << RFWIN->power.readOp.dynamic*clockRate << " W" << endl; 3485 cout << indent_str_next << "Subthreshold Leakage = " 3486 << (long_channel? RFWIN->power.readOp.longer_channel_leakage:RFWIN->power.readOp.leakage) << " W" << endl; 3487 cout << indent_str_next << "Gate Leakage = " << RFWIN->power.readOp.gate_leakage << " W" << endl; 3488 cout << indent_str_next << "Runtime Dynamic = " << RFWIN->rt_power.readOp.dynamic/executionTime << " W" << endl; 3489 cout <<endl; 3490 } 3491 } 3492 else 3493 { 3494 cout << indent_str_next << "Integer RF Peak Dynamic = " << IRF->rt_power.readOp.dynamic*clockRate << " W" << endl; 3495 cout << indent_str_next << "Integer RF Subthreshold Leakage = " << IRF->rt_power.readOp.leakage <<" W" << endl; 3496 cout << indent_str_next << "Integer RF Gate Leakage = " << IRF->rt_power.readOp.gate_leakage << " W" << endl; 3497 cout << indent_str_next << "Floating Point RF Peak Dynamic = " << FRF->rt_power.readOp.dynamic*clockRate << " W" << endl; 3498 cout << indent_str_next << "Floating Point RF Subthreshold Leakage = " << FRF->rt_power.readOp.leakage << " W" << endl; 3499 cout << indent_str_next << "Floating Point RF Gate Leakage = " << FRF->rt_power.readOp.gate_leakage << " W" << endl; 3500 if (coredynp.regWindowing) 3501 { 3502 cout << indent_str_next << "Register Windows Peak Dynamic = " << RFWIN->rt_power.readOp.dynamic*clockRate << " W" << endl; 3503 cout << indent_str_next << "Register Windows Subthreshold Leakage = " << RFWIN->rt_power.readOp.leakage << " W" << endl; 3504 cout << indent_str_next << "Register Windows Gate Leakage = " << RFWIN->rt_power.readOp.gate_leakage << " W" << endl; 3505 } 3506 } 3507} 3508 3509 3510void EXECU::computeEnergy(bool is_tdp) 3511{ 3512 if (!exist) return; 3513 double pppm_t[4] = {1,1,1,1}; 3514// rfu->power.reset(); 3515// rfu->rt_power.reset(); 3516// scheu->power.reset(); 3517// scheu->rt_power.reset(); 3518// exeu->power.reset(); 3519// exeu->rt_power.reset(); 3520 3521 rfu->computeEnergy(is_tdp); 3522 scheu->computeEnergy(is_tdp); 3523 exeu->computeEnergy(is_tdp); 3524 if (coredynp.num_fpus >0) 3525 { 3526 fp_u->computeEnergy(is_tdp); 3527 } 3528 if (coredynp.num_muls >0) 3529 { 3530 mul->computeEnergy(is_tdp); 3531 } 3532 3533 if (is_tdp) 3534 { 3535 set_pppm(pppm_t, 2*coredynp.ALU_cdb_duty_cycle, 2, 2, 2*coredynp.ALU_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction. 3536 bypass.power = bypass.power + intTagBypass->power*pppm_t + int_bypass->power*pppm_t; 3537 if (coredynp.num_muls >0) 3538 { 3539 set_pppm(pppm_t, 2*coredynp.MUL_cdb_duty_cycle, 2, 2, 2*coredynp.MUL_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction. 3540 bypass.power = bypass.power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t; 3541 power = power + mul->power; 3542 } 3543 if (coredynp.num_fpus>0) 3544 { 3545 set_pppm(pppm_t, 3*coredynp.FPU_cdb_duty_cycle, 3, 3, 3*coredynp.FPU_cdb_duty_cycle);//3 means three source operands needs to be passed for each fp instruction. 3546 bypass.power = bypass.power + fp_bypass->power*pppm_t + fpTagBypass->power*pppm_t ; 3547 power = power + fp_u->power; 3548 } 3549 3550 power = power + rfu->power + exeu->power + bypass.power + scheu->power; 3551 } 3552 else 3553 { 3554 set_pppm(pppm_t, XML->sys.core[ithCore].cdb_alu_accesses, 2, 2, XML->sys.core[ithCore].cdb_alu_accesses); 3555 bypass.rt_power = bypass.rt_power + intTagBypass->power*pppm_t; 3556 bypass.rt_power = bypass.rt_power + int_bypass->power*pppm_t; 3557 3558 if (coredynp.num_muls >0) 3559 { 3560 set_pppm(pppm_t, XML->sys.core[ithCore].cdb_mul_accesses, 2, 2, XML->sys.core[ithCore].cdb_mul_accesses);//2 means two source operands needs to be passed for each int instruction. 3561 bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t; 3562 rt_power = rt_power + mul->rt_power; 3563 } 3564 3565 if (coredynp.num_fpus>0) 3566 { 3567 set_pppm(pppm_t, XML->sys.core[ithCore].cdb_fpu_accesses, 3, 3, XML->sys.core[ithCore].cdb_fpu_accesses); 3568 bypass.rt_power = bypass.rt_power + fp_bypass->power*pppm_t; 3569 bypass.rt_power = bypass.rt_power + fpTagBypass->power*pppm_t; 3570 rt_power = rt_power + fp_u->rt_power; 3571 } 3572 rt_power = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + scheu->rt_power; 3573 } 3574} 3575 3576void EXECU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) 3577{ 3578 if (!exist) return; 3579 string indent_str(indent, ' '); 3580 string indent_str_next(indent+2, ' '); 3581 bool long_channel = XML->sys.longer_channel_device; 3582 3583 3584// cout << indent_str_next << "Results Broadcast Bus Area = " << bypass->area.get_area() *1e-6 << " mm^2" << endl; 3585 if (is_tdp) 3586 { 3587 cout << indent_str << "Register Files:" << endl; 3588 cout << indent_str_next << "Area = " << rfu->area.get_area()*1e-6<< " mm^2" << endl; 3589 cout << indent_str_next << "Peak Dynamic = " << rfu->power.readOp.dynamic*clockRate << " W" << endl; 3590 cout << indent_str_next << "Subthreshold Leakage = " 3591 << (long_channel? rfu->power.readOp.longer_channel_leakage:rfu->power.readOp.leakage) <<" W" << endl; 3592 cout << indent_str_next << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl; 3593 cout << indent_str_next << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic/executionTime << " W" << endl; 3594 cout <<endl; 3595 if (plevel>3){ 3596 rfu->displayEnergy(indent+4,is_tdp); 3597 } 3598 cout << indent_str << "Instruction Scheduler:" << endl; 3599 cout << indent_str_next << "Area = " << scheu->area.get_area()*1e-6 << " mm^2" << endl; 3600 cout << indent_str_next << "Peak Dynamic = " << scheu->power.readOp.dynamic*clockRate << " W" << endl; 3601 cout << indent_str_next << "Subthreshold Leakage = " 3602 << (long_channel? scheu->power.readOp.longer_channel_leakage:scheu->power.readOp.leakage) << " W" << endl; 3603 cout << indent_str_next << "Gate Leakage = " << scheu->power.readOp.gate_leakage << " W" << endl; 3604 cout << indent_str_next << "Runtime Dynamic = " << scheu->rt_power.readOp.dynamic/executionTime << " W" << endl; 3605 cout <<endl; 3606 if (plevel>3){ 3607 scheu->displayEnergy(indent+4,is_tdp); 3608 } 3609 exeu->displayEnergy(indent,is_tdp); 3610 if (coredynp.num_fpus>0) 3611 { 3612 fp_u->displayEnergy(indent,is_tdp); 3613 } 3614 if (coredynp.num_muls >0) 3615 { 3616 mul->displayEnergy(indent,is_tdp); 3617 } 3618 cout << indent_str << "Results Broadcast Bus:" << endl; 3619 cout << indent_str_next << "Area Overhead = " << bypass.area.get_area()*1e-6 << " mm^2" << endl; 3620 cout << indent_str_next << "Peak Dynamic = " << bypass.power.readOp.dynamic*clockRate << " W" << endl; 3621 cout << indent_str_next << "Subthreshold Leakage = " 3622 << (long_channel? bypass.power.readOp.longer_channel_leakage:bypass.power.readOp.leakage ) << " W" << endl; 3623 cout << indent_str_next << "Gate Leakage = " << bypass.power.readOp.gate_leakage << " W" << endl; 3624 cout << indent_str_next << "Runtime Dynamic = " << bypass.rt_power.readOp.dynamic/executionTime << " W" << endl; 3625 cout <<endl; 3626 } 3627 else 3628 { 3629 cout << indent_str_next << "Register Files Peak Dynamic = " << rfu->rt_power.readOp.dynamic*clockRate << " W" << endl; 3630 cout << indent_str_next << "Register Files Subthreshold Leakage = " << rfu->rt_power.readOp.leakage <<" W" << endl; 3631 cout << indent_str_next << "Register Files Gate Leakage = " << rfu->rt_power.readOp.gate_leakage << " W" << endl; 3632 cout << indent_str_next << "Instruction Sheduler Peak Dynamic = " << scheu->rt_power.readOp.dynamic*clockRate << " W" << endl; 3633 cout << indent_str_next << "Instruction Sheduler Subthreshold Leakage = " << scheu->rt_power.readOp.leakage << " W" << endl; 3634 cout << indent_str_next << "Instruction Sheduler Gate Leakage = " << scheu->rt_power.readOp.gate_leakage << " W" << endl; 3635 cout << indent_str_next << "Results Broadcast Bus Peak Dynamic = " << bypass.rt_power.readOp.dynamic*clockRate << " W" << endl; 3636 cout << indent_str_next << "Results Broadcast Bus Subthreshold Leakage = " << bypass.rt_power.readOp.leakage << " W" << endl; 3637 cout << indent_str_next << "Results Broadcast Bus Gate Leakage = " << bypass.rt_power.readOp.gate_leakage << " W" << endl; 3638 } 3639 3640} 3641 3642void Core::computeEnergy(bool is_tdp) 3643{ 3644 //power_point_product_masks 3645 double pppm_t[4] = {1,1,1,1}; 3646 double rtp_pipeline_coe; 3647 double num_units = 4.0; 3648 if (is_tdp) 3649 { 3650 ifu->computeEnergy(is_tdp); 3651 lsu->computeEnergy(is_tdp); 3652 mmu->computeEnergy(is_tdp); 3653 exu->computeEnergy(is_tdp); 3654 3655 if (coredynp.core_ty==OOO) 3656 { 3657 num_units = 5.0; 3658 rnu->computeEnergy(is_tdp); 3659 set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); 3660 if (rnu->exist) 3661 { 3662 rnu->power = rnu->power + corepipe->power*pppm_t; 3663 power = power + rnu->power; 3664 } 3665 } 3666 3667 if (ifu->exist) 3668 { 3669 set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.IFU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); 3670// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; 3671 ifu->power = ifu->power + corepipe->power*pppm_t; 3672// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; 3673// cout << "1/4 pipe = " << corepipe->power.readOp.dynamic*clockRate/num_units << " W" << endl; 3674 power = power + ifu->power; 3675// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; 3676 } 3677 if (lsu->exist) 3678 { 3679 set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.LSU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); 3680 lsu->power = lsu->power + corepipe->power*pppm_t; 3681// cout << "LSU = " << lsu->power.readOp.dynamic*clockRate << " W" << endl; 3682 power = power + lsu->power; 3683// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; 3684 } 3685 if (exu->exist) 3686 { 3687 set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.ALU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); 3688 exu->power = exu->power + corepipe->power*pppm_t; 3689// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl; 3690 power = power + exu->power; 3691// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; 3692 } 3693 if (mmu->exist) 3694 { 3695 set_pppm(pppm_t, coredynp.num_pipelines/num_units*(0.5+0.5*coredynp.LSU_duty_cycle), coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); 3696 mmu->power = mmu->power + corepipe->power*pppm_t; 3697// cout << "MMU = " << mmu->power.readOp.dynamic*clockRate << " W" << endl; 3698 power = power + mmu->power; 3699// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; 3700 } 3701 3702 power = power + undiffCore->power; 3703 3704 if (XML->sys.Private_L2) 3705 { 3706 3707 l2cache->computeEnergy(is_tdp); 3708 set_pppm(pppm_t,l2cache->cachep.clockRate/clockRate, 1,1,1); 3709 //l2cache->power = l2cache->power*pppm_t; 3710 power = power + l2cache->power*pppm_t; 3711 } 3712 } 3713 else 3714 { 3715 ifu->computeEnergy(is_tdp); 3716 lsu->computeEnergy(is_tdp); 3717 mmu->computeEnergy(is_tdp); 3718 exu->computeEnergy(is_tdp); 3719 if (coredynp.core_ty==OOO) 3720 { 3721 num_units = 5.0; 3722 rnu->computeEnergy(is_tdp); 3723 set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); 3724 if (rnu->exist) 3725 { 3726 rnu->rt_power = rnu->rt_power + corepipe->power*pppm_t; 3727 3728 rt_power = rt_power + rnu->rt_power; 3729 } 3730 } 3731 else 3732 { 3733 if (XML->sys.homogeneous_cores==1) 3734 { 3735 rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores; 3736 } 3737 else 3738 { 3739 rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles; 3740 } 3741 set_pppm(pppm_t, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); 3742 } 3743 3744 if (ifu->exist) 3745 { 3746 ifu->rt_power = ifu->rt_power + corepipe->power*pppm_t; 3747 rt_power = rt_power + ifu->rt_power ; 3748 } 3749 if (lsu->exist) 3750 { 3751 lsu->rt_power = lsu->rt_power + corepipe->power*pppm_t; 3752 rt_power = rt_power + lsu->rt_power; 3753 } 3754 if (exu->exist) 3755 { 3756 exu->rt_power = exu->rt_power + corepipe->power*pppm_t; 3757 rt_power = rt_power + exu->rt_power; 3758 } 3759 if (mmu->exist) 3760 { 3761 mmu->rt_power = mmu->rt_power + corepipe->power*pppm_t; 3762 rt_power = rt_power + mmu->rt_power ; 3763 } 3764 3765 rt_power = rt_power + undiffCore->power; 3766// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl; 3767 if (XML->sys.Private_L2) 3768 { 3769 3770 l2cache->computeEnergy(is_tdp); 3771 //set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1); 3772 //l2cache->rt_power = l2cache->rt_power*pppm_t; 3773 rt_power = rt_power + l2cache->rt_power; 3774 } 3775 } 3776 3777} 3778 3779void Core::displayEnergy(uint32_t indent,int plevel,bool is_tdp) 3780{ 3781 string indent_str(indent, ' '); 3782 string indent_str_next(indent+2, ' '); 3783 bool long_channel = XML->sys.longer_channel_device; 3784 if (is_tdp) 3785 { 3786 cout << "Core:" << endl; 3787 cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; 3788 cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl; 3789 cout << indent_str << "Subthreshold Leakage = " 3790 << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; 3791 //cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; 3792 cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; 3793 cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl; 3794 cout<<endl; 3795 if (ifu->exist) 3796 { 3797 cout << indent_str << "Instruction Fetch Unit:" << endl; 3798 cout << indent_str_next << "Area = " << ifu->area.get_area()*1e-6<< " mm^2" << endl; 3799 cout << indent_str_next << "Peak Dynamic = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; 3800 cout << indent_str_next << "Subthreshold Leakage = " 3801 << (long_channel? ifu->power.readOp.longer_channel_leakage:ifu->power.readOp.leakage) <<" W" << endl; 3802 //cout << indent_str_next << "Subthreshold Leakage = " << ifu->power.readOp.longer_channel_leakage <<" W" << endl; 3803 cout << indent_str_next << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" << endl; 3804 cout << indent_str_next << "Runtime Dynamic = " << ifu->rt_power.readOp.dynamic/executionTime << " W" << endl; 3805 cout <<endl; 3806 if (plevel >2){ 3807 ifu->displayEnergy(indent+4,plevel,is_tdp); 3808 } 3809 } 3810 if (coredynp.core_ty==OOO) 3811 { 3812 if (rnu->exist) 3813 { 3814 cout << indent_str<< "Renaming Unit:" << endl; 3815 cout << indent_str_next << "Area = " << rnu->area.get_area()*1e-6 << " mm^2" << endl; 3816 cout << indent_str_next << "Peak Dynamic = " << rnu->power.readOp.dynamic*clockRate << " W" << endl; 3817 cout << indent_str_next << "Subthreshold Leakage = " 3818 << (long_channel? rnu->power.readOp.longer_channel_leakage:rnu->power.readOp.leakage) << " W" << endl; 3819 //cout << indent_str_next << "Subthreshold Leakage = " << rnu->power.readOp.longer_channel_leakage << " W" << endl; 3820 cout << indent_str_next << "Gate Leakage = " << rnu->power.readOp.gate_leakage << " W" << endl; 3821 cout << indent_str_next << "Runtime Dynamic = " << rnu->rt_power.readOp.dynamic/executionTime << " W" << endl; 3822 cout <<endl; 3823 if (plevel >2){ 3824 rnu->displayEnergy(indent+4,plevel,is_tdp); 3825 } 3826 } 3827 3828 } 3829 if (lsu->exist) 3830 { 3831 cout << indent_str<< "Load Store Unit:" << endl; 3832 cout << indent_str_next << "Area = " << lsu->area.get_area()*1e-6 << " mm^2" << endl; 3833 cout << indent_str_next << "Peak Dynamic = " << lsu->power.readOp.dynamic*clockRate << " W" << endl; 3834 cout << indent_str_next << "Subthreshold Leakage = " 3835 << (long_channel? lsu->power.readOp.longer_channel_leakage:lsu->power.readOp.leakage ) << " W" << endl; 3836 //cout << indent_str_next << "Subthreshold Leakage = " << lsu->power.readOp.longer_channel_leakage << " W" << endl; 3837 cout << indent_str_next << "Gate Leakage = " << lsu->power.readOp.gate_leakage << " W" << endl; 3838 cout << indent_str_next << "Runtime Dynamic = " << lsu->rt_power.readOp.dynamic/executionTime << " W" << endl; 3839 cout <<endl; 3840 if (plevel >2){ 3841 lsu->displayEnergy(indent+4,plevel,is_tdp); 3842 } 3843 } 3844 if (mmu->exist) 3845 { 3846 cout << indent_str<< "Memory Management Unit:" << endl; 3847 cout << indent_str_next << "Area = " << mmu->area.get_area() *1e-6 << " mm^2" << endl; 3848 cout << indent_str_next << "Peak Dynamic = " << mmu->power.readOp.dynamic*clockRate << " W" << endl; 3849 cout << indent_str_next << "Subthreshold Leakage = " 3850 << (long_channel? mmu->power.readOp.longer_channel_leakage:mmu->power.readOp.leakage) << " W" << endl; 3851 //cout << indent_str_next << "Subthreshold Leakage = " << mmu->power.readOp.longer_channel_leakage << " W" << endl; 3852 cout << indent_str_next << "Gate Leakage = " << mmu->power.readOp.gate_leakage << " W" << endl; 3853 cout << indent_str_next << "Runtime Dynamic = " << mmu->rt_power.readOp.dynamic/executionTime << " W" << endl; 3854 cout <<endl; 3855 if (plevel >2){ 3856 mmu->displayEnergy(indent+4,plevel,is_tdp); 3857 } 3858 } 3859 if (exu->exist) 3860 { 3861 cout << indent_str<< "Execution Unit:" << endl; 3862 cout << indent_str_next << "Area = " << exu->area.get_area() *1e-6<< " mm^2" << endl; 3863 cout << indent_str_next << "Peak Dynamic = " << exu->power.readOp.dynamic*clockRate << " W" << endl; 3864 cout << indent_str_next << "Subthreshold Leakage = " 3865 << (long_channel? exu->power.readOp.longer_channel_leakage:exu->power.readOp.leakage) << " W" << endl; 3866 //cout << indent_str_next << "Subthreshold Leakage = " << exu->power.readOp.longer_channel_leakage << " W" << endl; 3867 cout << indent_str_next << "Gate Leakage = " << exu->power.readOp.gate_leakage << " W" << endl; 3868 cout << indent_str_next << "Runtime Dynamic = " << exu->rt_power.readOp.dynamic/executionTime << " W" << endl; 3869 cout <<endl; 3870 if (plevel >2){ 3871 exu->displayEnergy(indent+4,plevel,is_tdp); 3872 } 3873 } 3874// if (plevel >2) 3875// { 3876// if (undiffCore->exist) 3877// { 3878// cout << indent_str << "Undifferentiated Core" << endl; 3879// cout << indent_str_next << "Area = " << undiffCore->area.get_area()*1e-6<< " mm^2" << endl; 3880// cout << indent_str_next << "Peak Dynamic = " << undiffCore->power.readOp.dynamic*clockRate << " W" << endl; 3881//// cout << indent_str_next << "Subthreshold Leakage = " << undiffCore->power.readOp.leakage <<" W" << endl; 3882// cout << indent_str_next << "Subthreshold Leakage = " 3883// << (long_channel? undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage) << " W" << endl; 3884// cout << indent_str_next << "Gate Leakage = " << undiffCore->power.readOp.gate_leakage << " W" << endl; 3885// // cout << indent_str_next << "Runtime Dynamic = " << undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl; 3886// cout <<endl; 3887// } 3888// } 3889 if (XML->sys.Private_L2) 3890 { 3891 3892 l2cache->displayEnergy(4,is_tdp); 3893 } 3894 3895 } 3896 else 3897 { 3898// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl; 3899// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl; 3900// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl; 3901// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl; 3902// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << lsu->rt_power.readOp.leakage << " W" << endl; 3903// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl; 3904// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl; 3905// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl; 3906// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl; 3907// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl; 3908// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl; 3909// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl; 3910 } 3911} 3912InstFetchU ::~InstFetchU(){ 3913 3914 if (!exist) return; 3915 if(IB) {delete IB; IB = 0;} 3916 if(ID_inst) {delete ID_inst; ID_inst = 0;} 3917 if(ID_operand) {delete ID_operand; ID_operand = 0;} 3918 if(ID_misc) {delete ID_misc; ID_misc = 0;} 3919 if (coredynp.predictionW>0) 3920 { 3921 if(BTB) {delete BTB; BTB = 0;} 3922 if(BPT) {delete BPT; BPT = 0;} 3923 } 3924} 3925 3926BranchPredictor ::~BranchPredictor(){ 3927 3928 if (!exist) return; 3929 if(globalBPT) {delete globalBPT; globalBPT = 0;} 3930 if(localBPT) {delete localBPT; localBPT = 0;} 3931 if(L1_localBPT) {delete L1_localBPT; L1_localBPT = 0;} 3932 if(L2_localBPT) {delete L2_localBPT; L2_localBPT = 0;} 3933 if(chooser) {delete chooser; chooser = 0;} 3934 if(RAS) {delete RAS; RAS = 0;} 3935 } 3936 3937RENAMINGU ::~RENAMINGU(){ 3938 3939 if (!exist) return; 3940 if(iFRAT ) {delete iFRAT; iFRAT = 0;} 3941 if(fFRAT ) {delete fFRAT; fFRAT =0;} 3942 if(iRRAT) {delete iRRAT; iRRAT = 0;} 3943 if(iFRAT) {delete iFRAT; iFRAT = 0;} 3944 if(ifreeL) {delete ifreeL;ifreeL= 0;} 3945 if(ffreeL) {delete ffreeL;ffreeL= 0;} 3946 if(idcl) {delete idcl; idcl = 0;} 3947 if(fdcl) {delete fdcl; fdcl = 0;} 3948 if(RAHT) {delete RAHT; RAHT = 0;} 3949 } 3950 3951LoadStoreU ::~LoadStoreU(){ 3952 3953 if (!exist) return; 3954 if(LSQ) {delete LSQ; LSQ = 0;} 3955 } 3956 3957MemManU ::~MemManU(){ 3958 3959 if (!exist) return; 3960 if(itlb) {delete itlb; itlb = 0;} 3961 if(dtlb) {delete dtlb; dtlb = 0;} 3962 } 3963 3964RegFU ::~RegFU(){ 3965 3966 if (!exist) return; 3967 if(IRF) {delete IRF; IRF = 0;} 3968 if(FRF) {delete FRF; FRF = 0;} 3969 if(RFWIN) {delete RFWIN; RFWIN = 0;} 3970 } 3971 3972SchedulerU ::~SchedulerU(){ 3973 3974 if (!exist) return; 3975 if(int_inst_window) {delete int_inst_window; int_inst_window = 0;} 3976 if(fp_inst_window) {delete int_inst_window; int_inst_window = 0;} 3977 if(ROB) {delete ROB; ROB = 0;} 3978 if(instruction_selection) {delete instruction_selection;instruction_selection = 0;} 3979 } 3980 3981EXECU ::~EXECU(){ 3982 3983 if (!exist) return; 3984 if(int_bypass) {delete int_bypass; int_bypass = 0;} 3985 if(intTagBypass) {delete intTagBypass; intTagBypass =0;} 3986 if(int_mul_bypass) {delete int_mul_bypass; int_mul_bypass = 0;} 3987 if(intTag_mul_Bypass) {delete intTag_mul_Bypass; intTag_mul_Bypass =0;} 3988 if(fp_bypass) {delete fp_bypass;fp_bypass = 0;} 3989 if(fpTagBypass) {delete fpTagBypass;fpTagBypass = 0;} 3990 if(fp_u) {delete fp_u;fp_u = 0;} 3991 if(exeu) {delete exeu;exeu = 0;} 3992 if(mul) {delete mul;mul = 0;} 3993 if(rfu) {delete rfu;rfu = 0;} 3994 if(scheu) {delete scheu; scheu = 0;} 3995 } 3996 3997Core ::~Core(){ 3998 3999 if(ifu) {delete ifu; ifu = 0;} 4000 if(lsu) {delete lsu; lsu = 0;} 4001 if(rnu) {delete rnu; rnu = 0;} 4002 if(mmu) {delete mmu; mmu = 0;} 4003 if(exu) {delete exu; exu = 0;} 4004 if(corepipe) {delete corepipe; corepipe = 0;} 4005 if(undiffCore) {delete undiffCore;undiffCore = 0;} 4006 if(l2cache) {delete l2cache;l2cache = 0;} 4007 } 4008 4009void Core::set_core_param() 4010{ 4011 coredynp.opt_local = XML->sys.core[ithCore].opt_local; 4012 coredynp.x86 = XML->sys.core[ithCore].x86; 4013 coredynp.Embedded = XML->sys.Embedded; 4014 coredynp.core_ty = (enum Core_type)XML->sys.core[ithCore].machine_type; 4015 coredynp.rm_ty = (enum Renaming_type)XML->sys.core[ithCore].rename_scheme; 4016 coredynp.fetchW = XML->sys.core[ithCore].fetch_width; 4017 coredynp.decodeW = XML->sys.core[ithCore].decode_width; 4018 coredynp.issueW = XML->sys.core[ithCore].issue_width; 4019 coredynp.peak_issueW = XML->sys.core[ithCore].peak_issue_width; 4020 coredynp.commitW = XML->sys.core[ithCore].commit_width; 4021 coredynp.peak_commitW = XML->sys.core[ithCore].peak_issue_width; 4022 coredynp.predictionW = XML->sys.core[ithCore].prediction_width; 4023 coredynp.fp_issueW = XML->sys.core[ithCore].fp_issue_width; 4024 coredynp.fp_decodeW = XML->sys.core[ithCore].fp_issue_width; 4025 coredynp.num_alus = XML->sys.core[ithCore].ALU_per_core; 4026 coredynp.num_fpus = XML->sys.core[ithCore].FPU_per_core; 4027 coredynp.num_muls = XML->sys.core[ithCore].MUL_per_core; 4028 4029 4030 coredynp.num_hthreads = XML->sys.core[ithCore].number_hardware_threads; 4031 coredynp.multithreaded = coredynp.num_hthreads>1? true:false; 4032 coredynp.instruction_length = XML->sys.core[ithCore].instruction_length; 4033 coredynp.pc_width = XML->sys.virtual_address_width; 4034 4035 coredynp.opcode_length = XML->sys.core[ithCore].opcode_width; 4036 coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width; 4037 coredynp.num_pipelines = XML->sys.core[ithCore].pipelines_per_core[0]; 4038 coredynp.pipeline_stages = XML->sys.core[ithCore].pipeline_depth[0]; 4039 coredynp.num_fp_pipelines = XML->sys.core[ithCore].pipelines_per_core[1]; 4040 coredynp.fp_pipeline_stages = XML->sys.core[ithCore].pipeline_depth[1]; 4041 coredynp.int_data_width = int(ceil(XML->sys.machine_bits/32.0))*32; 4042 coredynp.fp_data_width = coredynp.int_data_width; 4043 coredynp.v_address_width = XML->sys.virtual_address_width; 4044 coredynp.p_address_width = XML->sys.physical_address_width; 4045 4046 coredynp.scheu_ty = (enum Scheduler_type)XML->sys.core[ithCore].instruction_window_scheme; 4047 coredynp.arch_ireg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size))); 4048 coredynp.arch_freg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size))); 4049 coredynp.num_IRF_entry = XML->sys.core[ithCore].archi_Regs_IRF_size; 4050 coredynp.num_FRF_entry = XML->sys.core[ithCore].archi_Regs_FRF_size; 4051 coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle; 4052 coredynp.total_cycles = XML->sys.core[ithCore].total_cycles; 4053 coredynp.busy_cycles = XML->sys.core[ithCore].busy_cycles; 4054 coredynp.idle_cycles = XML->sys.core[ithCore].idle_cycles; 4055 4056 //Max power duty cycle for peak power estimation 4057// if (coredynp.core_ty==OOO) 4058// { 4059// coredynp.IFU_duty_cycle = 1; 4060// coredynp.LSU_duty_cycle = 1; 4061// coredynp.MemManU_I_duty_cycle =1; 4062// coredynp.MemManU_D_duty_cycle =1; 4063// coredynp.ALU_duty_cycle =1; 4064// coredynp.MUL_duty_cycle =1; 4065// coredynp.FPU_duty_cycle =1; 4066// coredynp.ALU_cdb_duty_cycle =1; 4067// coredynp.MUL_cdb_duty_cycle =1; 4068// coredynp.FPU_cdb_duty_cycle =1; 4069// } 4070// else 4071// { 4072 coredynp.IFU_duty_cycle = XML->sys.core[ithCore].IFU_duty_cycle; 4073 coredynp.BR_duty_cycle = XML->sys.core[ithCore].BR_duty_cycle; 4074 coredynp.LSU_duty_cycle = XML->sys.core[ithCore].LSU_duty_cycle; 4075 coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle; 4076 coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle; 4077 coredynp.ALU_duty_cycle = XML->sys.core[ithCore].ALU_duty_cycle; 4078 coredynp.MUL_duty_cycle = XML->sys.core[ithCore].MUL_duty_cycle; 4079 coredynp.FPU_duty_cycle = XML->sys.core[ithCore].FPU_duty_cycle; 4080 coredynp.ALU_cdb_duty_cycle = XML->sys.core[ithCore].ALU_cdb_duty_cycle; 4081 coredynp.MUL_cdb_duty_cycle = XML->sys.core[ithCore].MUL_cdb_duty_cycle; 4082 coredynp.FPU_cdb_duty_cycle = XML->sys.core[ithCore].FPU_cdb_duty_cycle; 4083// } 4084 4085 4086 if (!((coredynp.core_ty==OOO)||(coredynp.core_ty==Inorder))) 4087 { 4088 cout<<"Invalid Core Type"<<endl; 4089 exit(0); 4090 } 4091// if (coredynp.core_ty==OOO) 4092// { 4093// cout<<"OOO processor models are being updated and will be available in next release"<<endl; 4094// exit(0); 4095// } 4096 if (!((coredynp.scheu_ty==PhysicalRegFile)||(coredynp.scheu_ty==ReservationStation))) 4097 { 4098 cout<<"Invalid OOO Scheduler Type"<<endl; 4099 exit(0); 4100 } 4101 4102 if (!((coredynp.rm_ty ==RAMbased)||(coredynp.rm_ty ==CAMbased))) 4103 { 4104 cout<<"Invalid OOO Renaming Type"<<endl; 4105 exit(0); 4106 } 4107 4108if (coredynp.core_ty==OOO) 4109{ 4110 if (coredynp.scheu_ty==PhysicalRegFile) 4111 { 4112 coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_IRF_size))); 4113 coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size))); 4114 coredynp.num_ifreelist_entries = coredynp.num_IRF_entry = XML->sys.core[ithCore].phy_Regs_IRF_size; 4115 coredynp.num_ffreelist_entries = coredynp.num_FRF_entry = XML->sys.core[ithCore].phy_Regs_FRF_size; 4116 } 4117 else if (coredynp.scheu_ty==ReservationStation) 4118 {//ROB serves as Phy RF in RS based OOO 4119 coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size))); 4120 coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size))); 4121 coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size; 4122 coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size; 4123 4124 } 4125 4126} 4127 coredynp.globalCheckpoint = 32;//best check pointing entries for a 4~8 issue OOO should be 16~48;See TR for reference. 4128 coredynp.perThreadState = 8; 4129 coredynp.instruction_length = 32; 4130 coredynp.clockRate = XML->sys.core[ithCore].clock_rate; 4131 coredynp.clockRate *= 1e6; 4132 coredynp.regWindowing= (XML->sys.core[ithCore].register_windows_size>0&&coredynp.core_ty==Inorder)?true:false; 4133 coredynp.executionTime = XML->sys.total_cycles/coredynp.clockRate; 4134 set_pppm(coredynp.pppm_lkg_multhread, 0, coredynp.num_hthreads, coredynp.num_hthreads, 0); 4135} 4136