core.cc revision 10152:52c552138ba1
1/*****************************************************************************
2 *                                McPAT
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *                          All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution;
14 * neither the name of the copyright holders nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
29 *
30 ***************************************************************************/
31
32#include <algorithm>
33#include <cassert>
34#include <cmath>
35#include <iostream>
36#include <string>
37
38#include "XML_Parse.h"
39#include "basic_circuit.h"
40#include "const.h"
41#include "core.h"
42#include "io.h"
43#include "parameter.h"
44//#include "globalvar.h"
45
46InstFetchU::InstFetchU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
47:XML(XML_interface),
48 ithCore(ithCore_),
49 interface_ip(*interface_ip_),
50 coredynp(dyn_p_),
51 IB  (0),
52 BTB (0),
53 ID_inst  (0),
54 ID_operand  (0),
55 ID_misc  (0),
56 exist(exist_)
57{
58          if (!exist) return;
59          int  idx, tag, data, size, line, assoc, banks;
60          bool debug= false, is_default = true;
61
62          clockRate = coredynp.clockRate;
63          executionTime = coredynp.executionTime;
64          cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7];
65          //Assuming all L1 caches are virtually idxed physically tagged.
66          //cache
67
68          size                             = (int)XML->sys.core[ithCore].icache.icache_config[0];
69          line                             = (int)XML->sys.core[ithCore].icache.icache_config[1];
70          assoc                            = (int)XML->sys.core[ithCore].icache.icache_config[2];
71          banks                            = (int)XML->sys.core[ithCore].icache.icache_config[3];
72          idx    					 	   = debug?9:int(ceil(log2(size/line/assoc)));
73          tag							   = debug?51:(int)XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
74          interface_ip.specific_tag        = 1;
75          interface_ip.tag_w               = tag;
76          interface_ip.cache_sz            = debug?32768:(int)XML->sys.core[ithCore].icache.icache_config[0];
77          interface_ip.line_sz             = debug?64:(int)XML->sys.core[ithCore].icache.icache_config[1];
78          interface_ip.assoc               = debug?8:(int)XML->sys.core[ithCore].icache.icache_config[2];
79          interface_ip.nbanks              = debug?1:(int)XML->sys.core[ithCore].icache.icache_config[3];
80          interface_ip.out_w               = interface_ip.line_sz*8;
81          interface_ip.access_mode         = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5];
82          interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
83          interface_ip.latency             = debug?3.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
84          interface_ip.is_cache			 = true;
85          interface_ip.pure_cam			 = false;
86          interface_ip.pure_ram			 = false;
87        //  interface_ip.obj_func_dyn_energy = 0;
88        //  interface_ip.obj_func_dyn_power  = 0;
89        //  interface_ip.obj_func_leak_power = 0;
90        //  interface_ip.obj_func_cycle_t    = 1;
91          interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
92          interface_ip.num_rd_ports    = 0;
93          interface_ip.num_wr_ports    = 0;
94          interface_ip.num_se_rd_ports = 0;
95          icache.caches = new ArrayST(&interface_ip, "icache", Core_device, coredynp.opt_local, coredynp.core_ty);
96          scktRatio = g_tp.sckt_co_eff;
97          chip_PR_overhead = g_tp.chip_layout_overhead;
98          macro_PR_overhead = g_tp.macro_layout_overhead;
99          icache.area.set_area(icache.area.get_area()+ icache.caches->local_result.area);
100          area.set_area(area.get_area()+ icache.caches->local_result.area);
101          //output_data_csv(icache.caches.local_result);
102
103
104          /*
105           *iCache controllers
106           *miss buffer Each MSHR contains enough state
107           *to handle one or more accesses of any type to a single memory line.
108           *Due to the generality of the MSHR mechanism,
109           *the amount of state involved is non-trivial:
110           *including the address, pointers to the cache entry and destination register,
111           *written data, and various other pieces of state.
112           */
113          interface_ip.num_search_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
114          tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
115          data							   = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + icache.caches->l_ip.line_sz*8;
116          interface_ip.specific_tag        = 1;
117          interface_ip.tag_w               = tag;
118          interface_ip.line_sz             = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
119          interface_ip.cache_sz            = XML->sys.core[ithCore].icache.buffer_sizes[0]*interface_ip.line_sz;
120          interface_ip.assoc               = 0;
121          interface_ip.nbanks              = 1;
122          interface_ip.out_w               = interface_ip.line_sz*8;
123          interface_ip.access_mode         = 0;
124          interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;//means cycle time
125          interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;//means access time
126          interface_ip.obj_func_dyn_energy = 0;
127          interface_ip.obj_func_dyn_power  = 0;
128          interface_ip.obj_func_leak_power = 0;
129          interface_ip.obj_func_cycle_t    = 1;
130          interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
131          interface_ip.num_rd_ports    = 0;
132          interface_ip.num_wr_ports    = 0;
133          interface_ip.num_se_rd_ports = 0;
134          interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
135          icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
136          icache.area.set_area(icache.area.get_area()+ icache.missb->local_result.area);
137          area.set_area(area.get_area()+ icache.missb->local_result.area);
138          //output_data_csv(icache.missb.local_result);
139
140          //fill buffer
141          tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
142          data							   = icache.caches->l_ip.line_sz;
143          interface_ip.specific_tag        = 1;
144          interface_ip.tag_w               = tag;
145          interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
146          interface_ip.cache_sz            = data*XML->sys.core[ithCore].icache.buffer_sizes[1];
147          interface_ip.assoc               = 0;
148          interface_ip.nbanks              = 1;
149          interface_ip.out_w               = interface_ip.line_sz*8;
150          interface_ip.access_mode         = 0;
151          interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
152          interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
153          interface_ip.obj_func_dyn_energy = 0;
154          interface_ip.obj_func_dyn_power  = 0;
155          interface_ip.obj_func_leak_power = 0;
156          interface_ip.obj_func_cycle_t    = 1;
157          interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
158          interface_ip.num_rd_ports    = 0;
159          interface_ip.num_wr_ports    = 0;
160          interface_ip.num_se_rd_ports = 0;
161          interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
162          icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
163          icache.area.set_area(icache.area.get_area()+ icache.ifb->local_result.area);
164          area.set_area(area.get_area()+ icache.ifb->local_result.area);
165          //output_data_csv(icache.ifb.local_result);
166
167          //prefetch buffer
168          tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
169          data							   = icache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
170          interface_ip.specific_tag        = 1;
171          interface_ip.tag_w               = tag;
172          interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
173          interface_ip.cache_sz            = XML->sys.core[ithCore].icache.buffer_sizes[2]*interface_ip.line_sz;
174          interface_ip.assoc               = 0;
175          interface_ip.nbanks              = 1;
176          interface_ip.out_w               = interface_ip.line_sz*8;
177          interface_ip.access_mode         = 0;
178          interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
179          interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
180          interface_ip.obj_func_dyn_energy = 0;
181          interface_ip.obj_func_dyn_power  = 0;
182          interface_ip.obj_func_leak_power = 0;
183          interface_ip.obj_func_cycle_t    = 1;
184          interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
185          interface_ip.num_rd_ports    = 0;
186          interface_ip.num_wr_ports    = 0;
187          interface_ip.num_se_rd_ports = 0;
188          interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
189          icache.prefetchb = new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
190          icache.area.set_area(icache.area.get_area()+ icache.prefetchb->local_result.area);
191          area.set_area(area.get_area()+ icache.prefetchb->local_result.area);
192          //output_data_csv(icache.prefetchb.local_result);
193
194          //Instruction buffer
195          data							   = XML->sys.core[ithCore].instruction_length*XML->sys.core[ithCore].peak_issue_width;//icache.caches.l_ip.line_sz; //multiple threads timing sharing the instruction buffer.
196          interface_ip.is_cache			   = false;
197          interface_ip.pure_ram            = true;
198          interface_ip.pure_cam            = false;
199          interface_ip.line_sz             = int(ceil(data/8.0));
200          interface_ip.cache_sz            = XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz>64?
201                                                     XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz:64;
202          interface_ip.assoc               = 1;
203          interface_ip.nbanks              = 1;
204          interface_ip.out_w               = interface_ip.line_sz*8;
205          interface_ip.access_mode         = 0;
206          interface_ip.throughput          = 1.0/clockRate;
207          interface_ip.latency             = 1.0/clockRate;
208          interface_ip.obj_func_dyn_energy = 0;
209          interface_ip.obj_func_dyn_power  = 0;
210          interface_ip.obj_func_leak_power = 0;
211          interface_ip.obj_func_cycle_t    = 1;
212          //NOTE: Assuming IB is time slice shared among threads, every fetch op will at least fetch "fetch width" instructions.
213          interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;//XML->sys.core[ithCore].fetch_width;
214          interface_ip.num_rd_ports    = 0;
215          interface_ip.num_wr_ports    = 0;
216          interface_ip.num_se_rd_ports = 0;
217          IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
218          IB->area.set_area(IB->area.get_area()+ IB->local_result.area);
219          area.set_area(area.get_area()+ IB->local_result.area);
220          //output_data_csv(IB.IB.local_result);
221
222          //	  inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width;
223          //	  inst_decoder.init_decoder(is_default, &interface_ip);
224          //	  inst_decoder.full_decoder_power();
225
226      if (coredynp.predictionW>0)
227      {
228          /*
229           * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged
230           * It is only a cache without all the buffers in the cache controller since it is more like a
231           * look up table than a cache with cache controller. When access miss, no load from other places
232           * such as main memory (not actively fill the misses), it is passively updated under two circumstances:
233           * 1)  when BPT@ID stage finds out current is a taken branch while BTB missed
234           * 2)  When BPT@ID stage predicts differently than BTB
235           * 3)  When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid)
236           * 4)  when EXEU find out wrong target has been provided from BTB.
237           *
238           */
239          size                             = XML->sys.core[ithCore].BTB.BTB_config[0];
240          line                             = XML->sys.core[ithCore].BTB.BTB_config[1];
241          assoc                            = XML->sys.core[ithCore].BTB.BTB_config[2];
242          banks                            = XML->sys.core[ithCore].BTB.BTB_config[3];
243          idx    					 	   = debug?9:int(ceil(log2(size/line/assoc)));
244//    	  tag							   = debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
245          tag							   = debug?51:XML->sys.virtual_address_width + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
246          interface_ip.is_cache			   = true;
247          interface_ip.pure_ram            = false;
248          interface_ip.pure_cam            = false;
249          interface_ip.specific_tag        = 1;
250          interface_ip.tag_w               = tag;
251          interface_ip.cache_sz            = debug?32768:size;
252          interface_ip.line_sz             = debug?64:line;
253          interface_ip.assoc               = debug?8:assoc;
254          interface_ip.nbanks              = debug?1:banks;
255          interface_ip.out_w               = interface_ip.line_sz*8;
256          interface_ip.access_mode         = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
257          interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[4]/clockRate;
258          interface_ip.latency             = debug?3.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[5]/clockRate;
259          interface_ip.obj_func_dyn_energy = 0;
260          interface_ip.obj_func_dyn_power  = 0;
261          interface_ip.obj_func_leak_power = 0;
262          interface_ip.obj_func_cycle_t    = 1;
263          interface_ip.num_rw_ports    = 1;
264          interface_ip.num_rd_ports    = coredynp.predictionW;
265          interface_ip.num_wr_ports    = coredynp.predictionW;
266          interface_ip.num_se_rd_ports = 0;
267          BTB = new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, coredynp.opt_local, coredynp.core_ty);
268          BTB->area.set_area(BTB->area.get_area()+ BTB->local_result.area);
269          area.set_area(area.get_area()+ BTB->local_result.area);
270          ///cout<<"area="<<area<<endl;
271
272          BPT = new BranchPredictor(XML, ithCore, &interface_ip,coredynp);
273          area.set_area(area.get_area()+ BPT->area.get_area());
274      }
275
276      ID_inst = new inst_decoder(is_default, &interface_ip,
277                  coredynp.opcode_length, 1/*Decoder should not know how many by itself*/,
278                  coredynp.x86,
279                  Core_device, coredynp.core_ty);
280
281      ID_operand = new inst_decoder(is_default, &interface_ip,
282                  coredynp.arch_ireg_width, 1,
283                  coredynp.x86,
284                  Core_device, coredynp.core_ty);
285
286      ID_misc = new inst_decoder(is_default, &interface_ip,
287                  8/* Prefix field etc upto 14B*/, 1,
288                  coredynp.x86,
289                  Core_device, coredynp.core_ty);
290      //TODO: X86 decoder should decode the inst in cyclic mode under the control of squencer.
291      //So the dynamic power should be multiplied by a few times.
292      area.set_area(area.get_area()+ (ID_inst->area.get_area()
293                  +ID_operand->area.get_area()
294                  +ID_misc->area.get_area())*coredynp.decodeW);
295
296}
297
298
299BranchPredictor::BranchPredictor(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
300:XML(XML_interface),
301 ithCore(ithCore_),
302 interface_ip(*interface_ip_),
303 coredynp(dyn_p_),
304 globalBPT(0),
305 localBPT(0),
306 L1_localBPT(0),
307 L2_localBPT(0),
308 chooser(0),
309 RAS(0),
310 exist(exist_)
311{
312        /*
313         * Branch Predictor, accessed during ID stage.
314         * McPAT's branch predictor model is the tournament branch predictor used in Alpha 21264,
315         * including global predictor, local two level predictor, and Chooser.
316         * The Branch predictor also includes a RAS (return address stack) for function calls
317         * Branch predictors are tagged by thread ID and modeled as 1-way associative $
318         * However RAS return address stacks are duplicated for each thread.
319         * TODO:Data Width need to be computed more precisely	 *
320         */
321        if (!exist) return;
322        int  tag, data;
323
324        clockRate = coredynp.clockRate;
325        executionTime = coredynp.executionTime;
326        interface_ip.assoc               = 1;
327        interface_ip.pure_cam            = false;
328        if (coredynp.multithreaded)
329        {
330
331                tag							     = int(log2(coredynp.num_hthreads)+ EXTRA_TAG_BITS);
332                interface_ip.specific_tag        = 1;
333                interface_ip.tag_w               = tag;
334
335                interface_ip.is_cache			 = true;
336                interface_ip.pure_ram            = false;
337                }
338        else
339        {
340                interface_ip.is_cache			 = false;
341                interface_ip.pure_ram            = true;
342
343        }
344        //Global predictor
345        data							 = int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits/8.0));
346        interface_ip.line_sz             = data;
347        interface_ip.cache_sz            = data*XML->sys.core[ithCore].predictor.global_predictor_entries;
348        interface_ip.nbanks              = 1;
349        interface_ip.out_w               = interface_ip.line_sz*8;
350        interface_ip.access_mode         = 2;
351        interface_ip.throughput          = 1.0/clockRate;
352        interface_ip.latency             = 1.0/clockRate;
353        interface_ip.obj_func_dyn_energy = 0;
354        interface_ip.obj_func_dyn_power  = 0;
355        interface_ip.obj_func_leak_power = 0;
356        interface_ip.obj_func_cycle_t    = 1;
357        interface_ip.num_rw_ports    = 0;
358        interface_ip.num_rd_ports    = coredynp.predictionW;
359        interface_ip.num_wr_ports    = coredynp.predictionW;
360        interface_ip.num_se_rd_ports = 0;
361        globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
362        globalBPT->area.set_area(globalBPT->area.get_area()+ globalBPT->local_result.area);
363        area.set_area(area.get_area()+ globalBPT->local_result.area);
364
365        //Local BPT (Level 1)
366        data							 = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0]/8.0));
367        interface_ip.line_sz             = data;
368        interface_ip.cache_sz            = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
369        interface_ip.nbanks              = 1;
370        interface_ip.out_w               = interface_ip.line_sz*8;
371        interface_ip.access_mode         = 2;
372        interface_ip.throughput          = 1.0/clockRate;
373        interface_ip.latency             = 1.0/clockRate;
374        interface_ip.obj_func_dyn_energy = 0;
375        interface_ip.obj_func_dyn_power  = 0;
376        interface_ip.obj_func_leak_power = 0;
377        interface_ip.obj_func_cycle_t    = 1;
378        interface_ip.num_rw_ports    = 0;
379        interface_ip.num_rd_ports    = coredynp.predictionW;
380        interface_ip.num_wr_ports    = coredynp.predictionW;
381        interface_ip.num_se_rd_ports = 0;
382        L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
383        L1_localBPT->area.set_area(L1_localBPT->area.get_area()+ L1_localBPT->local_result.area);
384        area.set_area(area.get_area()+ L1_localBPT->local_result.area);
385
386        //Local BPT (Level 2)
387        data							 = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1]/8.0));
388        interface_ip.line_sz             = data;
389        interface_ip.cache_sz            = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
390        interface_ip.nbanks              = 1;
391        interface_ip.out_w               = interface_ip.line_sz*8;
392        interface_ip.access_mode         = 2;
393        interface_ip.throughput          = 1.0/clockRate;
394        interface_ip.latency             = 1.0/clockRate;
395        interface_ip.obj_func_dyn_energy = 0;
396        interface_ip.obj_func_dyn_power  = 0;
397        interface_ip.obj_func_leak_power = 0;
398        interface_ip.obj_func_cycle_t    = 1;
399        interface_ip.num_rw_ports    = 0;
400        interface_ip.num_rd_ports    = coredynp.predictionW;
401        interface_ip.num_wr_ports    = coredynp.predictionW;
402        interface_ip.num_se_rd_ports = 0;
403        L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
404        L2_localBPT->area.set_area(L2_localBPT->area.get_area()+ L2_localBPT->local_result.area);
405        area.set_area(area.get_area()+ L2_localBPT->local_result.area);
406
407        //Chooser
408        data							 = int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits/8.0));
409        interface_ip.line_sz             = data;
410        interface_ip.cache_sz            = data*XML->sys.core[ithCore].predictor.chooser_predictor_entries;
411        interface_ip.nbanks              = 1;
412        interface_ip.out_w               = interface_ip.line_sz*8;
413        interface_ip.access_mode         = 2;
414        interface_ip.throughput          = 1.0/clockRate;
415        interface_ip.latency             = 1.0/clockRate;
416        interface_ip.obj_func_dyn_energy = 0;
417        interface_ip.obj_func_dyn_power  = 0;
418        interface_ip.obj_func_leak_power = 0;
419        interface_ip.obj_func_cycle_t    = 1;
420        interface_ip.num_rw_ports    = 0;
421        interface_ip.num_rd_ports    = coredynp.predictionW;
422        interface_ip.num_wr_ports    = coredynp.predictionW;
423        interface_ip.num_se_rd_ports = 0;
424        chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, coredynp.opt_local, coredynp.core_ty);
425        chooser->area.set_area(chooser->area.get_area()+ chooser->local_result.area);
426        area.set_area(area.get_area()+ chooser->local_result.area);
427
428        //RAS return address stacks are Duplicated for each thread.
429        interface_ip.is_cache			 = false;
430        interface_ip.pure_ram            = true;
431        data							 = int(ceil(coredynp.pc_width/8.0));
432        interface_ip.line_sz             = data;
433        interface_ip.cache_sz            = data*XML->sys.core[ithCore].RAS_size;
434        interface_ip.assoc               = 1;
435        interface_ip.nbanks              = 1;
436        interface_ip.out_w               = interface_ip.line_sz*8;
437        interface_ip.access_mode         = 2;
438        interface_ip.throughput          = 1.0/clockRate;
439        interface_ip.latency             = 1.0/clockRate;
440        interface_ip.obj_func_dyn_energy = 0;
441        interface_ip.obj_func_dyn_power  = 0;
442        interface_ip.obj_func_leak_power = 0;
443        interface_ip.obj_func_cycle_t    = 1;
444        interface_ip.num_rw_ports    = 0;
445        interface_ip.num_rd_ports    = coredynp.predictionW;
446        interface_ip.num_wr_ports    = coredynp.predictionW;
447        interface_ip.num_se_rd_ports = 0;
448        RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty);
449        RAS->area.set_area(RAS->area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);
450        area.set_area(area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);
451
452}
453
454SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
455:XML(XML_interface),
456 ithCore(ithCore_),
457 interface_ip(*interface_ip_),
458 coredynp(dyn_p_),
459 int_inst_window(0),
460 fp_inst_window(0),
461 ROB(0),
462 instruction_selection(0),
463 exist(exist_)
464 {
465        if (!exist) return;
466        int   tag, data;
467        bool  is_default=true;
468        string tmp_name;
469
470        clockRate = coredynp.clockRate;
471        executionTime = coredynp.executionTime;
472        if ((coredynp.core_ty==Inorder && coredynp.multithreaded))
473        {
474                //Instruction issue queue, in-order multi-issue or multithreaded processor also has this structure. Unified window for Inorder processors
475                tag							     = int(log2(XML->sys.core[ithCore].number_hardware_threads)*coredynp.perThreadState);//This is the normal thread state bits based on Niagara Design
476                data							 = XML->sys.core[ithCore].instruction_length;
477                //NOTE: x86 inst can be very lengthy, up to 15B. Source: Intel® 64 and IA-32 Architectures
478                //Software Developer’s Manual
479                interface_ip.is_cache			 = true;
480                interface_ip.pure_cam            = false;
481                interface_ip.pure_ram            = false;
482                interface_ip.line_sz             = int(ceil(data/8.0));
483                interface_ip.specific_tag        = 1;
484                interface_ip.tag_w               = tag;
485                interface_ip.cache_sz            = XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz>64?XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz:64;
486                interface_ip.assoc               = 0;
487                interface_ip.nbanks              = 1;
488                interface_ip.out_w               = interface_ip.line_sz*8;
489                interface_ip.access_mode         = 1;
490                interface_ip.throughput          = 1.0/clockRate;
491                interface_ip.latency             = 1.0/clockRate;
492                interface_ip.obj_func_dyn_energy = 0;
493                interface_ip.obj_func_dyn_power  = 0;
494                interface_ip.obj_func_leak_power = 0;
495                interface_ip.obj_func_cycle_t    = 1;
496                interface_ip.num_rw_ports        = 0;
497                interface_ip.num_rd_ports        = coredynp.peak_issueW;
498                interface_ip.num_wr_ports        = coredynp.peak_issueW;
499                interface_ip.num_se_rd_ports     = 0;
500                interface_ip.num_search_ports    = coredynp.peak_issueW;
501                int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
502                int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
503                area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
504                //output_data_csv(iRS.RS.local_result);
505                Iw_height      =int_inst_window->local_result.cache_ht;
506
507                /*
508                 * selection logic
509                 * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up
510                 * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who
511                 * at the issue stage.
512                 */
513
514                instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
515                                coredynp.peak_issueW*XML->sys.core[ithCore].number_hardware_threads,
516                                &interface_ip, Core_device, coredynp.core_ty);
517        }
518
519    if (coredynp.core_ty==OOO)
520    {
521        /*
522         * CAM based instruction window
523         * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored
524         * For RS based OOO it is the Reservation station, where both tags and values of phy regs are stored
525         * It is written once and read twice(two operands) before an instruction can be issued.
526         * X86 instruction can be very long up to 15B. add instruction length in XML
527         */
528        if(coredynp.scheu_ty==PhysicalRegFile)
529        {
530                tag	 = coredynp.phy_ireg_width;
531                // Each time only half of the tag is compared, but two tag should be stored.
532                // This underestimate the search power
533                data = int((ceil((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width))/2.0)/8.0));
534                //Data width being divided by 2 means only after both operands available the whole data will be read out.
535                //This is modeled using two equivalent readouts with half of the data width
536                tmp_name = "InstIssueQueue";
537        }
538        else
539        {
540                tag	  = coredynp.phy_ireg_width;
541                // Each time only half of the tag is compared, but two tag should be stored.
542                // This underestimate the search power
543                data  = int(ceil(((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width)+
544                                2*coredynp.int_data_width)/2.0)/8.0));
545                //Data width being divided by 2 means only after both operands available the whole data will be read out.
546                //This is modeled using two equivalent readouts with half of the data width
547
548                tmp_name = "IntReservationStation";
549        }
550        interface_ip.is_cache			 = true;
551        interface_ip.pure_cam            = false;
552        interface_ip.pure_ram            = false;
553        interface_ip.line_sz             = data;
554        interface_ip.cache_sz            = data*XML->sys.core[ithCore].instruction_window_size;
555        interface_ip.assoc               = 0;
556        interface_ip.nbanks              = 1;
557        interface_ip.out_w               = interface_ip.line_sz*8;
558        interface_ip.specific_tag        = 1;
559        interface_ip.tag_w               = tag;
560        interface_ip.access_mode         = 0;
561        interface_ip.throughput          = 2*1.0/clockRate;
562        interface_ip.latency             = 2*1.0/clockRate;
563        interface_ip.obj_func_dyn_energy = 0;
564        interface_ip.obj_func_dyn_power  = 0;
565        interface_ip.obj_func_leak_power = 0;
566        interface_ip.obj_func_cycle_t    = 1;
567        interface_ip.num_rw_ports       = 0;
568        interface_ip.num_rd_ports       = coredynp.peak_issueW;
569        interface_ip.num_wr_ports       = coredynp.peak_issueW;
570        interface_ip.num_se_rd_ports    = 0;
571                interface_ip.num_search_ports   = coredynp.peak_issueW;
572                int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
573                int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
574                area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
575                Iw_height      =int_inst_window->local_result.cache_ht;
576                //FU inst window
577        if(coredynp.scheu_ty==PhysicalRegFile)
578        {
579                tag	 = 2*coredynp.phy_freg_width;// TODO: each time only half of the tag is compared
580                data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width))/8.0));
581                tmp_name = "FPIssueQueue";
582        }
583        else
584        {
585                tag	  = 2*coredynp.phy_ireg_width;
586                data  = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width)+
587                                2*coredynp.fp_data_width)/8.0));
588                tmp_name = "FPReservationStation";
589        }
590        interface_ip.is_cache			 = true;
591        interface_ip.pure_cam            = false;
592        interface_ip.pure_ram            = false;
593        interface_ip.line_sz             = data;
594        interface_ip.cache_sz            = data*XML->sys.core[ithCore].fp_instruction_window_size;
595        interface_ip.assoc               = 0;
596        interface_ip.nbanks              = 1;
597        interface_ip.out_w               = interface_ip.line_sz*8;
598        interface_ip.specific_tag        = 1;
599        interface_ip.tag_w               = tag;
600        interface_ip.access_mode         = 0;
601        interface_ip.throughput          = 1.0/clockRate;
602        interface_ip.latency             = 1.0/clockRate;
603        interface_ip.obj_func_dyn_energy = 0;
604        interface_ip.obj_func_dyn_power  = 0;
605        interface_ip.obj_func_leak_power = 0;
606        interface_ip.obj_func_cycle_t    = 1;
607        interface_ip.num_rw_ports       = 0;
608        interface_ip.num_rd_ports       = coredynp.fp_issueW;
609        interface_ip.num_wr_ports       = coredynp.fp_issueW;
610        interface_ip.num_se_rd_ports    = 0;
611                interface_ip.num_search_ports   = coredynp.fp_issueW;
612                fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
613                fp_inst_window->area.set_area(fp_inst_window->area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
614                area.set_area(area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
615                fp_Iw_height      =fp_inst_window->local_result.cache_ht;
616
617                if (XML->sys.core[ithCore].ROB_size >0)
618                {
619                        /*
620                         *  if ROB_size = 0, then the target processor does not support hardware-based
621                         *  speculation, i.e. , the processor allow OOO issue as well as OOO completion, which
622                         *  means branch must be resolved before instruction issued into instruction window, since
623                         *  there is no change to flush miss-predict branch path after instructions are issued in this situation.
624                         *
625                         *  ROB.ROB size = inflight inst. ROB is unified for int and fp inst.
626                         *  One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7.
627                         *  However, this approach is abandoned due to its high power and poor scalablility.
628                         *	McPAT uses current implementation of ROB as circular buffer.
629                         *	ROB is written once when instruction is issued and read once when the instruction is committed.         *
630                         */
631                        int robExtra = int(ceil(5 + log2(coredynp.num_hthreads)));
632                        //5 bits are: busy, Issued, Finished, speculative, valid
633                        if(coredynp.scheu_ty==PhysicalRegFile)
634                        {
635                                //PC is to id the instruction for recover exception.
636                                //inst is used to map the renamed dest. registers.so that commit stage can know which reg/RRAT to update
637//				data = int(ceil((robExtra+coredynp.pc_width +
638//						coredynp.instruction_length + 2*coredynp.phy_ireg_width)/8.0));
639                                data = int(ceil((robExtra+coredynp.pc_width +
640                                                        coredynp.phy_ireg_width)/8.0));
641                        }
642                        else
643                        {
644                                //in RS based OOO, ROB also contains value of destination reg
645//				data  = int(ceil((robExtra+coredynp.pc_width +
646//						coredynp.instruction_length + 2*coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0));
647                                data  = int(ceil((robExtra + coredynp.pc_width +
648                                                coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0));
649                        }
650                        interface_ip.is_cache			 = false;
651                        interface_ip.pure_cam            = false;
652                        interface_ip.pure_ram            = true;
653                        interface_ip.line_sz             = data;
654                        interface_ip.cache_sz            = data*XML->sys.core[ithCore].ROB_size;//The XML ROB size is for all threads
655                        interface_ip.assoc               = 1;
656                        interface_ip.nbanks              = 1;
657                        interface_ip.out_w               = interface_ip.line_sz*8;
658                        interface_ip.access_mode         = 1;
659                        interface_ip.throughput          = 1.0/clockRate;
660                        interface_ip.latency             = 1.0/clockRate;
661                        interface_ip.obj_func_dyn_energy = 0;
662                        interface_ip.obj_func_dyn_power  = 0;
663                        interface_ip.obj_func_leak_power = 0;
664                        interface_ip.obj_func_cycle_t    = 1;
665                        interface_ip.num_rw_ports       = 0;
666                        interface_ip.num_rd_ports       = coredynp.peak_commitW;
667                        interface_ip.num_wr_ports       = coredynp.peak_issueW;
668                        interface_ip.num_se_rd_ports    = 0;
669                        interface_ip.num_search_ports   = 0;
670                        ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
671                        ROB->area.set_area(ROB->area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
672                        area.set_area(area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
673                        ROB_height      =ROB->local_result.cache_ht;
674                }
675
676                instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
677                                coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty);
678    }
679}
680
681LoadStoreU::LoadStoreU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
682:XML(XML_interface),
683 ithCore(ithCore_),
684 interface_ip(*interface_ip_),
685 coredynp(dyn_p_),
686 LSQ(0),
687 exist(exist_)
688{
689          if (!exist) return;
690          int  idx, tag, data, size, line, assoc, banks;
691          bool debug= false;
692          int ldst_opcode = XML->sys.core[ithCore].opcode_width;//16;
693
694          clockRate = coredynp.clockRate;
695          executionTime = coredynp.executionTime;
696          cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7];
697
698          interface_ip.num_search_ports    = XML->sys.core[ithCore].memory_ports;
699          interface_ip.is_cache			   = true;
700          interface_ip.pure_cam            = false;
701          interface_ip.pure_ram            = false;
702          //Dcache
703          size                             = (int)XML->sys.core[ithCore].dcache.dcache_config[0];
704          line                             = (int)XML->sys.core[ithCore].dcache.dcache_config[1];
705          assoc                            = (int)XML->sys.core[ithCore].dcache.dcache_config[2];
706          banks                            = (int)XML->sys.core[ithCore].dcache.dcache_config[3];
707          idx    					 	   = debug?9:int(ceil(log2(size/line/assoc)));
708          tag							   = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
709          interface_ip.specific_tag        = 1;
710          interface_ip.tag_w               = tag;
711          interface_ip.cache_sz            = debug?32768:(int)XML->sys.core[ithCore].dcache.dcache_config[0];
712          interface_ip.line_sz             = debug?64:(int)XML->sys.core[ithCore].dcache.dcache_config[1];
713          interface_ip.assoc               = debug?8:(int)XML->sys.core[ithCore].dcache.dcache_config[2];
714          interface_ip.nbanks              = debug?1:(int)XML->sys.core[ithCore].dcache.dcache_config[3];
715          interface_ip.out_w               = interface_ip.line_sz*8;
716          interface_ip.access_mode         = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
717          interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
718          interface_ip.latency             = debug?3.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
719          interface_ip.is_cache			 = true;
720          interface_ip.obj_func_dyn_energy = 0;
721          interface_ip.obj_func_dyn_power  = 0;
722          interface_ip.obj_func_leak_power = 0;
723          interface_ip.obj_func_cycle_t    = 1;
724          interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least.
725          interface_ip.num_rd_ports    = 0;
726          interface_ip.num_wr_ports    = 0;
727          interface_ip.num_se_rd_ports = 0;
728          dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, coredynp.opt_local, coredynp.core_ty);
729          dcache.area.set_area(dcache.area.get_area()+ dcache.caches->local_result.area);
730          area.set_area(area.get_area()+ dcache.caches->local_result.area);
731          //output_data_csv(dcache.caches.local_result);
732
733          //dCache controllers
734          //miss buffer
735          tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
736          data							   = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + dcache.caches->l_ip.line_sz*8;
737          interface_ip.specific_tag        = 1;
738          interface_ip.tag_w               = tag;
739          interface_ip.line_sz             = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
740          interface_ip.cache_sz            = XML->sys.core[ithCore].dcache.buffer_sizes[0]*interface_ip.line_sz;
741          interface_ip.assoc               = 0;
742          interface_ip.nbanks              = 1;
743          interface_ip.out_w               = interface_ip.line_sz*8;
744          interface_ip.access_mode         = 2;
745          interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
746          interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
747          interface_ip.obj_func_dyn_energy = 0;
748          interface_ip.obj_func_dyn_power  = 0;
749          interface_ip.obj_func_leak_power = 0;
750          interface_ip.obj_func_cycle_t    = 1;
751          interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
752          interface_ip.num_rd_ports    = 0;
753          interface_ip.num_wr_ports    = 0;
754          interface_ip.num_se_rd_ports = 0;
755          dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
756          dcache.area.set_area(dcache.area.get_area()+ dcache.missb->local_result.area);
757          area.set_area(area.get_area()+ dcache.missb->local_result.area);
758          //output_data_csv(dcache.missb.local_result);
759
760          //fill buffer
761          tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
762          data							   = dcache.caches->l_ip.line_sz;
763          interface_ip.specific_tag        = 1;
764          interface_ip.tag_w               = tag;
765          interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
766          interface_ip.cache_sz            = data*XML->sys.core[ithCore].dcache.buffer_sizes[1];
767          interface_ip.assoc               = 0;
768          interface_ip.nbanks              = 1;
769          interface_ip.out_w               = interface_ip.line_sz*8;
770          interface_ip.access_mode         = 2;
771          interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
772          interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
773          interface_ip.obj_func_dyn_energy = 0;
774          interface_ip.obj_func_dyn_power  = 0;
775          interface_ip.obj_func_leak_power = 0;
776          interface_ip.obj_func_cycle_t    = 1;
777          interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
778          interface_ip.num_rd_ports    = 0;
779          interface_ip.num_wr_ports    = 0;
780          interface_ip.num_se_rd_ports = 0;
781          dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
782          dcache.area.set_area(dcache.area.get_area()+ dcache.ifb->local_result.area);
783          area.set_area(area.get_area()+ dcache.ifb->local_result.area);
784          //output_data_csv(dcache.ifb.local_result);
785
786          //prefetch buffer
787          tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
788          data							   = dcache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
789          interface_ip.specific_tag        = 1;
790          interface_ip.tag_w               = tag;
791          interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
792          interface_ip.cache_sz            = XML->sys.core[ithCore].dcache.buffer_sizes[2]*interface_ip.line_sz;
793          interface_ip.assoc               = 0;
794          interface_ip.nbanks              = 1;
795          interface_ip.out_w               = interface_ip.line_sz*8;
796          interface_ip.access_mode         = 2;
797          interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
798          interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
799          interface_ip.obj_func_dyn_energy = 0;
800          interface_ip.obj_func_dyn_power  = 0;
801          interface_ip.obj_func_leak_power = 0;
802          interface_ip.obj_func_cycle_t    = 1;
803          interface_ip.num_rw_ports    = debug?1:XML->sys.core[ithCore].memory_ports;;
804          interface_ip.num_rd_ports    = 0;
805          interface_ip.num_wr_ports    = 0;
806          interface_ip.num_se_rd_ports = 0;
807          dcache.prefetchb = new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
808          dcache.area.set_area(dcache.area.get_area()+ dcache.prefetchb->local_result.area);
809          area.set_area(area.get_area()+ dcache.prefetchb->local_result.area);
810          //output_data_csv(dcache.prefetchb.local_result);
811
812          //WBB
813
814          if (cache_p==Write_back)
815          {
816                  tag							   = XML->sys.physical_address_width + EXTRA_TAG_BITS;
817                  data							   = dcache.caches->l_ip.line_sz;
818                  interface_ip.specific_tag        = 1;
819                  interface_ip.tag_w               = tag;
820                  interface_ip.line_sz             = data;
821                  interface_ip.cache_sz            = XML->sys.core[ithCore].dcache.buffer_sizes[3]*interface_ip.line_sz;
822                  interface_ip.assoc               = 0;
823                  interface_ip.nbanks              = 1;
824                  interface_ip.out_w               = interface_ip.line_sz*8;
825                  interface_ip.access_mode         = 2;
826                  interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
827                  interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
828                  interface_ip.obj_func_dyn_energy = 0;
829                  interface_ip.obj_func_dyn_power  = 0;
830                  interface_ip.obj_func_leak_power = 0;
831                  interface_ip.obj_func_cycle_t    = 1;
832                  interface_ip.num_rw_ports    = XML->sys.core[ithCore].memory_ports;
833                  interface_ip.num_rd_ports    = 0;
834                  interface_ip.num_wr_ports    = 0;
835                  interface_ip.num_se_rd_ports = 0;
836                  dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty);
837                  dcache.area.set_area(dcache.area.get_area()+ dcache.wbb->local_result.area);
838                  area.set_area(area.get_area()+ dcache.wbb->local_result.area);
839                  //output_data_csv(dcache.wbb.local_result);
840          }
841
842          /*
843           * LSU--in-order processors do not have separate load queue: unified lsq
844           * partitioned among threads
845           * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ
846           */
847          tag							   = ldst_opcode+XML->sys.virtual_address_width +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + EXTRA_TAG_BITS;
848          data							   = XML->sys.machine_bits;
849          interface_ip.is_cache			   = true;
850          interface_ip.line_sz             = int(ceil(data/32.0))*4;
851          interface_ip.specific_tag        = 1;
852          interface_ip.tag_w               = tag;
853          interface_ip.cache_sz            = XML->sys.core[ithCore].store_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads;
854          interface_ip.assoc               = 0;
855          interface_ip.nbanks              = 1;
856          interface_ip.out_w               = interface_ip.line_sz*8;
857          interface_ip.access_mode         = 1;
858          interface_ip.throughput          = 1.0/clockRate;
859          interface_ip.latency             = 1.0/clockRate;
860          interface_ip.obj_func_dyn_energy = 0;
861          interface_ip.obj_func_dyn_power  = 0;
862          interface_ip.obj_func_leak_power = 0;
863          interface_ip.obj_func_cycle_t    = 1;
864          interface_ip.num_rw_ports        = 0;
865          interface_ip.num_rd_ports        = XML->sys.core[ithCore].memory_ports;
866          interface_ip.num_wr_ports        = XML->sys.core[ithCore].memory_ports;
867          interface_ip.num_se_rd_ports     = 0;
868          interface_ip.num_search_ports    =XML->sys.core[ithCore].memory_ports;
869          LSQ = new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, coredynp.opt_local, coredynp.core_ty);
870          LSQ->area.set_area(LSQ->area.get_area()+ LSQ->local_result.area);
871          area.set_area(area.get_area()+ LSQ->local_result.area);
872          area.set_area(area.get_area()*cdb_overhead);
873          //output_data_csv(LSQ.LSQ.local_result);
874          lsq_height=LSQ->local_result.cache_ht*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/
875
876          if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
877          {
878                  interface_ip.line_sz             = int(ceil(data/32.0))*4;
879                  interface_ip.specific_tag        = 1;
880                  interface_ip.tag_w               = tag;
881                  interface_ip.cache_sz            = XML->sys.core[ithCore].load_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads;
882                  interface_ip.assoc               = 0;
883                  interface_ip.nbanks              = 1;
884                  interface_ip.out_w               = interface_ip.line_sz*8;
885                  interface_ip.access_mode         = 1;
886                  interface_ip.throughput          = 1.0/clockRate;
887                  interface_ip.latency             = 1.0/clockRate;
888                  interface_ip.obj_func_dyn_energy = 0;
889                  interface_ip.obj_func_dyn_power  = 0;
890                  interface_ip.obj_func_leak_power = 0;
891                  interface_ip.obj_func_cycle_t    = 1;
892                  interface_ip.num_rw_ports        = 0;
893                  interface_ip.num_rd_ports        = XML->sys.core[ithCore].memory_ports;
894                  interface_ip.num_wr_ports        = XML->sys.core[ithCore].memory_ports;
895                  interface_ip.num_se_rd_ports     = 0;
896                  interface_ip.num_search_ports    =XML->sys.core[ithCore].memory_ports;
897                  LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
898                  LoadQ->area.set_area(LoadQ->area.get_area()+ LoadQ->local_result.area);
899                  area.set_area(area.get_area()+ LoadQ->local_result.area);
900                  area.set_area(area.get_area()*cdb_overhead);
901                  //output_data_csv(LoadQ.LoadQ.local_result);
902                  lsq_height=(LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht)*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/
903          }
904
905}
906
907MemManU::MemManU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
908:XML(XML_interface),
909 ithCore(ithCore_),
910 interface_ip(*interface_ip_),
911 coredynp(dyn_p_),
912 itlb(0),
913 dtlb(0),
914 exist(exist_)
915{
916          if (!exist) return;
917          int  tag, data;
918          bool debug= false;
919
920          clockRate = coredynp.clockRate;
921          executionTime = coredynp.executionTime;
922          interface_ip.is_cache			   = true;
923          interface_ip.pure_cam            = false;
924          interface_ip.pure_ram            = false;
925          interface_ip.specific_tag        = 1;
926          //Itlb TLBs are partioned among threads according to Nigara and Nehalem
927          tag							   = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS;
928          data							   = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size)));
929          interface_ip.tag_w               = tag;
930          interface_ip.line_sz             = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
931          interface_ip.cache_sz            = XML->sys.core[ithCore].itlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads;
932          interface_ip.assoc               = 0;
933          interface_ip.nbanks              = 1;
934          interface_ip.out_w               = interface_ip.line_sz*8;
935          interface_ip.access_mode         = 0;
936          interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
937          interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
938          interface_ip.obj_func_dyn_energy = 0;
939          interface_ip.obj_func_dyn_power  = 0;
940          interface_ip.obj_func_leak_power = 0;
941          interface_ip.obj_func_cycle_t    = 1;
942          interface_ip.num_rw_ports    = 0;
943          interface_ip.num_rd_ports    = 0;
944          interface_ip.num_wr_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
945          interface_ip.num_se_rd_ports = 0;
946          interface_ip.num_search_ports    = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
947          itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty);
948          itlb->area.set_area(itlb->area.get_area()+ itlb->local_result.area);
949          area.set_area(area.get_area()+ itlb->local_result.area);
950          //output_data_csv(itlb.tlb.local_result);
951
952          //dtlb
953          tag							   = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS;
954          data							   = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size)));
955          interface_ip.specific_tag        = 1;
956          interface_ip.tag_w               = tag;
957          interface_ip.line_sz             = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
958          interface_ip.cache_sz            = XML->sys.core[ithCore].dtlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads;
959          interface_ip.assoc               = 0;
960          interface_ip.nbanks              = 1;
961          interface_ip.out_w               = interface_ip.line_sz*8;
962          interface_ip.access_mode         = 0;
963          interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
964          interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
965          interface_ip.obj_func_dyn_energy = 0;
966          interface_ip.obj_func_dyn_power  = 0;
967          interface_ip.obj_func_leak_power = 0;
968          interface_ip.obj_func_cycle_t    = 1;
969          interface_ip.num_rw_ports    = 0;
970          interface_ip.num_rd_ports    = 0;
971          interface_ip.num_wr_ports    = XML->sys.core[ithCore].memory_ports;
972          interface_ip.num_se_rd_ports = 0;
973          interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports;
974          dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty);
975          dtlb->area.set_area(dtlb->area.get_area()+ dtlb->local_result.area);
976          area.set_area(area.get_area()+ dtlb->local_result.area);
977          //output_data_csv(dtlb.tlb.local_result);
978
979}
980
981RegFU::RegFU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
982:XML(XML_interface),
983 ithCore(ithCore_),
984 interface_ip(*interface_ip_),
985 coredynp(dyn_p_),
986 IRF (0),
987 FRF (0),
988 RFWIN (0),
989 exist(exist_)
990 {
991        /*
992         * processors have separate architectural register files for each thread.
993         * therefore, the bypass buses need to travel across all the register files.
994         */
995        if (!exist) return;
996        int  data;
997
998        clockRate = coredynp.clockRate;
999        executionTime = coredynp.executionTime;
1000        //**********************************IRF***************************************
1001        data							 = coredynp.int_data_width;
1002        interface_ip.is_cache			 = false;
1003        interface_ip.pure_cam            = false;
1004        interface_ip.pure_ram            = true;
1005        interface_ip.line_sz             = int(ceil(data/32.0))*4;
1006        interface_ip.cache_sz            = coredynp.num_IRF_entry*interface_ip.line_sz;
1007        interface_ip.assoc               = 1;
1008        interface_ip.nbanks              = 1;
1009        interface_ip.out_w               = interface_ip.line_sz*8;
1010        interface_ip.access_mode         = 1;
1011        interface_ip.throughput          = 1.0/clockRate;
1012        interface_ip.latency             = 1.0/clockRate;
1013        interface_ip.obj_func_dyn_energy = 0;
1014        interface_ip.obj_func_dyn_power  = 0;
1015        interface_ip.obj_func_leak_power = 0;
1016        interface_ip.obj_func_cycle_t    = 1;
1017        interface_ip.num_rw_ports    = 1;//this is the transfer port for saving/restoring states when exceptions happen.
1018        interface_ip.num_rd_ports    = 2*coredynp.peak_issueW;
1019        interface_ip.num_wr_ports    = coredynp.peak_issueW;
1020        interface_ip.num_se_rd_ports = 0;
1021        IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, coredynp.opt_local, coredynp.core_ty);
1022        IRF->area.set_area(IRF->area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead);
1023        area.set_area(area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead);
1024        //area.set_area(area.get_area()*cdb_overhead);
1025        //output_data_csv(IRF.RF.local_result);
1026
1027        //**********************************FRF***************************************
1028        data							 = coredynp.fp_data_width;
1029        interface_ip.is_cache			 = false;
1030        interface_ip.pure_cam            = false;
1031        interface_ip.pure_ram            = true;
1032        interface_ip.line_sz             = int(ceil(data/32.0))*4;
1033        interface_ip.cache_sz            = coredynp.num_FRF_entry*interface_ip.line_sz;
1034        interface_ip.assoc               = 1;
1035        interface_ip.nbanks              = 1;
1036        interface_ip.out_w               = interface_ip.line_sz*8;
1037        interface_ip.access_mode         = 1;
1038        interface_ip.throughput          = 1.0/clockRate;
1039        interface_ip.latency             = 1.0/clockRate;
1040        interface_ip.obj_func_dyn_energy = 0;
1041        interface_ip.obj_func_dyn_power  = 0;
1042        interface_ip.obj_func_leak_power = 0;
1043        interface_ip.obj_func_cycle_t    = 1;
1044        interface_ip.num_rw_ports    = 1;//this is the transfer port for saving/restoring states when exceptions happen.
1045        interface_ip.num_rd_ports    = 2*XML->sys.core[ithCore].issue_width;
1046        interface_ip.num_wr_ports    = XML->sys.core[ithCore].issue_width;
1047        interface_ip.num_se_rd_ports = 0;
1048        FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, coredynp.opt_local, coredynp.core_ty);
1049        FRF->area.set_area(FRF->area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead);
1050        area.set_area(area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead);
1051        //area.set_area(area.get_area()*cdb_overhead);
1052        //output_data_csv(FRF.RF.local_result);
1053        int_regfile_height= IRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead);
1054        fp_regfile_height = FRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead);
1055    //since a EXU is associated with each pipeline, the cdb should not have longer length.
1056        if (coredynp.regWindowing)
1057        {
1058                //*********************************REG_WIN************************************
1059                data							 = coredynp.int_data_width; //ECC, and usually 2 regs are transfered together during window shifting.Niagara Mega cell
1060                interface_ip.is_cache			 = false;
1061                interface_ip.pure_cam            = false;
1062                interface_ip.pure_ram            = true;
1063                interface_ip.line_sz             = int(ceil(data/8.0));
1064                interface_ip.cache_sz            = XML->sys.core[ithCore].register_windows_size*IRF->l_ip.cache_sz*XML->sys.core[ithCore].number_hardware_threads;
1065                interface_ip.assoc               = 1;
1066                interface_ip.nbanks              = 1;
1067                interface_ip.out_w               = interface_ip.line_sz*8;
1068                interface_ip.access_mode         = 1;
1069                interface_ip.throughput          = 4.0/clockRate;
1070                interface_ip.latency             = 4.0/clockRate;
1071                interface_ip.obj_func_dyn_energy = 0;
1072                interface_ip.obj_func_dyn_power  = 0;
1073                interface_ip.obj_func_leak_power = 0;
1074                interface_ip.obj_func_cycle_t    = 1;
1075                interface_ip.num_rw_ports    = 1;//this is the transfer port for saving/restoring states when exceptions happen.
1076                interface_ip.num_rd_ports    = 0;
1077                interface_ip.num_wr_ports    = 0;
1078                interface_ip.num_se_rd_ports = 0;
1079                RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, coredynp.opt_local, coredynp.core_ty);
1080                RFWIN->area.set_area(RFWIN->area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines);
1081                area.set_area(area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines);
1082                //output_data_csv(RFWIN.RF.local_result);
1083        }
1084
1085
1086 }
1087
1088EXECU::EXECU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_, const CoreDynParam & dyn_p_, bool exist_)
1089:XML(XML_interface),
1090 ithCore(ithCore_),
1091 interface_ip(*interface_ip_),
1092 lsq_height(lsq_height_),
1093 coredynp(dyn_p_),
1094 rfu(0),
1095 scheu(0),
1096 fp_u(0),
1097 exeu(0),
1098 mul(0),
1099 int_bypass(0),
1100 intTagBypass(0),
1101 int_mul_bypass(0),
1102 intTag_mul_Bypass(0),
1103 fp_bypass(0),
1104 fpTagBypass(0),
1105 exist(exist_)
1106{
1107          if (!exist) return;
1108          double fu_height = 0.0;
1109      clockRate = coredynp.clockRate;
1110      executionTime = coredynp.executionTime;
1111          rfu   = new RegFU(XML, ithCore, &interface_ip,coredynp);
1112          scheu = new SchedulerU(XML, ithCore, &interface_ip,coredynp);
1113          exeu  = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, ALU);
1114          area.set_area(area.get_area()+ exeu->area.get_area() + rfu->area.get_area() +scheu->area.get_area() );
1115          fu_height = exeu->FU_height;
1116          if (coredynp.num_fpus >0)
1117          {
1118                  fp_u  = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, FPU);
1119                  area.set_area(area.get_area()+ fp_u->area.get_area());
1120          }
1121          if (coredynp.num_muls >0)
1122          {
1123                  mul   = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, MUL);
1124                  area.set_area(area.get_area()+ mul->area.get_area());
1125                  fu_height +=  mul->FU_height;
1126          }
1127          /*
1128           * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; fp_tag-broadcast
1129           * integer by pass has two paths and fp has 3 paths.
1130           * on the same bus there are multiple tri-state drivers and muxes that go to different components on the same bus
1131           */
1132          if (XML->sys.Embedded)
1133                        {
1134                        interface_ip.wt                  =Global_30;
1135                        interface_ip.wire_is_mat_type = 0;
1136                        interface_ip.wire_os_mat_type = 0;
1137                    interface_ip.throughput       = 1.0/clockRate;
1138                    interface_ip.latency          = 1.0/clockRate;
1139                        }
1140                else
1141                        {
1142                        interface_ip.wt                  =Global;
1143                        interface_ip.wire_is_mat_type = 2;//start from semi-global since local wires are already used
1144                        interface_ip.wire_os_mat_type = 2;
1145                    interface_ip.throughput       = 10.0/clockRate; //Do not care
1146                    interface_ip.latency          = 10.0/clockRate;
1147                        }
1148
1149          if (coredynp.core_ty==Inorder)
1150          {
1151                  int_bypass   = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32),
1152                                  rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
1153                                  false, 1.0, coredynp.opt_local, coredynp.core_ty);
1154                  bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area());
1155                  intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
1156                                  rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
1157                                  false, 1.0, coredynp.opt_local, coredynp.core_ty);
1158                  bypass.area.set_area(bypass.area.get_area()  +intTagBypass->area.get_area());
1159
1160                  if (coredynp.num_muls>0)
1161                  {
1162                          int_mul_bypass     = new interconnect("Mul Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5),
1163                                          rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
1164                                          false, 1.0, coredynp.opt_local, coredynp.core_ty);
1165                          bypass.area.set_area(bypass.area.get_area()  +int_mul_bypass->area.get_area());
1166                          intTag_mul_Bypass  = new interconnect("Mul Bypass tag"  , Core_device, 1, 1, coredynp.perThreadState,
1167                                          rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
1168                                          false, 1.0, coredynp.opt_local, coredynp.core_ty);
1169                          bypass.area.set_area(bypass.area.get_area()  +intTag_mul_Bypass->area.get_area());
1170                  }
1171
1172                  if (coredynp.num_fpus>0)
1173                  {
1174                          fp_bypass    = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5),
1175                                          rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3,
1176                                          false, 1.0, coredynp.opt_local, coredynp.core_ty);
1177                          bypass.area.set_area(bypass.area.get_area()  +fp_bypass->area.get_area());
1178                          fpTagBypass  = new interconnect("FP Bypass tag"  , Core_device, 1, 1, coredynp.perThreadState,
1179                                          rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
1180                                          false, 1.0, coredynp.opt_local, coredynp.core_ty);
1181                          bypass.area.set_area(bypass.area.get_area()  +fpTagBypass->area.get_area());
1182                  }
1183          }
1184          else
1185          {//OOO
1186                  if (coredynp.scheu_ty==PhysicalRegFile)
1187                  {
1188                          /* For physical register based OOO,
1189                           * data broadcast interconnects cover across functional units, lsq, inst windows and register files,
1190                           * while tag broadcast interconnects also cover across ROB
1191                           */
1192                          int_bypass   = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
1193                                                    rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
1194                                                                false, 1.0, coredynp.opt_local, coredynp.core_ty);
1195                          bypass.area.set_area(bypass.area.get_area()  +int_bypass->area.get_area());
1196                          intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
1197                                                    rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
1198                                                                false, 1.0, coredynp.opt_local, coredynp.core_ty);
1199
1200                          if (coredynp.num_muls>0)
1201                          {
1202                                  int_mul_bypass   = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
1203                                                                                rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
1204                                                                                false, 1.0, coredynp.opt_local, coredynp.core_ty);
1205                                  intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
1206                                                                                rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
1207                                                                                false, 1.0, coredynp.opt_local, coredynp.core_ty);
1208                                  bypass.area.set_area(bypass.area.get_area()  +int_mul_bypass->area.get_area());
1209                                  bypass.area.set_area(bypass.area.get_area()  +intTag_mul_Bypass->area.get_area());
1210                          }
1211
1212                          if (coredynp.num_fpus>0)
1213                          {
1214                                  fp_bypass    = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
1215                                                                  rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3,
1216                                                                  false, 1.0, coredynp.opt_local, coredynp.core_ty);
1217                                  fpTagBypass  = new interconnect("FP Bypass tag"  , Core_device, 1, 1, coredynp.phy_freg_width,
1218                                                                  rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
1219                                                                  false, 1.0, coredynp.opt_local, coredynp.core_ty);
1220                                  bypass.area.set_area(bypass.area.get_area()  +fp_bypass->area.get_area());
1221                                  bypass.area.set_area(bypass.area.get_area()  +fpTagBypass->area.get_area());
1222                          }
1223                  }
1224                  else
1225                  {
1226             /*
1227              * In RS based processor both data and tag are broadcast together,
1228              * covering functional units, lsq, nst windows, register files, and ROBs
1229              */
1230                          int_bypass   = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
1231                                                    rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
1232                                                                  false, 1.0, coredynp.opt_local, coredynp.core_ty);
1233                          intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
1234                                                    rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
1235                                                                  false, 1.0, coredynp.opt_local, coredynp.core_ty);
1236                          bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area());
1237                          bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
1238                          if (coredynp.num_muls>0)
1239                          {
1240                                  int_mul_bypass   = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
1241                                                            rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
1242                                                                          false, 1.0, coredynp.opt_local, coredynp.core_ty);
1243                                  intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
1244                                                            rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
1245                                                                          false, 1.0, coredynp.opt_local, coredynp.core_ty);
1246                                  bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
1247                                  bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
1248                          }
1249
1250                          if (coredynp.num_fpus>0)
1251                          {
1252                                  fp_bypass    = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
1253                                                  rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
1254                                                  false, 1.0, coredynp.opt_local, coredynp.core_ty);
1255                                  fpTagBypass  = new interconnect("FP Bypass tag"  , Core_device, 1, 1, coredynp.phy_freg_width,
1256                                                  rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
1257                                                  false, 1.0, coredynp.opt_local, coredynp.core_ty);
1258                                  bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
1259                                  bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
1260                          }
1261                  }
1262
1263
1264          }
1265          area.set_area(area.get_area()+ bypass.area.get_area());
1266}
1267
1268RENAMINGU::RENAMINGU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
1269:XML(XML_interface),
1270 ithCore(ithCore_),
1271 interface_ip(*interface_ip_),
1272 coredynp(dyn_p_),
1273 iFRAT(0),
1274 fFRAT(0),
1275 iRRAT(0),
1276 fRRAT(0),
1277 ifreeL(0),
1278 ffreeL(0),
1279 idcl(0),
1280 fdcl(0),
1281 RAHT(0),
1282 exist(exist_)
1283 {
1284        /*
1285         * Although renaming logic maybe be used in in-order processors,
1286     * McPAT assumes no renaming logic is used since the performance gain is very limited and
1287     * the only major inorder processor with renaming logic is Itainium
1288     * that is a VLIW processor and different from current McPAT's model.
1289         * physical register base OOO must have Dual-RAT architecture or equivalent structure.FRAT:FrontRAT, RRAT:RetireRAT;
1290         * i,f prefix mean int and fp
1291         * RAT for all Renaming logic, random accessible checkpointing is used, but only update when instruction retires.
1292         * FRAT will be read twice and written once per instruction;
1293         * RRAT will be write once per instruction when committing and reads out all when context switch
1294         * checkpointing is implicit
1295         * Renaming logic is duplicated for each different hardware threads
1296         *
1297         * No Dual-RAT is needed in RS-based OOO processors,
1298         * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry,
1299         * to make sure all the renamings associated with the ROB to be released are updated at the same time.
1300         * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag,
1301         * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag,
1302         *
1303         * Both RAM and CAM have same DCL
1304         */
1305        if (!exist) return;
1306        int  tag, data, out_w;
1307//	interface_ip.wire_is_mat_type = 0;
1308//	interface_ip.wire_os_mat_type = 0;
1309//	interface_ip.wt               = Global_30;
1310        clockRate = coredynp.clockRate;
1311        executionTime = coredynp.executionTime;
1312    if (coredynp.core_ty==OOO)
1313    {
1314        //integer pipeline
1315        if (coredynp.scheu_ty==PhysicalRegFile)
1316        {
1317                if (coredynp.rm_ty ==RAMbased)
1318                {	  //FRAT with global checkpointing (GCs) please see paper tech report for detailed explaintions
1319                        data							 = 33;//int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));
1320//			data							 = int(ceil(coredynp.phy_ireg_width/8.0));
1321                        out_w                            = 1;//int(ceil(coredynp.phy_ireg_width/8.0));
1322                        interface_ip.is_cache			 = false;
1323                        interface_ip.pure_cam            = false;
1324                        interface_ip.pure_ram            = true;
1325                        interface_ip.line_sz             = data;
1326                        interface_ip.cache_sz            = data*XML->sys.core[ithCore].archi_Regs_IRF_size;
1327                        interface_ip.assoc               = 1;
1328                        interface_ip.nbanks              = 1;
1329                        interface_ip.out_w               = out_w*8;
1330                        interface_ip.access_mode         = 2;
1331                        interface_ip.throughput          = 1.0/clockRate;
1332                        interface_ip.latency             = 1.0/clockRate;
1333                        interface_ip.obj_func_dyn_energy = 0;
1334                        interface_ip.obj_func_dyn_power  = 0;
1335                        interface_ip.obj_func_leak_power = 0;
1336                        interface_ip.obj_func_cycle_t    = 1;
1337                        interface_ip.num_rw_ports    = 1;//the extra one port is for GCs
1338                        interface_ip.num_rd_ports    = 2*coredynp.decodeW;
1339                        interface_ip.num_wr_ports    = coredynp.decodeW;
1340                        interface_ip.num_se_rd_ports = 0;
1341                        iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
1342                        iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1343                        area.set_area(area.get_area()+ iFRAT->area.get_area());
1344
1345//			//RAHT According to Intel, combine GC with FRAT is very costly.
1346//			data							 = int(ceil(coredynp.phy_ireg_width/8.0)*coredynp.num_IRF_entry);
1347//			out_w                            = data;
1348//			interface_ip.is_cache			 = false;
1349//			interface_ip.pure_cam            = false;
1350//			interface_ip.pure_ram            = true;
1351//			interface_ip.line_sz             = data;
1352//			interface_ip.cache_sz            = data*coredynp.globalCheckpoint;
1353//			interface_ip.assoc               = 1;
1354//			interface_ip.nbanks              = 1;
1355//			interface_ip.out_w               = out_w*8;
1356//			interface_ip.access_mode         = 0;
1357//			interface_ip.throughput          = 1.0/clockRate;
1358//			interface_ip.latency             = 1.0/clockRate;
1359//			interface_ip.obj_func_dyn_energy = 0;
1360//			interface_ip.obj_func_dyn_power  = 0;
1361//			interface_ip.obj_func_leak_power = 0;
1362//			interface_ip.obj_func_cycle_t    = 1;
1363//			interface_ip.num_rw_ports    = 1;//the extra one port is for GCs
1364//			interface_ip.num_rd_ports    = 2*coredynp.decodeW;
1365//			interface_ip.num_wr_ports    = coredynp.decodeW;
1366//			interface_ip.num_se_rd_ports = 0;
1367//			iFRAT = new ArrayST(&interface_ip, "Int FrontRAT");
1368//			iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1369//			area.set_area(area.get_area()+ iFRAT->area.get_area());
1370
1371                        //FRAT floating point
1372                        data							 = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
1373                        out_w                            = int(ceil(coredynp.phy_freg_width/8.0));
1374                        interface_ip.is_cache			 = false;
1375                        interface_ip.pure_cam            = false;
1376                        interface_ip.pure_ram            = true;
1377                        interface_ip.line_sz             = data;
1378                        interface_ip.cache_sz            = data*XML->sys.core[ithCore].archi_Regs_FRF_size;
1379                        interface_ip.assoc               = 1;
1380                        interface_ip.nbanks              = 1;
1381                        interface_ip.out_w               = out_w*8;
1382                        interface_ip.access_mode         = 2;
1383                        interface_ip.throughput          = 1.0/clockRate;
1384                        interface_ip.latency             = 1.0/clockRate;
1385                        interface_ip.obj_func_dyn_energy = 0;
1386                        interface_ip.obj_func_dyn_power  = 0;
1387                        interface_ip.obj_func_leak_power = 0;
1388                        interface_ip.obj_func_cycle_t    = 1;
1389                        interface_ip.num_rw_ports    = 1;//the extra one port is for GCs
1390                        interface_ip.num_rd_ports    = 2*coredynp.fp_decodeW;
1391                        interface_ip.num_wr_ports    = coredynp.fp_decodeW;
1392                        interface_ip.num_se_rd_ports = 0;
1393                        fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
1394                        fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1395                        area.set_area(area.get_area()+ fFRAT->area.get_area());
1396
1397                }
1398                else if ((coredynp.rm_ty ==CAMbased))
1399                {
1400                        //FRAT
1401                        tag							     = coredynp.arch_ireg_width;
1402                        data							 = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
1403                        out_w                            = int(ceil (coredynp.arch_ireg_width/8.0));
1404                        interface_ip.is_cache			 = true;
1405                        interface_ip.pure_cam            = false;
1406                        interface_ip.pure_ram            = false;
1407                        interface_ip.line_sz             = data;
1408                        interface_ip.cache_sz            = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
1409                        interface_ip.assoc               = 0;
1410                        interface_ip.nbanks              = 1;
1411                        interface_ip.out_w               = out_w*8;
1412                        interface_ip.specific_tag        = 1;
1413                        interface_ip.tag_w               = tag;
1414                        interface_ip.access_mode         = 2;
1415                        interface_ip.throughput          = 1.0/clockRate;
1416                        interface_ip.latency             = 1.0/clockRate;
1417                        interface_ip.obj_func_dyn_energy = 0;
1418                        interface_ip.obj_func_dyn_power  = 0;
1419                        interface_ip.obj_func_leak_power = 0;
1420                        interface_ip.obj_func_cycle_t    = 1;
1421                        interface_ip.num_rw_ports    = 1;//for GCs
1422                        interface_ip.num_rd_ports    = coredynp.decodeW;
1423                        interface_ip.num_wr_ports    = coredynp.decodeW;
1424                        interface_ip.num_se_rd_ports = 0;
1425                        interface_ip.num_search_ports= 2*coredynp.decodeW;
1426                        iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
1427                        iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1428                        area.set_area(area.get_area()+ iFRAT->area.get_area());
1429
1430                        //FRAT for FP
1431                        tag							     = coredynp.arch_freg_width;
1432                        data							 = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
1433                        out_w                            = int(ceil (coredynp.arch_freg_width/8.0));
1434                        interface_ip.is_cache			 = true;
1435                        interface_ip.pure_cam            = false;
1436                        interface_ip.pure_ram            = false;
1437                        interface_ip.line_sz             = data;
1438                        interface_ip.cache_sz            = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
1439                        interface_ip.assoc               = 0;
1440                        interface_ip.nbanks              = 1;
1441                        interface_ip.out_w               = out_w*8;
1442                        interface_ip.specific_tag        = 1;
1443                        interface_ip.tag_w               = tag;
1444                        interface_ip.access_mode         = 2;
1445                        interface_ip.throughput          = 1.0/clockRate;
1446                        interface_ip.latency             = 1.0/clockRate;
1447                        interface_ip.obj_func_dyn_energy = 0;
1448                        interface_ip.obj_func_dyn_power  = 0;
1449                        interface_ip.obj_func_leak_power = 0;
1450                        interface_ip.obj_func_cycle_t    = 1;
1451                        interface_ip.num_rw_ports    = 1;//for GCs
1452                        interface_ip.num_rd_ports    = coredynp.fp_decodeW;
1453                        interface_ip.num_wr_ports    = coredynp.fp_decodeW;
1454                        interface_ip.num_se_rd_ports = 0;
1455                        interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
1456                        fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
1457                        fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1458                        area.set_area(area.get_area()+ fFRAT->area.get_area());
1459
1460                }
1461
1462                //RRAT is always RAM based, does not have GCs, and is used only for record latest non-speculative mapping
1463                data							 = int(ceil(coredynp.phy_ireg_width/8.0));
1464                interface_ip.is_cache			 = false;
1465                interface_ip.pure_cam            = false;
1466                interface_ip.pure_ram            = true;
1467                interface_ip.line_sz             = data;
1468                interface_ip.cache_sz            = data*XML->sys.core[ithCore].archi_Regs_IRF_size*2;//HACK to make it as least 64B
1469                interface_ip.assoc               = 1;
1470                interface_ip.nbanks              = 1;
1471                interface_ip.out_w               = interface_ip.line_sz*8;
1472                interface_ip.access_mode         = 1;
1473                interface_ip.throughput          = 1.0/clockRate;
1474                interface_ip.latency             = 1.0/clockRate;
1475                interface_ip.obj_func_dyn_energy = 0;
1476                interface_ip.obj_func_dyn_power  = 0;
1477                interface_ip.obj_func_leak_power = 0;
1478                interface_ip.obj_func_cycle_t    = 1;
1479                interface_ip.num_rw_ports    = 0;
1480                interface_ip.num_rd_ports    = XML->sys.core[ithCore].commit_width;
1481                interface_ip.num_wr_ports    = XML->sys.core[ithCore].commit_width;
1482                interface_ip.num_se_rd_ports = 0;
1483                iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
1484                iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1485                area.set_area(area.get_area()+ iRRAT->area.get_area());
1486
1487                //RRAT for FP
1488                data							 = int(ceil(coredynp.phy_freg_width/8.0));
1489                interface_ip.is_cache			 = false;
1490                interface_ip.pure_cam            = false;
1491                interface_ip.pure_ram            = true;
1492                interface_ip.line_sz             = data;
1493                interface_ip.cache_sz            = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2;//HACK to make it as least 64B
1494                interface_ip.assoc               = 1;
1495                interface_ip.nbanks              = 1;
1496                interface_ip.out_w               = interface_ip.line_sz*8;
1497                interface_ip.access_mode         = 1;
1498                interface_ip.throughput          = 1.0/clockRate;
1499                interface_ip.latency             = 1.0/clockRate;
1500                interface_ip.obj_func_dyn_energy = 0;
1501                interface_ip.obj_func_dyn_power  = 0;
1502                interface_ip.obj_func_leak_power = 0;
1503                interface_ip.obj_func_cycle_t    = 1;
1504                interface_ip.num_rw_ports    = 0;
1505                interface_ip.num_rd_ports    = coredynp.fp_decodeW;
1506                interface_ip.num_wr_ports    = coredynp.fp_decodeW;
1507                interface_ip.num_se_rd_ports = 0;
1508                fRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
1509                fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1510                area.set_area(area.get_area()+ fRRAT->area.get_area());
1511
1512                //Freelist of renaming unit always RAM based
1513                //Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist
1514                // 2)When instruction commits the Phyregisters/ROB needed to be recycled.
1515                //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width
1516                data							 = int(ceil(coredynp.phy_ireg_width/8.0));
1517                interface_ip.is_cache			 = false;
1518                interface_ip.pure_cam            = false;
1519                interface_ip.pure_ram            = true;
1520                interface_ip.line_sz             = data;
1521                interface_ip.cache_sz            = data*coredynp.num_ifreelist_entries;
1522                interface_ip.assoc               = 1;
1523                interface_ip.nbanks              = 1;
1524                interface_ip.out_w               = interface_ip.line_sz*8;
1525                interface_ip.access_mode         = 1;
1526                interface_ip.throughput          = 1.0/clockRate;
1527                interface_ip.latency             = 1.0/clockRate;
1528                interface_ip.obj_func_dyn_energy = 0;
1529                interface_ip.obj_func_dyn_power  = 0;
1530                interface_ip.obj_func_leak_power = 0;
1531                interface_ip.obj_func_cycle_t    = 1;
1532                interface_ip.num_rw_ports    = 1;//TODO
1533                interface_ip.num_rd_ports    = coredynp.decodeW;
1534                interface_ip.num_wr_ports    = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width;
1535                //every cycle, (coredynp.decodeW -1) inst may need to send back it dest tags, committW insts needs to update freelist buffers
1536                interface_ip.num_se_rd_ports = 0;
1537                ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
1538                ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1539                area.set_area(area.get_area()+ ifreeL->area.get_area());
1540
1541                //freelist for FP
1542                data							 = int(ceil(coredynp.phy_freg_width/8.0));
1543                interface_ip.is_cache			 = false;
1544                interface_ip.pure_cam            = false;
1545                interface_ip.pure_ram            = true;
1546                interface_ip.line_sz             = data;
1547                interface_ip.cache_sz            = data*coredynp.num_ffreelist_entries;
1548                interface_ip.assoc               = 1;
1549                interface_ip.nbanks              = 1;
1550                interface_ip.out_w               = interface_ip.line_sz*8;
1551                interface_ip.access_mode         = 1;
1552                interface_ip.throughput          = 1.0/clockRate;
1553                interface_ip.latency             = 1.0/clockRate;
1554                interface_ip.obj_func_dyn_energy = 0;
1555                interface_ip.obj_func_dyn_power  = 0;
1556                interface_ip.obj_func_leak_power = 0;
1557                interface_ip.obj_func_cycle_t    = 1;
1558                interface_ip.num_rw_ports    = 1;
1559                interface_ip.num_rd_ports    = coredynp.fp_decodeW;
1560                interface_ip.num_wr_ports    = coredynp.fp_decodeW -1 + XML->sys.core[ithCore].commit_width;
1561                interface_ip.num_se_rd_ports = 0;
1562                ffreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
1563                ffreeL->area.set_area(ffreeL->area.get_area()+ ffreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1564                area.set_area(area.get_area()+ ffreeL->area.get_area());
1565
1566                idcl  = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
1567                fdcl  = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
1568
1569        }
1570        else if (coredynp.scheu_ty==ReservationStation){
1571                if (coredynp.rm_ty ==RAMbased){
1572                        /*
1573                         * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry,
1574                         * to make sure all the renamings associated with the ROB to be released are updated to ARF at the same time.
1575                         * RAM based RAT for RS base OOO does not save the search operations. Its advantage is to have less entries than
1576                         * CAM based RAT so that it is more scalable as number of ROB/physical regs increases.
1577                         */
1578                        tag							     = coredynp.phy_ireg_width;
1579                        data							 = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));
1580                        out_w                            = int(ceil(coredynp.phy_ireg_width/8.0));
1581                        interface_ip.is_cache			 = true;
1582                        interface_ip.pure_cam            = false;
1583                        interface_ip.pure_ram            = false;
1584                        interface_ip.line_sz             = data;
1585                        interface_ip.cache_sz            = data*XML->sys.core[ithCore].archi_Regs_IRF_size;
1586                        interface_ip.assoc               = 0;
1587                        interface_ip.nbanks              = 1;
1588                        interface_ip.out_w               = out_w*8;
1589                        interface_ip.access_mode         = 2;
1590                        interface_ip.throughput          = 1.0/clockRate;
1591                        interface_ip.latency             = 1.0/clockRate;
1592                        interface_ip.obj_func_dyn_energy = 0;
1593                        interface_ip.obj_func_dyn_power  = 0;
1594                        interface_ip.obj_func_leak_power = 0;
1595                        interface_ip.obj_func_cycle_t    = 1;
1596                        interface_ip.num_rw_ports    = 1;//the extra one port is for GCs
1597                        interface_ip.num_rd_ports    = 2*coredynp.decodeW;
1598                        interface_ip.num_wr_ports    = coredynp.decodeW;
1599                        interface_ip.num_se_rd_ports = 0;
1600                        interface_ip.num_search_ports= coredynp.commitW;//TODO
1601                        iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
1602                        iFRAT->local_result.adjust_area();
1603                        iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1604                        area.set_area(area.get_area()+ iFRAT->area.get_area());
1605
1606                        //FP
1607                        tag							     = coredynp.phy_freg_width;
1608                        data							 = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
1609                        out_w                            = int(ceil(coredynp.phy_freg_width/8.0));
1610                        interface_ip.is_cache			 = true;
1611                        interface_ip.pure_cam            = false;
1612                        interface_ip.pure_ram            = false;
1613                        interface_ip.line_sz             = data;
1614                        interface_ip.cache_sz            = data*XML->sys.core[ithCore].archi_Regs_FRF_size;
1615                        interface_ip.assoc               = 0;
1616                        interface_ip.nbanks              = 1;
1617                        interface_ip.out_w               = out_w*8;
1618                        interface_ip.access_mode         = 2;
1619                        interface_ip.throughput          = 1.0/clockRate;
1620                        interface_ip.latency             = 1.0/clockRate;
1621                        interface_ip.obj_func_dyn_energy = 0;
1622                        interface_ip.obj_func_dyn_power  = 0;
1623                        interface_ip.obj_func_leak_power = 0;
1624                        interface_ip.obj_func_cycle_t    = 1;
1625                        interface_ip.num_rw_ports    = 1;//the extra one port is for GCs
1626                        interface_ip.num_rd_ports    = 2*coredynp.fp_decodeW;
1627                        interface_ip.num_wr_ports    = coredynp.fp_decodeW;
1628                        interface_ip.num_se_rd_ports = 0;
1629                        interface_ip.num_search_ports= coredynp.fp_decodeW;//actually is fp commit width
1630                        fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
1631                        fFRAT->local_result.adjust_area();
1632                        fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1633                        area.set_area(area.get_area()+ fFRAT->area.get_area());
1634
1635                }
1636                else if ((coredynp.rm_ty ==CAMbased))
1637                {
1638                        //FRAT
1639                        tag							     = coredynp.arch_ireg_width;
1640                        data							 = int(ceil (coredynp.arch_ireg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out
1641                        out_w                            = int(ceil (coredynp.arch_ireg_width/8.0));
1642                        interface_ip.is_cache			 = true;
1643                        interface_ip.pure_cam            = false;
1644                        interface_ip.pure_ram            = false;
1645                        interface_ip.line_sz             = data;
1646                        interface_ip.cache_sz            = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
1647                        interface_ip.assoc               = 0;
1648                        interface_ip.nbanks              = 1;
1649                        interface_ip.out_w               = out_w*8;
1650                        interface_ip.specific_tag        = 1;
1651                        interface_ip.tag_w               = tag;
1652                        interface_ip.access_mode         = 2;
1653                        interface_ip.throughput          = 1.0/clockRate;
1654                        interface_ip.latency             = 1.0/clockRate;
1655                        interface_ip.obj_func_dyn_energy = 0;
1656                        interface_ip.obj_func_dyn_power  = 0;
1657                        interface_ip.obj_func_leak_power = 0;
1658                        interface_ip.obj_func_cycle_t    = 1;
1659                        interface_ip.num_rw_ports    = 1;//for GCs
1660                        interface_ip.num_rd_ports    = XML->sys.core[ithCore].decode_width;//0;TODO
1661                        interface_ip.num_wr_ports    = XML->sys.core[ithCore].decode_width;
1662                        interface_ip.num_se_rd_ports = 0;
1663                        interface_ip.num_search_ports= 2*XML->sys.core[ithCore].decode_width;
1664                        iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
1665                        iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1666                        area.set_area(area.get_area()+ iFRAT->area.get_area());
1667
1668                        //FRAT
1669                        tag							     = coredynp.arch_freg_width;
1670                        data							 = int(ceil (coredynp.arch_freg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out
1671                        out_w                            = int(ceil (coredynp.arch_freg_width/8.0));
1672                        interface_ip.is_cache			 = true;
1673                        interface_ip.pure_cam            = false;
1674                        interface_ip.pure_ram            = false;
1675                        interface_ip.line_sz             = data;
1676                        interface_ip.cache_sz            = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
1677                        interface_ip.assoc               = 0;
1678                        interface_ip.nbanks              = 1;
1679                        interface_ip.out_w               = out_w*8;
1680                        interface_ip.specific_tag        = 1;
1681                        interface_ip.tag_w               = tag;
1682                        interface_ip.access_mode         = 2;
1683                        interface_ip.throughput          = 1.0/clockRate;
1684                        interface_ip.latency             = 1.0/clockRate;
1685                        interface_ip.obj_func_dyn_energy = 0;
1686                        interface_ip.obj_func_dyn_power  = 0;
1687                        interface_ip.obj_func_leak_power = 0;
1688                        interface_ip.obj_func_cycle_t    = 1;
1689                        interface_ip.num_rw_ports    = 1;//for GCs
1690                        interface_ip.num_rd_ports    = XML->sys.core[ithCore].decode_width;//0;TODO;
1691                        interface_ip.num_wr_ports    = coredynp.fp_decodeW;
1692                        interface_ip.num_se_rd_ports = 0;
1693                        interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
1694                        fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
1695                        fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1696                        area.set_area(area.get_area()+ fFRAT->area.get_area());
1697
1698                }
1699                //No RRAT for RS based OOO
1700                //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified
1701                data							 = int(ceil(coredynp.phy_ireg_width/8.0));
1702                interface_ip.is_cache			 = false;
1703                interface_ip.pure_cam            = false;
1704                interface_ip.pure_ram            = true;
1705                interface_ip.line_sz             = data;
1706                interface_ip.cache_sz            = data*coredynp.num_ifreelist_entries;
1707                interface_ip.assoc               = 1;
1708                interface_ip.nbanks              = 1;
1709                interface_ip.out_w               = interface_ip.line_sz*8;
1710                interface_ip.access_mode         = 1;
1711                interface_ip.throughput          = 1.0/clockRate;
1712                interface_ip.latency             = 1.0/clockRate;
1713                interface_ip.obj_func_dyn_energy = 0;
1714                interface_ip.obj_func_dyn_power  = 0;
1715                interface_ip.obj_func_leak_power = 0;
1716                interface_ip.obj_func_cycle_t    = 1;
1717                interface_ip.num_rw_ports    = 1;//TODO
1718                interface_ip.num_rd_ports    = XML->sys.core[ithCore].decode_width;
1719                interface_ip.num_wr_ports    = XML->sys.core[ithCore].decode_width -1 + XML->sys.core[ithCore].commit_width;
1720                interface_ip.num_se_rd_ports = 0;
1721                ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
1722                ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
1723                area.set_area(area.get_area()+ ifreeL->area.get_area());
1724
1725                idcl  = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
1726                fdcl  = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
1727        }
1728
1729}
1730    if (coredynp.core_ty==Inorder&& coredynp.issueW>1)
1731    {
1732          /* Dependency check logic will only present when decode(issue) width>1.
1733          *  Multiple issue in order processor can do without renaming, but dcl is a must.
1734          */
1735        idcl  = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
1736        fdcl  = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
1737    }
1738}
1739
1740Core::Core(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_)
1741:XML(XML_interface),
1742 ithCore(ithCore_),
1743 interface_ip(*interface_ip_),
1744 ifu  (0),
1745 lsu  (0),
1746 mmu  (0),
1747 exu  (0),
1748 rnu  (0),
1749 corepipe (0),
1750 undiffCore (0),
1751 l2cache (0)
1752{
1753 /*
1754  * initialize, compute and optimize individual components.
1755  */
1756
1757  double pipeline_area_per_unit;
1758  if (XML->sys.Private_L2)
1759  {
1760          l2cache = new SharedCache(XML,ithCore, &interface_ip);
1761
1762  }
1763//  interface_ip.wire_is_mat_type = 2;
1764//  interface_ip.wire_os_mat_type = 2;
1765//  interface_ip.wt               =Global_30;
1766  set_core_param();
1767  clockRate = coredynp.clockRate;
1768  executionTime = coredynp.executionTime;
1769  ifu          = new InstFetchU(XML, ithCore, &interface_ip,coredynp);
1770  lsu          = new LoadStoreU(XML, ithCore, &interface_ip,coredynp);
1771  mmu          = new MemManU   (XML, ithCore, &interface_ip,coredynp);
1772  exu          = new EXECU     (XML, ithCore, &interface_ip,lsu->lsq_height, coredynp);
1773  undiffCore   = new UndiffCore(XML, ithCore, &interface_ip,coredynp);
1774  if (coredynp.core_ty==OOO)
1775  {
1776          rnu = new RENAMINGU(XML, ithCore, &interface_ip,coredynp);
1777  }
1778  corepipe = new Pipeline(&interface_ip,coredynp);
1779
1780  if (coredynp.core_ty==OOO)
1781  {
1782          pipeline_area_per_unit    = (corepipe->area.get_area()*coredynp.num_pipelines)/5.0;
1783          if (rnu->exist)
1784          {
1785                  rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit);
1786          }
1787  }
1788  else {
1789          pipeline_area_per_unit    = (corepipe->area.get_area()*coredynp.num_pipelines)/4.0;
1790  }
1791
1792  //area.set_area(area.get_area()+ corepipe->area.get_area());
1793  if (ifu->exist)
1794  {
1795          ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit);
1796          area.set_area(area.get_area() + ifu->area.get_area());
1797  }
1798  if (lsu->exist)
1799  {
1800          lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit);
1801      area.set_area(area.get_area() + lsu->area.get_area());
1802  }
1803  if (exu->exist)
1804  {
1805          exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit);
1806          area.set_area(area.get_area()+exu->area.get_area());
1807  }
1808  if (mmu->exist)
1809  {
1810          mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit);
1811      area.set_area(area.get_area()+mmu->area.get_area());
1812  }
1813
1814  if (coredynp.core_ty==OOO)
1815  {
1816          if (rnu->exist)
1817          {
1818
1819                  area.set_area(area.get_area() + rnu->area.get_area());
1820          }
1821  }
1822
1823  if (undiffCore->exist)
1824  {
1825          area.set_area(area.get_area() + undiffCore->area.get_area());
1826  }
1827
1828  if (XML->sys.Private_L2)
1829  {
1830          area.set_area(area.get_area() + l2cache->area.get_area());
1831
1832  }
1833//  //clock power
1834//  clockNetwork.init_wire_external(is_default, &interface_ip);
1835//  clockNetwork.clk_area           =area*1.1;//10% of placement overhead. rule of thumb
1836//  clockNetwork.end_wiring_level   =5;//toplevel metal
1837//  clockNetwork.start_wiring_level =5;//toplevel metal
1838//  clockNetwork.num_regs           = corepipe.tot_stage_vector;
1839//  clockNetwork.optimize_wire();
1840}
1841
1842
1843void BranchPredictor::computeEnergy(bool is_tdp)
1844{
1845        if (!exist) return;
1846        double r_access;
1847        double w_access;
1848        if (is_tdp)
1849    {
1850        r_access = coredynp.predictionW*coredynp.BR_duty_cycle;
1851        w_access = 0*coredynp.BR_duty_cycle;
1852        globalBPT->stats_t.readAc.access  = r_access;
1853        globalBPT->stats_t.writeAc.access = w_access;
1854        globalBPT->tdp_stats = globalBPT->stats_t;
1855
1856        L1_localBPT->stats_t.readAc.access  = r_access;
1857        L1_localBPT->stats_t.writeAc.access = w_access;
1858        L1_localBPT->tdp_stats = L1_localBPT->stats_t;
1859
1860        L2_localBPT->stats_t.readAc.access  = r_access;
1861        L2_localBPT->stats_t.writeAc.access = w_access;
1862        L2_localBPT->tdp_stats = L2_localBPT->stats_t;
1863
1864        chooser->stats_t.readAc.access  = r_access;
1865        chooser->stats_t.writeAc.access = w_access;
1866        chooser->tdp_stats = chooser->stats_t;
1867
1868        RAS->stats_t.readAc.access  = r_access;
1869        RAS->stats_t.writeAc.access = w_access;
1870        RAS->tdp_stats = RAS->stats_t;
1871    }
1872    else
1873    {
1874        //The resolution of BPT accesses is coarse, but this is
1875        //because most simulators cannot track finer grained details
1876        r_access = XML->sys.core[ithCore].branch_instructions;
1877        w_access = XML->sys.core[ithCore].branch_mispredictions + 0.1*XML->sys.core[ithCore].branch_instructions;//10% of BR will flip internal bits//0
1878        globalBPT->stats_t.readAc.access  = r_access;
1879        globalBPT->stats_t.writeAc.access = w_access;
1880        globalBPT->rtp_stats = globalBPT->stats_t;
1881
1882        L1_localBPT->stats_t.readAc.access  = r_access;
1883        L1_localBPT->stats_t.writeAc.access = w_access;
1884        L1_localBPT->rtp_stats = L1_localBPT->stats_t;
1885
1886        L2_localBPT->stats_t.readAc.access  = r_access;
1887        L2_localBPT->stats_t.writeAc.access = w_access;
1888        L2_localBPT->rtp_stats = L2_localBPT->stats_t;
1889
1890        chooser->stats_t.readAc.access  = r_access;
1891        chooser->stats_t.writeAc.access = w_access;
1892        chooser->rtp_stats = chooser->stats_t;
1893
1894        RAS->stats_t.readAc.access  = XML->sys.core[ithCore].function_calls;
1895        RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls;
1896        RAS->rtp_stats = RAS->stats_t;
1897   }
1898
1899        globalBPT->power_t.reset();
1900        L1_localBPT->power_t.reset();
1901        L2_localBPT->power_t.reset();
1902        chooser->power_t.reset();
1903        RAS->power_t.reset();
1904
1905    globalBPT->power_t.readOp.dynamic   +=  globalBPT->local_result.power.readOp.dynamic*globalBPT->stats_t.readAc.access +
1906                globalBPT->stats_t.writeAc.access*globalBPT->local_result.power.writeOp.dynamic;
1907    L1_localBPT->power_t.readOp.dynamic   +=  L1_localBPT->local_result.power.readOp.dynamic*L1_localBPT->stats_t.readAc.access +
1908                L1_localBPT->stats_t.writeAc.access*L1_localBPT->local_result.power.writeOp.dynamic;
1909
1910    L2_localBPT->power_t.readOp.dynamic   +=  L2_localBPT->local_result.power.readOp.dynamic*L2_localBPT->stats_t.readAc.access +
1911                L2_localBPT->stats_t.writeAc.access*L2_localBPT->local_result.power.writeOp.dynamic;
1912
1913    chooser->power_t.readOp.dynamic   +=  chooser->local_result.power.readOp.dynamic*chooser->stats_t.readAc.access +
1914                chooser->stats_t.writeAc.access*chooser->local_result.power.writeOp.dynamic;
1915    RAS->power_t.readOp.dynamic   +=  RAS->local_result.power.readOp.dynamic*RAS->stats_t.readAc.access +
1916                RAS->stats_t.writeAc.access*RAS->local_result.power.writeOp.dynamic;
1917
1918    if (is_tdp)
1919    {
1920        globalBPT->power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
1921        L1_localBPT->power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
1922        L2_localBPT->power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
1923        chooser->power = chooser->power_t + chooser->local_result.power*pppm_lkg;
1924        RAS->power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;
1925
1926        power = power + globalBPT->power + L1_localBPT->power + chooser->power + RAS->power;
1927    }
1928    else
1929    {
1930        globalBPT->rt_power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
1931        L1_localBPT->rt_power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
1932        L2_localBPT->rt_power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
1933        chooser->rt_power = chooser->power_t + chooser->local_result.power*pppm_lkg;
1934        RAS->rt_power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;
1935        rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + chooser->rt_power + RAS->rt_power;
1936    }
1937}
1938
1939void BranchPredictor::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
1940{
1941        if (!exist) return;
1942        string indent_str(indent, ' ');
1943        string indent_str_next(indent+2, ' ');
1944        bool long_channel = XML->sys.longer_channel_device;
1945        if (is_tdp)
1946        {
1947                cout << indent_str<< "Global Predictor:" << endl;
1948                cout << indent_str_next << "Area = " << globalBPT->area.get_area()*1e-6<< " mm^2" << endl;
1949                cout << indent_str_next << "Peak Dynamic = " << globalBPT->power.readOp.dynamic*clockRate << " W" << endl;
1950                cout << indent_str_next << "Subthreshold Leakage = "
1951                        << (long_channel? globalBPT->power.readOp.longer_channel_leakage:globalBPT->power.readOp.leakage) <<" W" << endl;
1952                cout << indent_str_next << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" << endl;
1953                cout << indent_str_next << "Runtime Dynamic = " << globalBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
1954                cout <<endl;
1955                cout << indent_str << "Local Predictor:" << endl;
1956                cout << indent_str << "L1_Local Predictor:" << endl;
1957                cout << indent_str_next << "Area = " << L1_localBPT->area.get_area() *1e-6 << " mm^2" << endl;
1958                cout << indent_str_next << "Peak Dynamic = " << L1_localBPT->power.readOp.dynamic*clockRate  << " W" << endl;
1959                cout << indent_str_next << "Subthreshold Leakage = "
1960                        << (long_channel? L1_localBPT->power.readOp.longer_channel_leakage:L1_localBPT->power.readOp.leakage)  << " W" << endl;
1961                cout << indent_str_next << "Gate Leakage = " << L1_localBPT->power.readOp.gate_leakage  << " W" << endl;
1962                cout << indent_str_next << "Runtime Dynamic = " << L1_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
1963                cout <<endl;
1964                cout << indent_str << "L2_Local Predictor:" << endl;
1965                cout << indent_str_next << "Area = " << L2_localBPT->area.get_area() *1e-6 << " mm^2" << endl;
1966                cout << indent_str_next << "Peak Dynamic = " << L2_localBPT->power.readOp.dynamic*clockRate  << " W" << endl;
1967                cout << indent_str_next << "Subthreshold Leakage = "
1968                        << (long_channel? L2_localBPT->power.readOp.longer_channel_leakage:L2_localBPT->power.readOp.leakage)  << " W" << endl;
1969                cout << indent_str_next << "Gate Leakage = " << L2_localBPT->power.readOp.gate_leakage  << " W" << endl;
1970                cout << indent_str_next << "Runtime Dynamic = " << L2_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
1971                cout <<endl;
1972
1973                cout << indent_str << "Chooser:" << endl;
1974                cout << indent_str_next << "Area = " << chooser->area.get_area()  *1e-6 << " mm^2" << endl;
1975                cout << indent_str_next << "Peak Dynamic = " << chooser->power.readOp.dynamic*clockRate  << " W" << endl;
1976                cout << indent_str_next << "Subthreshold Leakage = "
1977                        << (long_channel? chooser->power.readOp.longer_channel_leakage:chooser->power.readOp.leakage)  << " W" << endl;
1978                cout << indent_str_next << "Gate Leakage = " << chooser->power.readOp.gate_leakage  << " W" << endl;
1979                cout << indent_str_next << "Runtime Dynamic = " << chooser->rt_power.readOp.dynamic/executionTime << " W" << endl;
1980                cout <<endl;
1981                cout << indent_str << "RAS:" << endl;
1982                cout << indent_str_next << "Area = " << RAS->area.get_area() *1e-6 << " mm^2" << endl;
1983                cout << indent_str_next << "Peak Dynamic = " << RAS->power.readOp.dynamic*clockRate  << " W" << endl;
1984                cout << indent_str_next << "Subthreshold Leakage = "
1985                        << (long_channel? RAS->power.readOp.longer_channel_leakage:RAS->power.readOp.leakage)  << " W" << endl;
1986                cout << indent_str_next << "Gate Leakage = " << RAS->power.readOp.gate_leakage  << " W" << endl;
1987                cout << indent_str_next << "Runtime Dynamic = " << RAS->rt_power.readOp.dynamic/executionTime << " W" << endl;
1988                cout <<endl;
1989        }
1990        else
1991        {
1992//		cout << indent_str_next << "Global Predictor    Peak Dynamic = " << globalBPT->rt_power.readOp.dynamic*clockRate << " W" << endl;
1993//		cout << indent_str_next << "Global Predictor    Subthreshold Leakage = " << globalBPT->rt_power.readOp.leakage <<" W" << endl;
1994//		cout << indent_str_next << "Global Predictor    Gate Leakage = " << globalBPT->rt_power.readOp.gate_leakage << " W" << endl;
1995//		cout << indent_str_next << "Local Predictor   Peak Dynamic = " << L1_localBPT->rt_power.readOp.dynamic*clockRate  << " W" << endl;
1996//		cout << indent_str_next << "Local Predictor   Subthreshold Leakage = " << L1_localBPT->rt_power.readOp.leakage  << " W" << endl;
1997//		cout << indent_str_next << "Local Predictor   Gate Leakage = " << L1_localBPT->rt_power.readOp.gate_leakage  << " W" << endl;
1998//		cout << indent_str_next << "Chooser   Peak Dynamic = " << chooser->rt_power.readOp.dynamic*clockRate  << " W" << endl;
1999//		cout << indent_str_next << "Chooser   Subthreshold Leakage = " << chooser->rt_power.readOp.leakage  << " W" << endl;
2000//		cout << indent_str_next << "Chooser   Gate Leakage = " << chooser->rt_power.readOp.gate_leakage  << " W" << endl;
2001//		cout << indent_str_next << "RAS   Peak Dynamic = " << RAS->rt_power.readOp.dynamic*clockRate  << " W" << endl;
2002//		cout << indent_str_next << "RAS   Subthreshold Leakage = " << RAS->rt_power.readOp.leakage  << " W" << endl;
2003//		cout << indent_str_next << "RAS   Gate Leakage = " << RAS->rt_power.readOp.gate_leakage  << " W" << endl;
2004        }
2005
2006}
2007
2008void InstFetchU::computeEnergy(bool is_tdp)
2009{
2010        if (!exist) return;
2011        if (is_tdp)
2012    {
2013                //init stats for Peak
2014        icache.caches->stats_t.readAc.access  = icache.caches->l_ip.num_rw_ports*coredynp.IFU_duty_cycle;
2015        icache.caches->stats_t.readAc.miss    = 0;
2016        icache.caches->stats_t.readAc.hit     = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
2017        icache.caches->tdp_stats = icache.caches->stats_t;
2018
2019        icache.missb->stats_t.readAc.access  = icache.missb->stats_t.readAc.hit=  icache.missb->l_ip.num_search_ports;
2020        icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit= icache.missb->l_ip.num_search_ports;
2021        icache.missb->tdp_stats = icache.missb->stats_t;
2022
2023        icache.ifb->stats_t.readAc.access  = icache.ifb->stats_t.readAc.hit=  icache.ifb->l_ip.num_search_ports;
2024        icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports;
2025        icache.ifb->tdp_stats = icache.ifb->stats_t;
2026
2027        icache.prefetchb->stats_t.readAc.access  = icache.prefetchb->stats_t.readAc.hit= icache.prefetchb->l_ip.num_search_ports;
2028        icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports;
2029        icache.prefetchb->tdp_stats = icache.prefetchb->stats_t;
2030
2031        IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width;
2032        IB->tdp_stats = IB->stats_t;
2033
2034        if (coredynp.predictionW>0)
2035        {
2036                BTB->stats_t.readAc.access  = coredynp.predictionW;//XML->sys.core[ithCore].BTB.read_accesses;
2037                BTB->stats_t.writeAc.access = 0;//XML->sys.core[ithCore].BTB.write_accesses;
2038        }
2039
2040        ID_inst->stats_t.readAc.access     = coredynp.decodeW;
2041        ID_operand->stats_t.readAc.access  = coredynp.decodeW;
2042        ID_misc->stats_t.readAc.access     = coredynp.decodeW;
2043        ID_inst->tdp_stats = ID_inst->stats_t;
2044        ID_operand->tdp_stats = ID_operand->stats_t;
2045        ID_misc->tdp_stats = ID_misc->stats_t;
2046
2047
2048    }
2049    else
2050    {
2051        //init stats for Runtime Dynamic (RTP)
2052        icache.caches->stats_t.readAc.access  = XML->sys.core[ithCore].icache.read_accesses;
2053        icache.caches->stats_t.readAc.miss    = XML->sys.core[ithCore].icache.read_misses;
2054        icache.caches->stats_t.readAc.hit     = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
2055        icache.caches->rtp_stats = icache.caches->stats_t;
2056
2057        icache.missb->stats_t.readAc.access  = icache.caches->stats_t.readAc.miss;
2058        icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
2059        icache.missb->rtp_stats = icache.missb->stats_t;
2060
2061        icache.ifb->stats_t.readAc.access  = icache.caches->stats_t.readAc.miss;
2062        icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
2063        icache.ifb->rtp_stats = icache.ifb->stats_t;
2064
2065        icache.prefetchb->stats_t.readAc.access  = icache.caches->stats_t.readAc.miss;
2066        icache.prefetchb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
2067        icache.prefetchb->rtp_stats = icache.prefetchb->stats_t;
2068
2069        IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions;
2070        IB->rtp_stats = IB->stats_t;
2071
2072        if (coredynp.predictionW>0)
2073        {
2074                BTB->stats_t.readAc.access  = XML->sys.core[ithCore].BTB.read_accesses;//XML->sys.core[ithCore].branch_instructions;
2075                BTB->stats_t.writeAc.access = XML->sys.core[ithCore].BTB.write_accesses;//XML->sys.core[ithCore].branch_mispredictions;
2076                BTB->rtp_stats = BTB->stats_t;
2077        }
2078
2079        ID_inst->stats_t.readAc.access     = XML->sys.core[ithCore].total_instructions;
2080        ID_operand->stats_t.readAc.access  = XML->sys.core[ithCore].total_instructions;
2081        ID_misc->stats_t.readAc.access     = XML->sys.core[ithCore].total_instructions;
2082        ID_inst->rtp_stats = ID_inst->stats_t;
2083        ID_operand->rtp_stats = ID_operand->stats_t;
2084        ID_misc->rtp_stats = ID_misc->stats_t;
2085
2086    }
2087
2088    icache.power_t.reset();
2089    IB->power_t.reset();
2090//	ID_inst->power_t.reset();
2091//	ID_operand->power_t.reset();
2092//	ID_misc->power_t.reset();
2093    if (coredynp.predictionW>0)
2094    {
2095        BTB->power_t.reset();
2096    }
2097
2098    icache.power_t.readOp.dynamic	+= (icache.caches->stats_t.readAc.hit*icache.caches->local_result.power.readOp.dynamic+
2099                //icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+
2100                icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.readOp.dynamic+ //assume tag data accessed in parallel
2101                icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.writeOp.dynamic); //read miss in Icache cause a write to Icache
2102    icache.power_t.readOp.dynamic	+=  icache.missb->stats_t.readAc.access*icache.missb->local_result.power.searchOp.dynamic +
2103            icache.missb->stats_t.writeAc.access*icache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
2104    icache.power_t.readOp.dynamic	+=  icache.ifb->stats_t.readAc.access*icache.ifb->local_result.power.searchOp.dynamic +
2105            icache.ifb->stats_t.writeAc.access*icache.ifb->local_result.power.writeOp.dynamic;
2106    icache.power_t.readOp.dynamic	+=  icache.prefetchb->stats_t.readAc.access*icache.prefetchb->local_result.power.searchOp.dynamic +
2107            icache.prefetchb->stats_t.writeAc.access*icache.prefetchb->local_result.power.writeOp.dynamic;
2108
2109        IB->power_t.readOp.dynamic   +=  IB->local_result.power.readOp.dynamic*IB->stats_t.readAc.access +
2110                        IB->stats_t.writeAc.access*IB->local_result.power.writeOp.dynamic;
2111
2112        if (coredynp.predictionW>0)
2113        {
2114                BTB->power_t.readOp.dynamic   +=  BTB->local_result.power.readOp.dynamic*BTB->stats_t.readAc.access +
2115                BTB->stats_t.writeAc.access*BTB->local_result.power.writeOp.dynamic;
2116
2117                BPT->computeEnergy(is_tdp);
2118        }
2119
2120    if (is_tdp)
2121    {
2122//    	icache.power = icache.power_t +
2123//    	        (icache.caches->local_result.power)*pppm_lkg +
2124//    			(icache.missb->local_result.power +
2125//    			icache.ifb->local_result.power +
2126//    			icache.prefetchb->local_result.power)*pppm_Isub;
2127        icache.power = icache.power_t +
2128                (icache.caches->local_result.power +
2129                        icache.missb->local_result.power +
2130                        icache.ifb->local_result.power +
2131                        icache.prefetchb->local_result.power)*pppm_lkg;
2132
2133        IB->power = IB->power_t + IB->local_result.power*pppm_lkg;
2134        power     = power + icache.power + IB->power;
2135        if (coredynp.predictionW>0)
2136        {
2137                BTB->power = BTB->power_t + BTB->local_result.power*pppm_lkg;
2138                power     = power  + BTB->power + BPT->power;
2139        }
2140
2141        ID_inst->power_t.readOp.dynamic    = ID_inst->power.readOp.dynamic;
2142        ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic;
2143        ID_misc->power_t.readOp.dynamic    = ID_misc->power.readOp.dynamic;
2144
2145        ID_inst->power.readOp.dynamic    *= ID_inst->tdp_stats.readAc.access;
2146        ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access;
2147        ID_misc->power.readOp.dynamic    *= ID_misc->tdp_stats.readAc.access;
2148
2149        power = power + (ID_inst->power +
2150                                                        ID_operand->power +
2151                                                        ID_misc->power);
2152    }
2153    else
2154    {
2155//    	icache.rt_power = icache.power_t +
2156//    	        (icache.caches->local_result.power)*pppm_lkg +
2157//    			(icache.missb->local_result.power +
2158//    			icache.ifb->local_result.power +
2159//    			icache.prefetchb->local_result.power)*pppm_Isub;
2160
2161        icache.rt_power = icache.power_t +
2162                (icache.caches->local_result.power +
2163                        icache.missb->local_result.power +
2164                        icache.ifb->local_result.power +
2165                        icache.prefetchb->local_result.power)*pppm_lkg;
2166
2167        IB->rt_power = IB->power_t + IB->local_result.power*pppm_lkg;
2168        rt_power     = rt_power + icache.rt_power + IB->rt_power;
2169        if (coredynp.predictionW>0)
2170        {
2171                BTB->rt_power = BTB->power_t + BTB->local_result.power*pppm_lkg;
2172                rt_power     = rt_power + BTB->rt_power + BPT->rt_power;
2173        }
2174
2175        ID_inst->rt_power.readOp.dynamic    = ID_inst->power_t.readOp.dynamic*ID_inst->rtp_stats.readAc.access;
2176        ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * ID_operand->rtp_stats.readAc.access;
2177        ID_misc->rt_power.readOp.dynamic    = ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access;
2178
2179        rt_power = rt_power + (ID_inst->rt_power +
2180                                                        ID_operand->rt_power +
2181                                                        ID_misc->rt_power);
2182    }
2183}
2184
2185void InstFetchU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
2186{
2187        if (!exist) return;
2188        string indent_str(indent, ' ');
2189        string indent_str_next(indent+2, ' ');
2190        bool long_channel = XML->sys.longer_channel_device;
2191
2192
2193        if (is_tdp)
2194        {
2195
2196                cout << indent_str<< "Instruction Cache:" << endl;
2197                cout << indent_str_next << "Area = " << icache.area.get_area()*1e-6<< " mm^2" << endl;
2198                cout << indent_str_next << "Peak Dynamic = " << icache.power.readOp.dynamic*clockRate << " W" << endl;
2199                cout << indent_str_next << "Subthreshold Leakage = "
2200                        << (long_channel? icache.power.readOp.longer_channel_leakage:icache.power.readOp.leakage) <<" W" << endl;
2201                cout << indent_str_next << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" << endl;
2202                cout << indent_str_next << "Runtime Dynamic = " << icache.rt_power.readOp.dynamic/executionTime << " W" << endl;
2203                cout <<endl;
2204                if (coredynp.predictionW>0)
2205                {
2206                        cout << indent_str<< "Branch Target Buffer:" << endl;
2207                        cout << indent_str_next << "Area = " << BTB->area.get_area() *1e-6 << " mm^2" << endl;
2208                        cout << indent_str_next << "Peak Dynamic = " << BTB->power.readOp.dynamic*clockRate  << " W" << endl;
2209                        cout << indent_str_next << "Subthreshold Leakage = "
2210                                << (long_channel? BTB->power.readOp.longer_channel_leakage:BTB->power.readOp.leakage)  << " W" << endl;
2211                        cout << indent_str_next << "Gate Leakage = " << BTB->power.readOp.gate_leakage  << " W" << endl;
2212                        cout << indent_str_next << "Runtime Dynamic = " << BTB->rt_power.readOp.dynamic/executionTime << " W" << endl;
2213                        cout <<endl;
2214                        if (BPT->exist)
2215                        {
2216                                cout << indent_str<< "Branch Predictor:" << endl;
2217                                cout << indent_str_next << "Area = " << BPT->area.get_area()  *1e-6<< " mm^2" << endl;
2218                                cout << indent_str_next << "Peak Dynamic = " << BPT->power.readOp.dynamic*clockRate  << " W" << endl;
2219                                cout << indent_str_next << "Subthreshold Leakage = "
2220                                        << (long_channel? BPT->power.readOp.longer_channel_leakage:BPT->power.readOp.leakage)  << " W" << endl;
2221                                cout << indent_str_next << "Gate Leakage = " << BPT->power.readOp.gate_leakage  << " W" << endl;
2222                                cout << indent_str_next << "Runtime Dynamic = " << BPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
2223                                cout <<endl;
2224                                if (plevel>3)
2225                                {
2226                                        BPT->displayEnergy(indent+4, plevel, is_tdp);
2227                                }
2228                        }
2229                }
2230                cout << indent_str<< "Instruction Buffer:" << endl;
2231                cout << indent_str_next << "Area = " << IB->area.get_area()*1e-6  << " mm^2" << endl;
2232                cout << indent_str_next << "Peak Dynamic = " << IB->power.readOp.dynamic*clockRate  << " W" << endl;
2233                cout << indent_str_next << "Subthreshold Leakage = "
2234                << (long_channel? IB->power.readOp.longer_channel_leakage:IB->power.readOp.leakage)  << " W" << endl;
2235                cout << indent_str_next << "Gate Leakage = " << IB->power.readOp.gate_leakage  << " W" << endl;
2236                cout << indent_str_next << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic/executionTime << " W" << endl;
2237                cout <<endl;
2238                cout << indent_str<< "Instruction Decoder:" << endl;
2239                cout << indent_str_next << "Area = " << (ID_inst->area.get_area() +
2240                                ID_operand->area.get_area() +
2241                                ID_misc->area.get_area())*coredynp.decodeW*1e-6  << " mm^2" << endl;
2242                cout << indent_str_next << "Peak Dynamic = " << (ID_inst->power.readOp.dynamic +
2243                                ID_operand->power.readOp.dynamic +
2244                                ID_misc->power.readOp.dynamic)*clockRate  << " W" << endl;
2245                cout << indent_str_next << "Subthreshold Leakage = "
2246                << (long_channel? (ID_inst->power.readOp.longer_channel_leakage +
2247                                ID_operand->power.readOp.longer_channel_leakage +
2248                                ID_misc->power.readOp.longer_channel_leakage):
2249                                        (ID_inst->power.readOp.leakage +
2250                                                        ID_operand->power.readOp.leakage +
2251                                                        ID_misc->power.readOp.leakage))  << " W" << endl;
2252                cout << indent_str_next << "Gate Leakage = " << (ID_inst->power.readOp.gate_leakage +
2253                                ID_operand->power.readOp.gate_leakage +
2254                                ID_misc->power.readOp.gate_leakage)  << " W" << endl;
2255                cout << indent_str_next << "Runtime Dynamic = " << (ID_inst->rt_power.readOp.dynamic +
2256                                ID_operand->rt_power.readOp.dynamic +
2257                                ID_misc->rt_power.readOp.dynamic)/executionTime << " W" << endl;
2258                cout <<endl;
2259        }
2260        else
2261        {
2262//		cout << indent_str_next << "Instruction Cache    Peak Dynamic = " << icache.rt_power.readOp.dynamic*clockRate << " W" << endl;
2263//		cout << indent_str_next << "Instruction Cache    Subthreshold Leakage = " << icache.rt_power.readOp.leakage <<" W" << endl;
2264//		cout << indent_str_next << "Instruction Cache    Gate Leakage = " << icache.rt_power.readOp.gate_leakage << " W" << endl;
2265//		cout << indent_str_next << "Instruction Buffer   Peak Dynamic = " << IB->rt_power.readOp.dynamic*clockRate  << " W" << endl;
2266//		cout << indent_str_next << "Instruction Buffer   Subthreshold Leakage = " << IB->rt_power.readOp.leakage  << " W" << endl;
2267//		cout << indent_str_next << "Instruction Buffer   Gate Leakage = " << IB->rt_power.readOp.gate_leakage  << " W" << endl;
2268//		cout << indent_str_next << "Branch Target Buffer   Peak Dynamic = " << BTB->rt_power.readOp.dynamic*clockRate  << " W" << endl;
2269//		cout << indent_str_next << "Branch Target Buffer   Subthreshold Leakage = " << BTB->rt_power.readOp.leakage  << " W" << endl;
2270//		cout << indent_str_next << "Branch Target Buffer   Gate Leakage = " << BTB->rt_power.readOp.gate_leakage  << " W" << endl;
2271//		cout << indent_str_next << "Branch Predictor   Peak Dynamic = " << BPT->rt_power.readOp.dynamic*clockRate  << " W" << endl;
2272//		cout << indent_str_next << "Branch Predictor   Subthreshold Leakage = " << BPT->rt_power.readOp.leakage  << " W" << endl;
2273//		cout << indent_str_next << "Branch Predictor   Gate Leakage = " << BPT->rt_power.readOp.gate_leakage  << " W" << endl;
2274        }
2275
2276}
2277
2278void RENAMINGU::computeEnergy(bool is_tdp)
2279{
2280        if (!exist) return;
2281        double pppm_t[4]    = {1,1,1,1};
2282        if (is_tdp)
2283        {//init stats for Peak
2284                if (coredynp.core_ty==OOO){
2285                        if (coredynp.scheu_ty==PhysicalRegFile)
2286                        {
2287                                if (coredynp.rm_ty ==RAMbased)
2288                                {
2289                                        iFRAT->stats_t.readAc.access   = iFRAT->l_ip.num_rd_ports;
2290                                        iFRAT->stats_t.writeAc.access  = iFRAT->l_ip.num_wr_ports;
2291                                        iFRAT->tdp_stats = iFRAT->stats_t;
2292
2293                                        fFRAT->stats_t.readAc.access   = fFRAT->l_ip.num_rd_ports;
2294                                        fFRAT->stats_t.writeAc.access  = fFRAT->l_ip.num_wr_ports;
2295                                        fFRAT->tdp_stats = fFRAT->stats_t;
2296
2297                                }
2298                                else if ((coredynp.rm_ty ==CAMbased))
2299                                {
2300                                        iFRAT->stats_t.readAc.access   = iFRAT->l_ip.num_search_ports;
2301                                        iFRAT->stats_t.writeAc.access  = iFRAT->l_ip.num_wr_ports;
2302                                        iFRAT->tdp_stats = iFRAT->stats_t;
2303
2304                                        fFRAT->stats_t.readAc.access   = fFRAT->l_ip.num_search_ports;
2305                                        fFRAT->stats_t.writeAc.access  = fFRAT->l_ip.num_wr_ports;
2306                                        fFRAT->tdp_stats = fFRAT->stats_t;
2307                                }
2308
2309                                iRRAT->stats_t.readAc.access   = iRRAT->l_ip.num_rd_ports;
2310                                iRRAT->stats_t.writeAc.access  = iRRAT->l_ip.num_wr_ports;
2311                                iRRAT->tdp_stats = iRRAT->stats_t;
2312
2313                                fRRAT->stats_t.readAc.access   = fRRAT->l_ip.num_rd_ports;
2314                                fRRAT->stats_t.writeAc.access  = fRRAT->l_ip.num_wr_ports;
2315                                fRRAT->tdp_stats = fRRAT->stats_t;
2316
2317                                ifreeL->stats_t.readAc.access   = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;;
2318                                ifreeL->stats_t.writeAc.access  = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports;
2319                                ifreeL->tdp_stats = ifreeL->stats_t;
2320
2321                                ffreeL->stats_t.readAc.access   = coredynp.decodeW;//ffreeL->l_ip.num_rd_ports;
2322                                ffreeL->stats_t.writeAc.access  = coredynp.decodeW;//ffreeL->l_ip.num_wr_ports;
2323                                ffreeL->tdp_stats = ffreeL->stats_t;
2324                        }
2325                        else if (coredynp.scheu_ty==ReservationStation){
2326                                if (coredynp.rm_ty ==RAMbased)
2327                                {
2328                                        iFRAT->stats_t.readAc.access    = iFRAT->l_ip.num_rd_ports;
2329                                        iFRAT->stats_t.writeAc.access   = iFRAT->l_ip.num_wr_ports;
2330                                        iFRAT->stats_t.searchAc.access  = iFRAT->l_ip.num_search_ports;
2331                                        iFRAT->tdp_stats = iFRAT->stats_t;
2332
2333                                        fFRAT->stats_t.readAc.access    = fFRAT->l_ip.num_rd_ports;
2334                                        fFRAT->stats_t.writeAc.access   = fFRAT->l_ip.num_wr_ports;
2335                                        fFRAT->stats_t.searchAc.access  = fFRAT->l_ip.num_search_ports;
2336                                        fFRAT->tdp_stats = fFRAT->stats_t;
2337
2338                                }
2339                                else if ((coredynp.rm_ty ==CAMbased))
2340                                {
2341                                        iFRAT->stats_t.readAc.access   = iFRAT->l_ip.num_search_ports;
2342                                        iFRAT->stats_t.writeAc.access  = iFRAT->l_ip.num_wr_ports;
2343                                        iFRAT->tdp_stats = iFRAT->stats_t;
2344
2345                                        fFRAT->stats_t.readAc.access   = fFRAT->l_ip.num_search_ports;
2346                                        fFRAT->stats_t.writeAc.access  = fFRAT->l_ip.num_wr_ports;
2347                                        fFRAT->tdp_stats = fFRAT->stats_t;
2348                                }
2349                                //Unified free list for both int and fp
2350                                ifreeL->stats_t.readAc.access   = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;
2351                                ifreeL->stats_t.writeAc.access  = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports;
2352                                ifreeL->tdp_stats = ifreeL->stats_t;
2353                        }
2354                        idcl->stats_t.readAc.access = coredynp.decodeW;
2355                        fdcl->stats_t.readAc.access = coredynp.decodeW;
2356                        idcl->tdp_stats = idcl->stats_t;
2357                        fdcl->tdp_stats = fdcl->stats_t;
2358                }
2359                else
2360                {
2361                        if (coredynp.issueW>1)
2362                        {
2363                                idcl->stats_t.readAc.access = coredynp.decodeW;
2364                                fdcl->stats_t.readAc.access = coredynp.decodeW;
2365                                idcl->tdp_stats = idcl->stats_t;
2366                                fdcl->tdp_stats = fdcl->stats_t;
2367                        }
2368                }
2369
2370        }
2371        else
2372        {//init stats for Runtime Dynamic (RTP)
2373                if (coredynp.core_ty==OOO){
2374                        if (coredynp.scheu_ty==PhysicalRegFile)
2375                        {
2376                                if (coredynp.rm_ty ==RAMbased)
2377                                {
2378                                        iFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].rename_reads;
2379                                        iFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].rename_writes;
2380                                        iFRAT->rtp_stats = iFRAT->stats_t;
2381
2382                                        fFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].fp_rename_reads;
2383                                        fFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].fp_rename_writes;
2384                                        fFRAT->rtp_stats = fFRAT->stats_t;
2385                                }
2386                                else if ((coredynp.rm_ty ==CAMbased))
2387                                {
2388                                        iFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].rename_reads;
2389                                        iFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].rename_writes;
2390                                        iFRAT->rtp_stats = iFRAT->stats_t;
2391
2392                                        fFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].fp_rename_reads;
2393                                        fFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].fp_rename_writes;
2394                                        fFRAT->rtp_stats = fFRAT->stats_t;
2395                                }
2396
2397                                iRRAT->stats_t.readAc.access   = XML->sys.core[ithCore].rename_writes;//Hack, should be (context switch + branch mispredictions)*16
2398                                iRRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].rename_writes;
2399                                iRRAT->rtp_stats = iRRAT->stats_t;
2400
2401                                fRRAT->stats_t.readAc.access   = XML->sys.core[ithCore].fp_rename_writes;//Hack, should be (context switch + branch mispredictions)*16
2402                                fRRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].fp_rename_writes;
2403                                fRRAT->rtp_stats = fRRAT->stats_t;
2404
2405                                ifreeL->stats_t.readAc.access   = XML->sys.core[ithCore].rename_reads;
2406                                ifreeL->stats_t.writeAc.access  = 2*XML->sys.core[ithCore].rename_writes;
2407                                ifreeL->rtp_stats = ifreeL->stats_t;
2408
2409                                ffreeL->stats_t.readAc.access   = XML->sys.core[ithCore].fp_rename_reads;
2410                                ffreeL->stats_t.writeAc.access  = 2*XML->sys.core[ithCore].fp_rename_writes;
2411                                ffreeL->rtp_stats = ffreeL->stats_t;
2412                        }
2413                        else if (coredynp.scheu_ty==ReservationStation){
2414                                if (coredynp.rm_ty ==RAMbased)
2415                                {
2416                                        iFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].rename_reads;
2417                                        iFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].rename_writes;
2418                                        iFRAT->stats_t.searchAc.access  = XML->sys.core[ithCore].committed_int_instructions;//hack: not all committed instructions use regs.
2419                                        iFRAT->rtp_stats = iFRAT->stats_t;
2420
2421                                        fFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].fp_rename_reads;
2422                                        fFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].fp_rename_writes;
2423                                        fFRAT->stats_t.searchAc.access  = XML->sys.core[ithCore].committed_fp_instructions;
2424                                        fFRAT->rtp_stats = fFRAT->stats_t;
2425                                }
2426                                else if ((coredynp.rm_ty ==CAMbased))
2427                                {
2428                                        iFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].rename_reads;
2429                                        iFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].rename_writes;
2430                                        iFRAT->rtp_stats = iFRAT->stats_t;
2431
2432                                        fFRAT->stats_t.readAc.access   = XML->sys.core[ithCore].fp_rename_reads;
2433                                        fFRAT->stats_t.writeAc.access  = XML->sys.core[ithCore].fp_rename_writes;
2434                                        fFRAT->rtp_stats = fFRAT->stats_t;
2435                                }
2436                                //Unified free list for both int and fp since the ROB act as physcial registers
2437                                ifreeL->stats_t.readAc.access   = XML->sys.core[ithCore].rename_reads +
2438                                        XML->sys.core[ithCore].fp_rename_reads;
2439                                ifreeL->stats_t.writeAc.access  = 2*(XML->sys.core[ithCore].rename_writes +
2440                                        XML->sys.core[ithCore].fp_rename_writes);//HACK: 2-> since some of renaming in the same group
2441                                                                                                                         //are terminated early
2442                                ifreeL->rtp_stats = ifreeL->stats_t;
2443                        }
2444                        idcl->stats_t.readAc.access = 3*coredynp.decodeW*coredynp.decodeW*XML->sys.core[ithCore].rename_reads;
2445                        fdcl->stats_t.readAc.access = 3*coredynp.fp_issueW*coredynp.fp_issueW*XML->sys.core[ithCore].fp_rename_writes;
2446                        idcl->rtp_stats = idcl->stats_t;
2447                        fdcl->rtp_stats = fdcl->stats_t;
2448                }
2449                else
2450                {
2451                        if (coredynp.issueW>1)
2452                        {
2453                                idcl->stats_t.readAc.access = 2*XML->sys.core[ithCore].int_instructions;
2454                                fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions;
2455                                idcl->rtp_stats = idcl->stats_t;
2456                                fdcl->rtp_stats = fdcl->stats_t;
2457                        }
2458                }
2459
2460        }
2461    /* Compute engine */
2462        if (coredynp.core_ty==OOO)
2463        {
2464                if (coredynp.scheu_ty==PhysicalRegFile)
2465                {
2466                        if (coredynp.rm_ty ==RAMbased)
2467                        {
2468                                iFRAT->power_t.reset();
2469                                fFRAT->power_t.reset();
2470
2471                                iFRAT->power_t.readOp.dynamic  +=  (iFRAT->stats_t.readAc.access
2472                                                *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic)
2473                                                +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
2474                                fFRAT->power_t.readOp.dynamic  +=  (fFRAT->stats_t.readAc.access
2475                                                *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic)
2476                                                +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
2477                        }
2478                        else if ((coredynp.rm_ty ==CAMbased))
2479                        {
2480                                iFRAT->power_t.reset();
2481                                fFRAT->power_t.reset();
2482                                iFRAT->power_t.readOp.dynamic  +=  (iFRAT->stats_t.readAc.access
2483                                                *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic)
2484                                                +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
2485                                fFRAT->power_t.readOp.dynamic  +=  (fFRAT->stats_t.readAc.access
2486                                                *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic)
2487                                                +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
2488                        }
2489
2490                        iRRAT->power_t.reset();
2491                        fRRAT->power_t.reset();
2492                        ifreeL->power_t.reset();
2493                        ffreeL->power_t.reset();
2494
2495                        iRRAT->power_t.readOp.dynamic  +=  (iRRAT->stats_t.readAc.access*iRRAT->local_result.power.readOp.dynamic
2496                                        +iRRAT->stats_t.writeAc.access*iRRAT->local_result.power.writeOp.dynamic);
2497                        fRRAT->power_t.readOp.dynamic  +=  (fRRAT->stats_t.readAc.access*fRRAT->local_result.power.readOp.dynamic
2498                                        +fRRAT->stats_t.writeAc.access*fRRAT->local_result.power.writeOp.dynamic);
2499                        ifreeL->power_t.readOp.dynamic  +=  (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic
2500                                        +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic);
2501                        ffreeL->power_t.readOp.dynamic  +=  (ffreeL->stats_t.readAc.access*ffreeL->local_result.power.readOp.dynamic
2502                                        +ffreeL->stats_t.writeAc.access*ffreeL->local_result.power.writeOp.dynamic);
2503
2504                }
2505                else if (coredynp.scheu_ty==ReservationStation)
2506                {
2507                        if (coredynp.rm_ty ==RAMbased)
2508                        {
2509                                iFRAT->power_t.reset();
2510                                fFRAT->power_t.reset();
2511
2512                                iFRAT->power_t.readOp.dynamic  +=  (iFRAT->stats_t.readAc.access
2513                                                *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic)
2514                                                +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic
2515                                                +iFRAT->stats_t.searchAc.access*iFRAT->local_result.power.searchOp.dynamic);
2516                                fFRAT->power_t.readOp.dynamic  +=  (fFRAT->stats_t.readAc.access
2517                                                *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic)
2518                                                +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic
2519                                                +fFRAT->stats_t.searchAc.access*fFRAT->local_result.power.searchOp.dynamic);
2520                        }
2521                        else if ((coredynp.rm_ty ==CAMbased))
2522                        {
2523                                iFRAT->power_t.reset();
2524                                fFRAT->power_t.reset();
2525                                iFRAT->power_t.readOp.dynamic  +=  (iFRAT->stats_t.readAc.access
2526                                                *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic)
2527                                                +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
2528                                fFRAT->power_t.readOp.dynamic  +=  (fFRAT->stats_t.readAc.access
2529                                                *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic)
2530                                                +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
2531                        }
2532                        ifreeL->power_t.reset();
2533                        ifreeL->power_t.readOp.dynamic  +=  (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic
2534                                        +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic);
2535                }
2536
2537        }
2538        else
2539        {
2540                if (coredynp.issueW>1)
2541                {
2542                        idcl->power_t.reset();
2543                        fdcl->power_t.reset();
2544                        set_pppm(pppm_t, idcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access);
2545                        idcl->power_t = idcl->power * pppm_t;
2546                        set_pppm(pppm_t, fdcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access);
2547                        fdcl->power_t = fdcl->power * pppm_t;
2548                }
2549
2550        }
2551
2552        //assign value to tpd and rtp
2553        if (is_tdp)
2554        {
2555                if (coredynp.core_ty==OOO)
2556                {
2557                        if (coredynp.scheu_ty==PhysicalRegFile)
2558                        {
2559                                iFRAT->power   =  iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
2560                                fFRAT->power   =  fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
2561                                iRRAT->power   =  iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread;
2562                                fRRAT->power   =  fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread;
2563                                ifreeL->power  =  ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
2564                                ffreeL->power  =  ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread;
2565                                power	       =  power + (iFRAT->power + fFRAT->power)
2566                                                 + (iRRAT->power + fRRAT->power)
2567                                                 + (ifreeL->power + ffreeL->power);
2568                        }
2569                        else if (coredynp.scheu_ty==ReservationStation)
2570                        {
2571                                iFRAT->power   =  iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
2572                                fFRAT->power   =  fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
2573                                ifreeL->power  =  ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
2574                                power	       =  power + (iFRAT->power + fFRAT->power)
2575                                                 + ifreeL->power;
2576                        }
2577                }
2578                else
2579                {
2580                        power   =  power + idcl->power_t + fdcl->power_t;
2581                }
2582
2583        }
2584        else
2585        {
2586                if (coredynp.core_ty==OOO)
2587                {
2588                        if (coredynp.scheu_ty==PhysicalRegFile)
2589                        {
2590                                iFRAT->rt_power   =  iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
2591                                fFRAT->rt_power   =  fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
2592                                iRRAT->rt_power   =  iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread;
2593                                fRRAT->rt_power   =  fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread;
2594                                ifreeL->rt_power  =  ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
2595                                ffreeL->rt_power  =  ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread;
2596                                rt_power	      =  rt_power + (iFRAT->rt_power + fFRAT->rt_power)
2597                                                   + (iRRAT->rt_power + fRRAT->rt_power)
2598                                                   + (ifreeL->rt_power + ffreeL->rt_power);
2599                        }
2600                        else if (coredynp.scheu_ty==ReservationStation)
2601                        {
2602                                iFRAT->rt_power   =  iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
2603                                fFRAT->rt_power   =  fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
2604                                ifreeL->rt_power  =  ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
2605                                rt_power	      =  rt_power + (iFRAT->rt_power + fFRAT->rt_power)
2606                                                   + ifreeL->rt_power;
2607                        }
2608                }
2609                else
2610                {
2611                        rt_power   =  rt_power + idcl->power_t + fdcl->power_t;
2612                }
2613
2614        }
2615}
2616
2617void RENAMINGU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
2618{
2619        if (!exist) return;
2620        string indent_str(indent, ' ');
2621        string indent_str_next(indent+2, ' ');
2622        bool long_channel = XML->sys.longer_channel_device;
2623
2624
2625        if (is_tdp)
2626        {
2627
2628                if (coredynp.core_ty==OOO)
2629                {
2630                        cout << indent_str<< "Int Front End RAT:" << endl;
2631                        cout << indent_str_next << "Area = " << iFRAT->area.get_area()*1e-6<< " mm^2" << endl;
2632                        cout << indent_str_next << "Peak Dynamic = " << iFRAT->power.readOp.dynamic*clockRate << " W" << endl;
2633                        cout << indent_str_next << "Subthreshold Leakage = "
2634                                << (long_channel? iFRAT->power.readOp.longer_channel_leakage:iFRAT->power.readOp.leakage) <<" W" << endl;
2635                        cout << indent_str_next << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" << endl;
2636                        cout << indent_str_next << "Runtime Dynamic = " << iFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
2637                        cout <<endl;
2638                        cout << indent_str<< "FP Front End RAT:" << endl;
2639                        cout << indent_str_next << "Area = " << fFRAT->area.get_area()*1e-6  << " mm^2" << endl;
2640                        cout << indent_str_next << "Peak Dynamic = " << fFRAT->power.readOp.dynamic*clockRate  << " W" << endl;
2641                        cout << indent_str_next << "Subthreshold Leakage = "
2642                                << (long_channel? fFRAT->power.readOp.longer_channel_leakage:fFRAT->power.readOp.leakage)  << " W" << endl;
2643                        cout << indent_str_next << "Gate Leakage = " << fFRAT->power.readOp.gate_leakage  << " W" << endl;
2644                        cout << indent_str_next << "Runtime Dynamic = " << fFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
2645                        cout <<endl;
2646                        cout << indent_str<<"Free List:" << endl;
2647                        cout << indent_str_next << "Area = " << ifreeL->area.get_area()*1e-6  << " mm^2" << endl;
2648                        cout << indent_str_next << "Peak Dynamic = " << ifreeL->power.readOp.dynamic*clockRate  << " W" << endl;
2649                        cout << indent_str_next << "Subthreshold Leakage = "
2650                                << (long_channel? ifreeL->power.readOp.longer_channel_leakage:ifreeL->power.readOp.leakage)  << " W" << endl;
2651                        cout << indent_str_next << "Gate Leakage = " << ifreeL->power.readOp.gate_leakage  << " W" << endl;
2652                        cout << indent_str_next << "Runtime Dynamic = " << ifreeL->rt_power.readOp.dynamic/executionTime << " W" << endl;
2653                        cout <<endl;
2654
2655                        if (coredynp.scheu_ty==PhysicalRegFile)
2656                        {
2657                                cout << indent_str<< "Int Retire RAT: " << endl;
2658                                cout << indent_str_next << "Area = " << iRRAT->area.get_area() *1e-6 << " mm^2" << endl;
2659                                cout << indent_str_next << "Peak Dynamic = " << iRRAT->power.readOp.dynamic*clockRate  << " W" << endl;
2660                                cout << indent_str_next << "Subthreshold Leakage = "
2661                                        << (long_channel? iRRAT->power.readOp.longer_channel_leakage:iRRAT->power.readOp.leakage)  << " W" << endl;
2662                                cout << indent_str_next << "Gate Leakage = " << iRRAT->power.readOp.gate_leakage  << " W" << endl;
2663                                cout << indent_str_next << "Runtime Dynamic = " << iRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
2664                                cout <<endl;
2665                                cout << indent_str<< "FP Retire RAT:" << endl;
2666                                cout << indent_str_next << "Area = " << fRRAT->area.get_area()  *1e-6<< " mm^2" << endl;
2667                                cout << indent_str_next << "Peak Dynamic = " << fRRAT->power.readOp.dynamic*clockRate  << " W" << endl;
2668                                cout << indent_str_next << "Subthreshold Leakage = "
2669                                        << (long_channel? fRRAT->power.readOp.longer_channel_leakage:fRRAT->power.readOp.leakage)  << " W" << endl;
2670                                cout << indent_str_next << "Gate Leakage = " << fRRAT->power.readOp.gate_leakage  << " W" << endl;
2671                                cout << indent_str_next << "Runtime Dynamic = " << fRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
2672                                cout <<endl;
2673                                cout << indent_str<< "FP Free List:" << endl;
2674                                cout << indent_str_next << "Area = " << ffreeL->area.get_area()*1e-6  << " mm^2" << endl;
2675                                cout << indent_str_next << "Peak Dynamic = " << ffreeL->power.readOp.dynamic*clockRate  << " W" << endl;
2676                                cout << indent_str_next << "Subthreshold Leakage = "
2677                                        << (long_channel? ffreeL->power.readOp.longer_channel_leakage:ffreeL->power.readOp.leakage)  << " W" << endl;
2678                                cout << indent_str_next << "Gate Leakage = " << ffreeL->power.readOp.gate_leakage  << " W" << endl;
2679                                cout << indent_str_next << "Runtime Dynamic = " << ffreeL->rt_power.readOp.dynamic/executionTime << " W" << endl;
2680                                cout <<endl;
2681                        }
2682                }
2683                else
2684                {
2685                        cout << indent_str<< "Int DCL:" << endl;
2686                        cout << indent_str_next << "Peak Dynamic = " << idcl->power.readOp.dynamic*clockRate  << " W" << endl;
2687                        cout << indent_str_next << "Subthreshold Leakage = "
2688                                << (long_channel? idcl->power.readOp.longer_channel_leakage:idcl->power.readOp.leakage)  << " W" << endl;
2689                        cout << indent_str_next << "Gate Leakage = " << idcl->power.readOp.gate_leakage  << " W" << endl;
2690                        cout << indent_str_next << "Runtime Dynamic = " << idcl->rt_power.readOp.dynamic/executionTime << " W" << endl;
2691                        cout << indent_str<<"FP DCL:" << endl;
2692                        cout << indent_str_next << "Peak Dynamic = " << fdcl->power.readOp.dynamic*clockRate  << " W" << endl;
2693                        cout << indent_str_next << "Subthreshold Leakage = "
2694                                << (long_channel? fdcl->power.readOp.longer_channel_leakage:fdcl->power.readOp.leakage)  << " W" << endl;
2695                        cout << indent_str_next << "Gate Leakage = " << fdcl->power.readOp.gate_leakage  << " W" << endl;
2696                        cout << indent_str_next << "Runtime Dynamic = " << fdcl->rt_power.readOp.dynamic/executionTime << " W" << endl;
2697                }
2698        }
2699        else
2700        {
2701                if (coredynp.core_ty==OOO)
2702                {
2703                        cout << indent_str_next << "Int Front End RAT    Peak Dynamic = " << iFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
2704                        cout << indent_str_next << "Int Front End RAT    Subthreshold Leakage = " << iFRAT->rt_power.readOp.leakage <<" W" << endl;
2705                        cout << indent_str_next << "Int Front End RAT    Gate Leakage = " << iFRAT->rt_power.readOp.gate_leakage << " W" << endl;
2706                        cout << indent_str_next << "FP Front End RAT   Peak Dynamic = " << fFRAT->rt_power.readOp.dynamic*clockRate  << " W" << endl;
2707                        cout << indent_str_next << "FP Front End RAT   Subthreshold Leakage = " << fFRAT->rt_power.readOp.leakage  << " W" << endl;
2708                        cout << indent_str_next << "FP Front End RAT   Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage  << " W" << endl;
2709                        cout << indent_str_next << "Free List   Peak Dynamic = " << ifreeL->rt_power.readOp.dynamic*clockRate  << " W" << endl;
2710                        cout << indent_str_next << "Free List   Subthreshold Leakage = " << ifreeL->rt_power.readOp.leakage  << " W" << endl;
2711                        cout << indent_str_next << "Free List   Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage  << " W" << endl;
2712                        if (coredynp.scheu_ty==PhysicalRegFile)
2713                        {
2714                                cout << indent_str_next << "Int Retire RAT   Peak Dynamic = " << iRRAT->rt_power.readOp.dynamic*clockRate  << " W" << endl;
2715                                cout << indent_str_next << "Int Retire RAT   Subthreshold Leakage = " << iRRAT->rt_power.readOp.leakage  << " W" << endl;
2716                                cout << indent_str_next << "Int Retire RAT   Gate Leakage = " << iRRAT->rt_power.readOp.gate_leakage  << " W" << endl;
2717                                cout << indent_str_next << "FP Retire RAT   Peak Dynamic = " << fRRAT->rt_power.readOp.dynamic*clockRate  << " W" << endl;
2718                                cout << indent_str_next << "FP Retire RAT   Subthreshold Leakage = " << fRRAT->rt_power.readOp.leakage  << " W" << endl;
2719                                cout << indent_str_next << "FP Retire RAT   Gate Leakage = " << fRRAT->rt_power.readOp.gate_leakage  << " W" << endl;
2720                                cout << indent_str_next << "FP Free List   Peak Dynamic = " << ffreeL->rt_power.readOp.dynamic*clockRate  << " W" << endl;
2721                                cout << indent_str_next << "FP Free List   Subthreshold Leakage = " << ffreeL->rt_power.readOp.leakage  << " W" << endl;
2722                                cout << indent_str_next << "FP Free List   Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage  << " W" << endl;
2723                        }
2724                }
2725                else
2726                {
2727                        cout << indent_str_next << "Int DCL   Peak Dynamic = " << idcl->rt_power.readOp.dynamic*clockRate  << " W" << endl;
2728                        cout << indent_str_next << "Int DCL   Subthreshold Leakage = " << idcl->rt_power.readOp.leakage  << " W" << endl;
2729                        cout << indent_str_next << "Int DCL   Gate Leakage = " << idcl->rt_power.readOp.gate_leakage  << " W" << endl;
2730                        cout << indent_str_next << "FP DCL   Peak Dynamic = " << fdcl->rt_power.readOp.dynamic*clockRate  << " W" << endl;
2731                        cout << indent_str_next << "FP DCL   Subthreshold Leakage = " << fdcl->rt_power.readOp.leakage  << " W" << endl;
2732                        cout << indent_str_next << "FP DCL   Gate Leakage = " << fdcl->rt_power.readOp.gate_leakage  << " W" << endl;
2733                }
2734        }
2735
2736}
2737
2738
2739void SchedulerU::computeEnergy(bool is_tdp)
2740{
2741        if (!exist) return;
2742        double ROB_duty_cycle;
2743//	ROB_duty_cycle = ((coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0
2744//			+ coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 ? (coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0
2745//					+ coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1;
2746        ROB_duty_cycle = 1;
2747        //init stats
2748        if (is_tdp)
2749        {
2750                if (coredynp.core_ty==OOO)
2751                {
2752                        int_inst_window->stats_t.readAc.access    = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports;
2753                        int_inst_window->stats_t.writeAc.access   = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports;
2754                        int_inst_window->stats_t.searchAc.access  = coredynp.issueW*coredynp.num_pipelines;
2755                        int_inst_window->tdp_stats                = int_inst_window->stats_t;
2756                        fp_inst_window->stats_t.readAc.access     = fp_inst_window->l_ip.num_rd_ports*coredynp.num_fp_pipelines;
2757                        fp_inst_window->stats_t.writeAc.access    = fp_inst_window->l_ip.num_wr_ports*coredynp.num_fp_pipelines;
2758                        fp_inst_window->stats_t.searchAc.access   = fp_inst_window->l_ip.num_search_ports*coredynp.num_fp_pipelines;
2759                        fp_inst_window->tdp_stats                 = fp_inst_window->stats_t;
2760
2761                        if (XML->sys.core[ithCore].ROB_size >0)
2762                        {
2763                                ROB->stats_t.readAc.access   = coredynp.commitW*coredynp.num_pipelines*ROB_duty_cycle;
2764                                ROB->stats_t.writeAc.access  = coredynp.issueW*coredynp.num_pipelines*ROB_duty_cycle;
2765                                ROB->tdp_stats        = ROB->stats_t;
2766
2767                                /*
2768                                 * When inst commits, ROB must be read.
2769                                 * Because for Physcial register based cores, physical register tag in ROB
2770                                 * need to be read out and write into RRAT/CAM based RAT.
2771                                 * For RS based cores, register content that stored in ROB must be
2772                                 * read out and stored in architectural registers.
2773                                 *
2774                                 * if no-register is involved, the ROB read out operation when instruction commits can be ignored.
2775                                 * assuming 20% insts. belong this type.
2776                                 * TODO: ROB duty_cycle need to be revisited
2777                                 */
2778                        }
2779
2780                }
2781                else if (coredynp.multithreaded)
2782                {
2783                        int_inst_window->stats_t.readAc.access   = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports;
2784                        int_inst_window->stats_t.writeAc.access  = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports;
2785                        int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines;
2786                        int_inst_window->tdp_stats       = int_inst_window->stats_t;
2787                }
2788
2789     }
2790    else
2791    {//rtp
2792                if (coredynp.core_ty==OOO)
2793                {
2794                        int_inst_window->stats_t.readAc.access   = XML->sys.core[ithCore].inst_window_reads;
2795                        int_inst_window->stats_t.writeAc.access  = XML->sys.core[ithCore].inst_window_writes;
2796                        int_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].inst_window_wakeup_accesses;
2797                        int_inst_window->rtp_stats               = int_inst_window->stats_t;
2798                        fp_inst_window->stats_t.readAc.access    = XML->sys.core[ithCore].fp_inst_window_reads;
2799                        fp_inst_window->stats_t.writeAc.access   = XML->sys.core[ithCore].fp_inst_window_writes;
2800                        fp_inst_window->stats_t.searchAc.access  = XML->sys.core[ithCore].fp_inst_window_wakeup_accesses;
2801                        fp_inst_window->rtp_stats                = fp_inst_window->stats_t;
2802
2803                        if (XML->sys.core[ithCore].ROB_size >0)
2804                        {
2805
2806                                ROB->stats_t.readAc.access   = XML->sys.core[ithCore].ROB_reads;
2807                                ROB->stats_t.writeAc.access  = XML->sys.core[ithCore].ROB_writes;
2808                                /* ROB need to be updated in RS based OOO when new values are produced,
2809                                 * this update may happen before the commit stage when ROB entry is released
2810                                 * 1. ROB write at instruction inserted in
2811                                 * 2. ROB write as results produced (for RS based OOO only)
2812                                 * 3. ROB read  as instruction committed. For RS based OOO, data values are read out and sent to ARF
2813                                 * For Physical reg based OOO, no data stored in ROB, but register tags need to be
2814                                 * read out and used to set the RRAT and to recycle the register tag to free list buffer
2815                                 */
2816                                ROB->rtp_stats        = ROB->stats_t;
2817                        }
2818
2819                }
2820                else if (coredynp.multithreaded)
2821                {
2822                        int_inst_window->stats_t.readAc.access    = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions;
2823                        int_inst_window->stats_t.writeAc.access   = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions;
2824                        int_inst_window->stats_t.searchAc.access  = 2*(XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions);
2825                        int_inst_window->rtp_stats                = int_inst_window->stats_t;
2826                }
2827    }
2828
2829        //computation engine
2830        if (coredynp.core_ty==OOO)
2831        {
2832                int_inst_window->power_t.reset();
2833                fp_inst_window->power_t.reset();
2834
2835                /* each instruction needs to write to scheduler, read out when all resources and source operands are ready
2836                 * two search ops with one for each source operand
2837                 *
2838                 */
2839                int_inst_window->power_t.readOp.dynamic  +=  int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access
2840                                        + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access
2841                                        + int_inst_window->local_result.power.writeOp.dynamic  * int_inst_window->stats_t.writeAc.access
2842                                        + int_inst_window->stats_t.readAc.access * instruction_selection->power.readOp.dynamic;
2843
2844                fp_inst_window->power_t.readOp.dynamic   +=  fp_inst_window->local_result.power.readOp.dynamic * fp_inst_window->stats_t.readAc.access
2845                                        + fp_inst_window->local_result.power.searchOp.dynamic * fp_inst_window->stats_t.searchAc.access
2846                                        + fp_inst_window->local_result.power.writeOp.dynamic * fp_inst_window->stats_t.writeAc.access
2847                                        + fp_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic;
2848
2849                if (XML->sys.core[ithCore].ROB_size >0)
2850                {
2851                        ROB->power_t.reset();
2852                        ROB->power_t.readOp.dynamic   +=  ROB->local_result.power.readOp.dynamic*ROB->stats_t.readAc.access +
2853                                                ROB->stats_t.writeAc.access*ROB->local_result.power.writeOp.dynamic;
2854                }
2855
2856
2857
2858
2859        }
2860        else if (coredynp.multithreaded)
2861        {
2862                int_inst_window->power_t.reset();
2863                int_inst_window->power_t.readOp.dynamic  +=  int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access
2864                                                  + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access
2865                                          + int_inst_window->local_result.power.writeOp.dynamic  * int_inst_window->stats_t.writeAc.access
2866                                          + int_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic;
2867        }
2868
2869        //assign values
2870        if (is_tdp)
2871        {
2872                if (coredynp.core_ty==OOO)
2873                {
2874                        int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
2875                        fp_inst_window->power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
2876                        power	   = power + int_inst_window->power + fp_inst_window->power;
2877                        if (XML->sys.core[ithCore].ROB_size >0)
2878                        {
2879                                ROB->power = ROB->power_t + ROB->local_result.power*pppm_lkg;
2880                                power	   = power + ROB->power;
2881                        }
2882
2883                }
2884                else if (coredynp.multithreaded)
2885                {
2886                        //			set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
2887                        int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
2888                        power	   = power + int_inst_window->power;
2889        }
2890
2891     }
2892    else
2893    {//rtp
2894                if (coredynp.core_ty==OOO)
2895                {
2896                        int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
2897                        fp_inst_window->rt_power  = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
2898                        rt_power	              = rt_power + int_inst_window->rt_power + fp_inst_window->rt_power;
2899                        if (XML->sys.core[ithCore].ROB_size >0)
2900                        {
2901                                ROB->rt_power = ROB->power_t + ROB->local_result.power*pppm_lkg;
2902                                rt_power	              = rt_power + ROB->rt_power;
2903                        }
2904
2905                }
2906                else if (coredynp.multithreaded)
2907                {
2908                        //			set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
2909                        int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
2910                        rt_power	              = rt_power + int_inst_window->rt_power;
2911        }
2912    }
2913//	set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
2914//	cout<<"Scheduler power="<<power.readOp.dynamic<<"leakage="<<power.readOp.leakage<<endl;
2915//	cout<<"IW="<<int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.readAc.access +
2916//    + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access<<"leakage="<<int_inst_window->local_result.power.readOp.leakage<<endl;
2917//	cout<<"selection"<<instruction_selection->power.readOp.dynamic<<"leakage"<<instruction_selection->power.readOp.leakage<<endl;
2918}
2919
2920void SchedulerU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
2921{
2922        if (!exist) return;
2923        string indent_str(indent, ' ');
2924        string indent_str_next(indent+2, ' ');
2925        bool long_channel = XML->sys.longer_channel_device;
2926
2927
2928        if (is_tdp)
2929        {
2930                if (coredynp.core_ty==OOO)
2931                {
2932                        cout << indent_str << "Instruction Window:" << endl;
2933                        cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl;
2934                        cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
2935                        cout << indent_str_next << "Subthreshold Leakage = "
2936                                << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl;
2937                        cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl;
2938                        cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
2939                        cout <<endl;
2940                        cout << indent_str << "FP Instruction Window:" << endl;
2941                        cout << indent_str_next << "Area = " << fp_inst_window->area.get_area()*1e-6  << " mm^2" << endl;
2942                        cout << indent_str_next << "Peak Dynamic = " << fp_inst_window->power.readOp.dynamic*clockRate  << " W" << endl;
2943                        cout << indent_str_next << "Subthreshold Leakage = "
2944                                << (long_channel? fp_inst_window->power.readOp.longer_channel_leakage:fp_inst_window->power.readOp.leakage ) << " W" << endl;
2945                        cout << indent_str_next << "Gate Leakage = " << fp_inst_window->power.readOp.gate_leakage  << " W" << endl;
2946                        cout << indent_str_next << "Runtime Dynamic = " << fp_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
2947                        cout <<endl;
2948                        if (XML->sys.core[ithCore].ROB_size >0)
2949                        {
2950                                cout << indent_str<<"ROB:" << endl;
2951                                cout << indent_str_next << "Area = " << ROB->area.get_area() *1e-6 << " mm^2" << endl;
2952                                cout << indent_str_next << "Peak Dynamic = " << ROB->power.readOp.dynamic*clockRate  << " W" << endl;
2953                                cout << indent_str_next << "Subthreshold Leakage = "
2954                                << (long_channel? ROB->power.readOp.longer_channel_leakage:ROB->power.readOp.leakage)  << " W" << endl;
2955                                cout << indent_str_next << "Gate Leakage = " << ROB->power.readOp.gate_leakage  << " W" << endl;
2956                                cout << indent_str_next << "Runtime Dynamic = " << ROB->rt_power.readOp.dynamic/executionTime << " W" << endl;
2957                                cout <<endl;
2958                        }
2959                }
2960                else if (coredynp.multithreaded)
2961                {
2962                        cout << indent_str << "Instruction Window:" << endl;
2963                        cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl;
2964                        cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
2965                        cout << indent_str_next << "Subthreshold Leakage = "
2966                                << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl;
2967                        cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl;
2968                        cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
2969                        cout <<endl;
2970                }
2971        }
2972        else
2973        {
2974                if (coredynp.core_ty==OOO)
2975                {
2976                        cout << indent_str_next << "Instruction Window    Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
2977                        cout << indent_str_next << "Instruction Window    Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl;
2978                        cout << indent_str_next << "Instruction Window    Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
2979                        cout << indent_str_next << "FP Instruction Window   Peak Dynamic = " << fp_inst_window->rt_power.readOp.dynamic*clockRate  << " W" << endl;
2980                        cout << indent_str_next << "FP Instruction Window   Subthreshold Leakage = " << fp_inst_window->rt_power.readOp.leakage  << " W" << endl;
2981                        cout << indent_str_next << "FP Instruction Window   Gate Leakage = " << fp_inst_window->rt_power.readOp.gate_leakage  << " W" << endl;
2982                        if (XML->sys.core[ithCore].ROB_size >0)
2983                        {
2984                                cout << indent_str_next << "ROB   Peak Dynamic = " << ROB->rt_power.readOp.dynamic*clockRate  << " W" << endl;
2985                                cout << indent_str_next << "ROB   Subthreshold Leakage = " << ROB->rt_power.readOp.leakage  << " W" << endl;
2986                                cout << indent_str_next << "ROB   Gate Leakage = " << ROB->rt_power.readOp.gate_leakage  << " W" << endl;
2987                        }
2988                }
2989                else if (coredynp.multithreaded)
2990                {
2991                        cout << indent_str_next << "Instruction Window    Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
2992                        cout << indent_str_next << "Instruction Window    Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl;
2993                        cout << indent_str_next << "Instruction Window    Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
2994                }
2995        }
2996
2997}
2998
2999void LoadStoreU::computeEnergy(bool is_tdp)
3000{
3001        if (!exist) return;
3002        if (is_tdp)
3003            {
3004                //init stats for Peak
3005                dcache.caches->stats_t.readAc.access  = 0.67*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
3006                dcache.caches->stats_t.readAc.miss    = 0;
3007                dcache.caches->stats_t.readAc.hit     = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss;
3008                dcache.caches->stats_t.writeAc.access = 0.33*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
3009                dcache.caches->stats_t.writeAc.miss   = 0;
3010                dcache.caches->stats_t.writeAc.hit    = dcache.caches->stats_t.writeAc.access -	dcache.caches->stats_t.writeAc.miss;
3011                dcache.caches->tdp_stats = dcache.caches->stats_t;
3012
3013                dcache.missb->stats_t.readAc.access  = dcache.missb->l_ip.num_search_ports;
3014                dcache.missb->stats_t.writeAc.access = dcache.missb->l_ip.num_search_ports;
3015                dcache.missb->tdp_stats = dcache.missb->stats_t;
3016
3017                dcache.ifb->stats_t.readAc.access  = dcache.ifb->l_ip.num_search_ports;
3018                dcache.ifb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports;
3019                dcache.ifb->tdp_stats = dcache.ifb->stats_t;
3020
3021                dcache.prefetchb->stats_t.readAc.access  = dcache.prefetchb->l_ip.num_search_ports;
3022                dcache.prefetchb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports;
3023                dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t;
3024                if (cache_p==Write_back)
3025                {
3026                        dcache.wbb->stats_t.readAc.access  = dcache.wbb->l_ip.num_search_ports;
3027                        dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports;
3028                        dcache.wbb->tdp_stats = dcache.wbb->stats_t;
3029                }
3030
3031                LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = LSQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
3032                LSQ->tdp_stats = LSQ->stats_t;
3033                if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
3034                {
3035                        LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = LoadQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
3036                        LoadQ->tdp_stats = LoadQ->stats_t;
3037                }
3038            }
3039            else
3040            {
3041                //init stats for Runtime Dynamic (RTP)
3042                dcache.caches->stats_t.readAc.access  = XML->sys.core[ithCore].dcache.read_accesses;
3043                dcache.caches->stats_t.readAc.miss    = XML->sys.core[ithCore].dcache.read_misses;
3044                dcache.caches->stats_t.readAc.hit     = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss;
3045                dcache.caches->stats_t.writeAc.access = XML->sys.core[ithCore].dcache.write_accesses;
3046                dcache.caches->stats_t.writeAc.miss   = XML->sys.core[ithCore].dcache.write_misses;
3047                dcache.caches->stats_t.writeAc.hit    = dcache.caches->stats_t.writeAc.access -	dcache.caches->stats_t.writeAc.miss;
3048                dcache.caches->rtp_stats = dcache.caches->stats_t;
3049
3050                if (cache_p==Write_back)
3051                {
3052                        dcache.missb->stats_t.readAc.access  = dcache.caches->stats_t.writeAc.miss;
3053                        dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
3054                        dcache.missb->rtp_stats = dcache.missb->stats_t;
3055
3056                        dcache.ifb->stats_t.readAc.access  = dcache.caches->stats_t.writeAc.miss;
3057                        dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
3058                        dcache.ifb->rtp_stats = dcache.ifb->stats_t;
3059
3060                        dcache.prefetchb->stats_t.readAc.access  = dcache.caches->stats_t.writeAc.miss;
3061                        dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
3062                        dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t;
3063
3064                        dcache.wbb->stats_t.readAc.access  = dcache.caches->stats_t.writeAc.miss;
3065                        dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
3066                        dcache.wbb->rtp_stats = dcache.wbb->stats_t;
3067                }
3068                else
3069                {
3070                        dcache.missb->stats_t.readAc.access  = dcache.caches->stats_t.readAc.miss;
3071                        dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
3072                        dcache.missb->rtp_stats = dcache.missb->stats_t;
3073
3074                        dcache.ifb->stats_t.readAc.access  = dcache.caches->stats_t.readAc.miss;
3075                        dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
3076                        dcache.ifb->rtp_stats = dcache.ifb->stats_t;
3077
3078                        dcache.prefetchb->stats_t.readAc.access  = dcache.caches->stats_t.readAc.miss;
3079                        dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
3080                        dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t;
3081                }
3082
3083                LSQ->stats_t.readAc.access  = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;//flush overhead considered
3084                LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;
3085                LSQ->rtp_stats = LSQ->stats_t;
3086
3087                if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
3088                {
3089                        LoadQ->stats_t.readAc.access  = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions;
3090                        LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions;
3091                        LoadQ->rtp_stats = LoadQ->stats_t;
3092                }
3093
3094            }
3095
3096        dcache.power_t.reset();
3097        LSQ->power_t.reset();
3098    dcache.power_t.readOp.dynamic	+= (dcache.caches->stats_t.readAc.hit*dcache.caches->local_result.power.readOp.dynamic+
3099                dcache.caches->stats_t.readAc.miss*dcache.caches->local_result.power.readOp.dynamic+
3100                dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.tag_array2->power.readOp.dynamic+
3101                dcache.caches->stats_t.writeAc.access*dcache.caches->local_result.power.writeOp.dynamic);
3102
3103    if (cache_p==Write_back)
3104    {//write miss will generate a write later
3105        dcache.power_t.readOp.dynamic	+= dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.power.writeOp.dynamic;
3106    }
3107
3108    dcache.power_t.readOp.dynamic	+=  dcache.missb->stats_t.readAc.access*dcache.missb->local_result.power.searchOp.dynamic +
3109            dcache.missb->stats_t.writeAc.access*dcache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
3110    dcache.power_t.readOp.dynamic	+=  dcache.ifb->stats_t.readAc.access*dcache.ifb->local_result.power.searchOp.dynamic +
3111            dcache.ifb->stats_t.writeAc.access*dcache.ifb->local_result.power.writeOp.dynamic;
3112    dcache.power_t.readOp.dynamic	+=  dcache.prefetchb->stats_t.readAc.access*dcache.prefetchb->local_result.power.searchOp.dynamic +
3113            dcache.prefetchb->stats_t.writeAc.access*dcache.prefetchb->local_result.power.writeOp.dynamic;
3114    if (cache_p==Write_back)
3115    {
3116        dcache.power_t.readOp.dynamic	+=  dcache.wbb->stats_t.readAc.access*dcache.wbb->local_result.power.searchOp.dynamic
3117                        + dcache.wbb->stats_t.writeAc.access*dcache.wbb->local_result.power.writeOp.dynamic;
3118    }
3119
3120    if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
3121    {
3122        LoadQ->power_t.reset();
3123        LoadQ->power_t.readOp.dynamic  +=  LoadQ->stats_t.readAc.access*(LoadQ->local_result.power.searchOp.dynamic+ LoadQ->local_result.power.readOp.dynamic)+
3124                LoadQ->stats_t.writeAc.access*LoadQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LoadQ
3125
3126        LSQ->power_t.readOp.dynamic  +=  LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic)
3127                        + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ
3128
3129    }
3130    else
3131    {
3132        LSQ->power_t.readOp.dynamic  +=  LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic)
3133                        + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ
3134
3135    }
3136
3137    if (is_tdp)
3138    {
3139//    	dcache.power = dcache.power_t + (dcache.caches->local_result.power)*pppm_lkg +
3140//    			(dcache.missb->local_result.power +
3141//    			dcache.ifb->local_result.power +
3142//    			dcache.prefetchb->local_result.power +
3143//    			dcache.wbb->local_result.power)*pppm_Isub;
3144        dcache.power = dcache.power_t + (dcache.caches->local_result.power +
3145                        dcache.missb->local_result.power +
3146                        dcache.ifb->local_result.power +
3147                        dcache.prefetchb->local_result.power) *pppm_lkg;
3148        if (cache_p==Write_back)
3149        {
3150                dcache.power = dcache.power + dcache.wbb->local_result.power*pppm_lkg;
3151        }
3152
3153        LSQ->power = LSQ->power_t + LSQ->local_result.power *pppm_lkg;
3154        power     = power + dcache.power + LSQ->power;
3155
3156        if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
3157        {
3158                LoadQ->power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg;
3159                power     = power + LoadQ->power;
3160        }
3161    }
3162    else
3163    {
3164//    	dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power +
3165//    			dcache.missb->local_result.power +
3166//    			dcache.ifb->local_result.power +
3167//    			dcache.prefetchb->local_result.power +
3168//    			dcache.wbb->local_result.power)*pppm_lkg;
3169        dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power +
3170                        dcache.missb->local_result.power +
3171                        dcache.ifb->local_result.power +
3172                        dcache.prefetchb->local_result.power )*pppm_lkg;
3173
3174        if (cache_p==Write_back)
3175        {
3176                dcache.rt_power = dcache.rt_power + dcache.wbb->local_result.power*pppm_lkg;
3177        }
3178
3179        LSQ->rt_power = LSQ->power_t + LSQ->local_result.power *pppm_lkg;
3180        rt_power     = rt_power + dcache.rt_power + LSQ->rt_power;
3181
3182        if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
3183        {
3184                LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg;
3185                rt_power     = rt_power + LoadQ->rt_power;
3186        }
3187    }
3188}
3189
3190
3191void LoadStoreU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
3192{
3193        if (!exist) return;
3194        string indent_str(indent, ' ');
3195        string indent_str_next(indent+2, ' ');
3196        bool long_channel = XML->sys.longer_channel_device;
3197
3198
3199        if (is_tdp)
3200        {
3201                cout << indent_str << "Data Cache:" << endl;
3202                cout << indent_str_next << "Area = " << dcache.area.get_area()*1e-6<< " mm^2" << endl;
3203                cout << indent_str_next << "Peak Dynamic = " << dcache.power.readOp.dynamic*clockRate << " W" << endl;
3204                cout << indent_str_next << "Subthreshold Leakage = "
3205                        << (long_channel? dcache.power.readOp.longer_channel_leakage:dcache.power.readOp.leakage )<<" W" << endl;
3206                cout << indent_str_next << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" << endl;
3207                cout << indent_str_next << "Runtime Dynamic = " << dcache.rt_power.readOp.dynamic/executionTime << " W" << endl;
3208                cout <<endl;
3209                if (coredynp.core_ty==Inorder)
3210                {
3211                        cout << indent_str << "Load/Store Queue:" << endl;
3212                        cout << indent_str_next << "Area = " << LSQ->area.get_area()*1e-6  << " mm^2" << endl;
3213                        cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate  << " W" << endl;
3214                        cout << indent_str_next << "Subthreshold Leakage = "
3215                                << (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage)  << " W" << endl;
3216                        cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage  << " W" << endl;
3217                        cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime << " W" << endl;
3218                        cout <<endl;
3219                }
3220                else
3221
3222                {
3223                        if (XML->sys.core[ithCore].load_buffer_size >0)
3224                        {
3225                                cout << indent_str << "LoadQ:" << endl;
3226                                cout << indent_str_next << "Area = " << LoadQ->area.get_area() *1e-6 << " mm^2" << endl;
3227                                cout << indent_str_next << "Peak Dynamic = " << LoadQ->power.readOp.dynamic*clockRate  << " W" << endl;
3228                                cout << indent_str_next << "Subthreshold Leakage = "
3229                                << (long_channel? LoadQ->power.readOp.longer_channel_leakage:LoadQ->power.readOp.leakage)  << " W" << endl;
3230                                cout << indent_str_next << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage  << " W" << endl;
3231                                cout << indent_str_next << "Runtime Dynamic = " << LoadQ->rt_power.readOp.dynamic/executionTime << " W" << endl;
3232                                cout <<endl;
3233                        }
3234                        cout << indent_str<< "StoreQ:" << endl;
3235                        cout << indent_str_next << "Area = " << LSQ->area.get_area()  *1e-6<< " mm^2" << endl;
3236                        cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate  << " W" << endl;
3237                        cout << indent_str_next << "Subthreshold Leakage = "
3238                                << (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage)  << " W" << endl;
3239                        cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage  << " W" << endl;
3240                        cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime<< " W" << endl;
3241                        cout <<endl;
3242                }
3243        }
3244        else
3245        {
3246                cout << indent_str_next << "Data Cache    Peak Dynamic = " << dcache.rt_power.readOp.dynamic*clockRate << " W" << endl;
3247                cout << indent_str_next << "Data Cache    Subthreshold Leakage = " << dcache.rt_power.readOp.leakage <<" W" << endl;
3248                cout << indent_str_next << "Data Cache    Gate Leakage = " << dcache.rt_power.readOp.gate_leakage << " W" << endl;
3249                if (coredynp.core_ty==Inorder)
3250                {
3251                        cout << indent_str_next << "Load/Store Queue   Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate  << " W" << endl;
3252                        cout << indent_str_next << "Load/Store Queue   Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage  << " W" << endl;
3253                        cout << indent_str_next << "Load/Store Queue   Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage  << " W" << endl;
3254                }
3255                else
3256                {
3257                        cout << indent_str_next << "LoadQ   Peak Dynamic = " << LoadQ->rt_power.readOp.dynamic*clockRate  << " W" << endl;
3258                        cout << indent_str_next << "LoadQ   Subthreshold Leakage = " << LoadQ->rt_power.readOp.leakage  << " W" << endl;
3259                        cout << indent_str_next << "LoadQ   Gate Leakage = " << LoadQ->rt_power.readOp.gate_leakage  << " W" << endl;
3260                        cout << indent_str_next << "StoreQ   Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate  << " W" << endl;
3261                        cout << indent_str_next << "StoreQ   Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage  << " W" << endl;
3262                        cout << indent_str_next << "StoreQ   Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage  << " W" << endl;
3263                }
3264        }
3265
3266}
3267
3268void MemManU::computeEnergy(bool is_tdp)
3269{
3270
3271        if (!exist) return;
3272        if (is_tdp)
3273    {
3274        //init stats for Peak
3275        itlb->stats_t.readAc.access  = itlb->l_ip.num_search_ports;
3276        itlb->stats_t.readAc.miss    = 0;
3277        itlb->stats_t.readAc.hit     = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss;
3278        itlb->tdp_stats = itlb->stats_t;
3279
3280        dtlb->stats_t.readAc.access  = dtlb->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
3281        dtlb->stats_t.readAc.miss    = 0;
3282        dtlb->stats_t.readAc.hit     = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss;
3283        dtlb->tdp_stats = dtlb->stats_t;
3284     }
3285    else
3286    {
3287        //init stats for Runtime Dynamic (RTP)
3288        itlb->stats_t.readAc.access  = XML->sys.core[ithCore].itlb.total_accesses;
3289        itlb->stats_t.readAc.miss    = XML->sys.core[ithCore].itlb.total_misses;
3290        itlb->stats_t.readAc.hit     = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss;
3291        itlb->rtp_stats = itlb->stats_t;
3292
3293        dtlb->stats_t.readAc.access  = XML->sys.core[ithCore].dtlb.total_accesses;
3294        dtlb->stats_t.readAc.miss    = XML->sys.core[ithCore].dtlb.total_misses;
3295        dtlb->stats_t.readAc.hit     = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss;
3296        dtlb->rtp_stats = dtlb->stats_t;
3297    }
3298
3299    itlb->power_t.reset();
3300    dtlb->power_t.reset();
3301        itlb->power_t.readOp.dynamic +=  itlb->stats_t.readAc.access*itlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits
3302                              +itlb->stats_t.readAc.miss*itlb->local_result.power.writeOp.dynamic;
3303        dtlb->power_t.readOp.dynamic +=  dtlb->stats_t.readAc.access*dtlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits
3304                              +dtlb->stats_t.readAc.miss*dtlb->local_result.power.writeOp.dynamic;
3305
3306        if (is_tdp)
3307            {
3308                itlb->power = itlb->power_t + itlb->local_result.power *pppm_lkg;
3309                dtlb->power = dtlb->power_t + dtlb->local_result.power *pppm_lkg;
3310                power     = power + itlb->power + dtlb->power;
3311            }
3312            else
3313            {
3314                        itlb->rt_power = itlb->power_t + itlb->local_result.power *pppm_lkg;
3315                        dtlb->rt_power = dtlb->power_t + dtlb->local_result.power *pppm_lkg;
3316                        rt_power     = rt_power + itlb->rt_power + dtlb->rt_power;
3317            }
3318}
3319
3320void MemManU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
3321{
3322        if (!exist) return;
3323        string indent_str(indent, ' ');
3324        string indent_str_next(indent+2, ' ');
3325        bool long_channel = XML->sys.longer_channel_device;
3326
3327
3328
3329
3330        if (is_tdp)
3331        {
3332                cout << indent_str << "Itlb:" << endl;
3333                cout << indent_str_next << "Area = " << itlb->area.get_area()*1e-6<< " mm^2" << endl;
3334                cout << indent_str_next << "Peak Dynamic = " << itlb->power.readOp.dynamic*clockRate << " W" << endl;
3335                cout << indent_str_next << "Subthreshold Leakage = "
3336                        << (long_channel? itlb->power.readOp.longer_channel_leakage:itlb->power.readOp.leakage) <<" W" << endl;
3337                cout << indent_str_next << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" << endl;
3338                cout << indent_str_next << "Runtime Dynamic = " << itlb->rt_power.readOp.dynamic/executionTime << " W" << endl;
3339                cout <<endl;
3340                cout << indent_str<< "Dtlb:" << endl;
3341                cout << indent_str_next << "Area = " << dtlb->area.get_area()*1e-6  << " mm^2" << endl;
3342                cout << indent_str_next << "Peak Dynamic = " << dtlb->power.readOp.dynamic*clockRate  << " W" << endl;
3343                cout << indent_str_next << "Subthreshold Leakage = "
3344                        << (long_channel? dtlb->power.readOp.longer_channel_leakage:dtlb->power.readOp.leakage)  << " W" << endl;
3345                cout << indent_str_next << "Gate Leakage = " << dtlb->power.readOp.gate_leakage  << " W" << endl;
3346                cout << indent_str_next << "Runtime Dynamic = " << dtlb->rt_power.readOp.dynamic/executionTime << " W" << endl;
3347                cout <<endl;
3348        }
3349        else
3350        {
3351                cout << indent_str_next << "Itlb    Peak Dynamic = " << itlb->rt_power.readOp.dynamic*clockRate << " W" << endl;
3352                cout << indent_str_next << "Itlb    Subthreshold Leakage = " << itlb->rt_power.readOp.leakage <<" W" << endl;
3353                cout << indent_str_next << "Itlb    Gate Leakage = " << itlb->rt_power.readOp.gate_leakage << " W" << endl;
3354                cout << indent_str_next << "Dtlb   Peak Dynamic = " << dtlb->rt_power.readOp.dynamic*clockRate  << " W" << endl;
3355                cout << indent_str_next << "Dtlb   Subthreshold Leakage = " << dtlb->rt_power.readOp.leakage  << " W" << endl;
3356                cout << indent_str_next << "Dtlb   Gate Leakage = " << dtlb->rt_power.readOp.gate_leakage  << " W" << endl;
3357        }
3358
3359}
3360
3361void RegFU::computeEnergy(bool is_tdp)
3362{
3363/*
3364 * Architecture RF and physical RF cannot be present at the same time.
3365 * Therefore, the RF stats can only refer to either ARF or PRF;
3366 * And the same stats can be used for both.
3367 */
3368        if (!exist) return;
3369        if (is_tdp)
3370    {
3371        //init stats for Peak
3372        IRF->stats_t.readAc.access  = coredynp.issueW*2*(coredynp.ALU_duty_cycle*1.1+
3373                        (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines;
3374        IRF->stats_t.writeAc.access  = coredynp.issueW*(coredynp.ALU_duty_cycle*1.1+
3375                        (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines;
3376        //Rule of Thumb: about 10% RF related instructions do not need to access ALUs
3377        IRF->tdp_stats = IRF->stats_t;
3378
3379        FRF->stats_t.readAc.access  = FRF->l_ip.num_rd_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines;
3380        FRF->stats_t.writeAc.access  = FRF->l_ip.num_wr_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines;
3381        FRF->tdp_stats = FRF->stats_t;
3382        if (coredynp.regWindowing)
3383        {
3384                RFWIN->stats_t.readAc.access  = 0;//0.5*RFWIN->l_ip.num_rw_ports;
3385                RFWIN->stats_t.writeAc.access  = 0;//0.5*RFWIN->l_ip.num_rw_ports;
3386                RFWIN->tdp_stats = RFWIN->stats_t;
3387        }
3388     }
3389    else
3390    {
3391        //init stats for Runtime Dynamic (RTP)
3392        IRF->stats_t.readAc.access  = XML->sys.core[ithCore].int_regfile_reads;//TODO: no diff on archi and phy
3393        IRF->stats_t.writeAc.access  = XML->sys.core[ithCore].int_regfile_writes;
3394        IRF->rtp_stats = IRF->stats_t;
3395
3396        FRF->stats_t.readAc.access  = XML->sys.core[ithCore].float_regfile_reads;
3397        FRF->stats_t.writeAc.access  = XML->sys.core[ithCore].float_regfile_writes;
3398        FRF->rtp_stats = FRF->stats_t;
3399        if (coredynp.regWindowing)
3400        {
3401                RFWIN->stats_t.readAc.access  = XML->sys.core[ithCore].function_calls*16;
3402                RFWIN->stats_t.writeAc.access  = XML->sys.core[ithCore].function_calls*16;
3403                RFWIN->rtp_stats = RFWIN->stats_t;
3404
3405                IRF->stats_t.readAc.access  = XML->sys.core[ithCore].int_regfile_reads +
3406                     XML->sys.core[ithCore].function_calls*16;
3407                IRF->stats_t.writeAc.access  = XML->sys.core[ithCore].int_regfile_writes +
3408                     XML->sys.core[ithCore].function_calls*16;
3409                IRF->rtp_stats = IRF->stats_t;
3410
3411                FRF->stats_t.readAc.access  = XML->sys.core[ithCore].float_regfile_reads +
3412                     XML->sys.core[ithCore].function_calls*16;;
3413                FRF->stats_t.writeAc.access  = XML->sys.core[ithCore].float_regfile_writes+
3414                     XML->sys.core[ithCore].function_calls*16;;
3415                FRF->rtp_stats = FRF->stats_t;
3416        }
3417    }
3418        IRF->power_t.reset();
3419        FRF->power_t.reset();
3420        IRF->power_t.readOp.dynamic  +=  (IRF->stats_t.readAc.access*IRF->local_result.power.readOp.dynamic
3421                        +IRF->stats_t.writeAc.access*IRF->local_result.power.writeOp.dynamic);
3422        FRF->power_t.readOp.dynamic  +=  (FRF->stats_t.readAc.access*FRF->local_result.power.readOp.dynamic
3423                        +FRF->stats_t.writeAc.access*FRF->local_result.power.writeOp.dynamic);
3424        if (coredynp.regWindowing)
3425        {
3426                RFWIN->power_t.reset();
3427                RFWIN->power_t.readOp.dynamic   +=  (RFWIN->stats_t.readAc.access*RFWIN->local_result.power.readOp.dynamic +
3428                                RFWIN->stats_t.writeAc.access*RFWIN->local_result.power.writeOp.dynamic);
3429        }
3430
3431        if (is_tdp)
3432        {
3433                IRF->power  =  IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread;
3434                FRF->power  =  FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread;
3435                power	    =  power + (IRF->power + FRF->power);
3436                if (coredynp.regWindowing)
3437                {
3438                        RFWIN->power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg;
3439                        power        = power + RFWIN->power;
3440                }
3441        }
3442        else
3443        {
3444                IRF->rt_power  =  IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread;
3445                FRF->rt_power  =  FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread;
3446                rt_power	   =  rt_power + (IRF->power_t + FRF->power_t);
3447                if (coredynp.regWindowing)
3448                {
3449                        RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg;
3450                        rt_power        = rt_power + RFWIN->rt_power;
3451                }
3452        }
3453}
3454
3455
3456void RegFU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
3457{
3458        if (!exist) return;
3459        string indent_str(indent, ' ');
3460        string indent_str_next(indent+2, ' ');
3461        bool long_channel = XML->sys.longer_channel_device;
3462
3463        if (is_tdp)
3464        {	cout << indent_str << "Integer RF:" << endl;
3465                cout << indent_str_next << "Area = " << IRF->area.get_area()*1e-6<< " mm^2" << endl;
3466                cout << indent_str_next << "Peak Dynamic = " << IRF->power.readOp.dynamic*clockRate << " W" << endl;
3467                cout << indent_str_next << "Subthreshold Leakage = "
3468                        << (long_channel? IRF->power.readOp.longer_channel_leakage:IRF->power.readOp.leakage) <<" W" << endl;
3469                cout << indent_str_next << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl;
3470                cout << indent_str_next << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic/executionTime << " W" << endl;
3471                cout <<endl;
3472                cout << indent_str<< "Floating Point RF:" << endl;
3473                cout << indent_str_next << "Area = " << FRF->area.get_area()*1e-6  << " mm^2" << endl;
3474                cout << indent_str_next << "Peak Dynamic = " << FRF->power.readOp.dynamic*clockRate  << " W" << endl;
3475                cout << indent_str_next << "Subthreshold Leakage = "
3476                        << (long_channel? FRF->power.readOp.longer_channel_leakage:FRF->power.readOp.leakage)  << " W" << endl;
3477                cout << indent_str_next << "Gate Leakage = " << FRF->power.readOp.gate_leakage  << " W" << endl;
3478                cout << indent_str_next << "Runtime Dynamic = " << FRF->rt_power.readOp.dynamic/executionTime << " W" << endl;
3479                cout <<endl;
3480                if (coredynp.regWindowing)
3481                {
3482                        cout << indent_str << "Register Windows:" << endl;
3483                        cout << indent_str_next << "Area = " << RFWIN->area.get_area() *1e-6 << " mm^2" << endl;
3484                        cout << indent_str_next << "Peak Dynamic = " << RFWIN->power.readOp.dynamic*clockRate  << " W" << endl;
3485                        cout << indent_str_next << "Subthreshold Leakage = "
3486                                << (long_channel? RFWIN->power.readOp.longer_channel_leakage:RFWIN->power.readOp.leakage)  << " W" << endl;
3487                        cout << indent_str_next << "Gate Leakage = " << RFWIN->power.readOp.gate_leakage  << " W" << endl;
3488                        cout << indent_str_next << "Runtime Dynamic = " << RFWIN->rt_power.readOp.dynamic/executionTime << " W" << endl;
3489                        cout <<endl;
3490                }
3491        }
3492        else
3493        {
3494                cout << indent_str_next << "Integer RF    Peak Dynamic = " << IRF->rt_power.readOp.dynamic*clockRate << " W" << endl;
3495                cout << indent_str_next << "Integer RF    Subthreshold Leakage = " << IRF->rt_power.readOp.leakage <<" W" << endl;
3496                cout << indent_str_next << "Integer RF    Gate Leakage = " << IRF->rt_power.readOp.gate_leakage << " W" << endl;
3497                cout << indent_str_next << "Floating Point RF   Peak Dynamic = " << FRF->rt_power.readOp.dynamic*clockRate  << " W" << endl;
3498                cout << indent_str_next << "Floating Point RF   Subthreshold Leakage = " << FRF->rt_power.readOp.leakage  << " W" << endl;
3499                cout << indent_str_next << "Floating Point RF   Gate Leakage = " << FRF->rt_power.readOp.gate_leakage  << " W" << endl;
3500                if (coredynp.regWindowing)
3501                {
3502                        cout << indent_str_next << "Register Windows   Peak Dynamic = " << RFWIN->rt_power.readOp.dynamic*clockRate  << " W" << endl;
3503                        cout << indent_str_next << "Register Windows   Subthreshold Leakage = " << RFWIN->rt_power.readOp.leakage  << " W" << endl;
3504                        cout << indent_str_next << "Register Windows   Gate Leakage = " << RFWIN->rt_power.readOp.gate_leakage  << " W" << endl;
3505                }
3506        }
3507}
3508
3509
3510void EXECU::computeEnergy(bool is_tdp)
3511{
3512        if (!exist) return;
3513        double pppm_t[4]    = {1,1,1,1};
3514//	rfu->power.reset();
3515//	rfu->rt_power.reset();
3516//	scheu->power.reset();
3517//	scheu->rt_power.reset();
3518//	exeu->power.reset();
3519//	exeu->rt_power.reset();
3520
3521        rfu->computeEnergy(is_tdp);
3522        scheu->computeEnergy(is_tdp);
3523        exeu->computeEnergy(is_tdp);
3524        if (coredynp.num_fpus >0)
3525        {
3526                fp_u->computeEnergy(is_tdp);
3527        }
3528        if (coredynp.num_muls >0)
3529        {
3530                mul->computeEnergy(is_tdp);
3531        }
3532
3533        if (is_tdp)
3534        {
3535                set_pppm(pppm_t, 2*coredynp.ALU_cdb_duty_cycle, 2, 2, 2*coredynp.ALU_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction.
3536                bypass.power = bypass.power + intTagBypass->power*pppm_t + int_bypass->power*pppm_t;
3537                if (coredynp.num_muls >0)
3538                {
3539                        set_pppm(pppm_t, 2*coredynp.MUL_cdb_duty_cycle, 2, 2, 2*coredynp.MUL_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction.
3540                        bypass.power = bypass.power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t;
3541                        power      = power + mul->power;
3542                }
3543                if (coredynp.num_fpus>0)
3544                {
3545                        set_pppm(pppm_t, 3*coredynp.FPU_cdb_duty_cycle, 3, 3, 3*coredynp.FPU_cdb_duty_cycle);//3 means three source operands needs to be passed for each fp instruction.
3546                        bypass.power = bypass.power + fp_bypass->power*pppm_t  + fpTagBypass->power*pppm_t ;
3547                        power      = power + fp_u->power;
3548                }
3549
3550                power      = power + rfu->power + exeu->power + bypass.power + scheu->power;
3551        }
3552        else
3553        {
3554                set_pppm(pppm_t, XML->sys.core[ithCore].cdb_alu_accesses, 2, 2, XML->sys.core[ithCore].cdb_alu_accesses);
3555                bypass.rt_power = bypass.rt_power + intTagBypass->power*pppm_t;
3556                bypass.rt_power = bypass.rt_power + int_bypass->power*pppm_t;
3557
3558                if (coredynp.num_muls >0)
3559                {
3560                        set_pppm(pppm_t, XML->sys.core[ithCore].cdb_mul_accesses, 2, 2, XML->sys.core[ithCore].cdb_mul_accesses);//2 means two source operands needs to be passed for each int instruction.
3561                        bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t;
3562                        rt_power      = rt_power + mul->rt_power;
3563                }
3564
3565                if (coredynp.num_fpus>0)
3566                {
3567                        set_pppm(pppm_t, XML->sys.core[ithCore].cdb_fpu_accesses, 3, 3, XML->sys.core[ithCore].cdb_fpu_accesses);
3568                        bypass.rt_power = bypass.rt_power + fp_bypass->power*pppm_t;
3569                        bypass.rt_power = bypass.rt_power + fpTagBypass->power*pppm_t;
3570                        rt_power      = rt_power + fp_u->rt_power;
3571                }
3572                rt_power      = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + scheu->rt_power;
3573        }
3574}
3575
3576void EXECU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
3577{
3578        if (!exist) return;
3579        string indent_str(indent, ' ');
3580        string indent_str_next(indent+2, ' ');
3581        bool long_channel = XML->sys.longer_channel_device;
3582
3583
3584//	cout << indent_str_next << "Results Broadcast Bus Area = " << bypass->area.get_area() *1e-6 << " mm^2" << endl;
3585        if (is_tdp)
3586        {
3587                cout << indent_str << "Register Files:" << endl;
3588                cout << indent_str_next << "Area = " << rfu->area.get_area()*1e-6<< " mm^2" << endl;
3589                cout << indent_str_next << "Peak Dynamic = " << rfu->power.readOp.dynamic*clockRate << " W" << endl;
3590                cout << indent_str_next << "Subthreshold Leakage = "
3591                        << (long_channel? rfu->power.readOp.longer_channel_leakage:rfu->power.readOp.leakage) <<" W" << endl;
3592                cout << indent_str_next << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl;
3593                cout << indent_str_next << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic/executionTime << " W" << endl;
3594                cout <<endl;
3595                if (plevel>3){
3596                        rfu->displayEnergy(indent+4,is_tdp);
3597                }
3598                cout << indent_str << "Instruction Scheduler:" << endl;
3599                cout << indent_str_next << "Area = " << scheu->area.get_area()*1e-6  << " mm^2" << endl;
3600                cout << indent_str_next << "Peak Dynamic = " << scheu->power.readOp.dynamic*clockRate  << " W" << endl;
3601                cout << indent_str_next << "Subthreshold Leakage = "
3602                        << (long_channel? scheu->power.readOp.longer_channel_leakage:scheu->power.readOp.leakage)  << " W" << endl;
3603                cout << indent_str_next << "Gate Leakage = " << scheu->power.readOp.gate_leakage  << " W" << endl;
3604                cout << indent_str_next << "Runtime Dynamic = " << scheu->rt_power.readOp.dynamic/executionTime << " W" << endl;
3605                cout <<endl;
3606                if (plevel>3){
3607                        scheu->displayEnergy(indent+4,is_tdp);
3608                }
3609                exeu->displayEnergy(indent,is_tdp);
3610                if (coredynp.num_fpus>0)
3611                {
3612                        fp_u->displayEnergy(indent,is_tdp);
3613                }
3614                if (coredynp.num_muls >0)
3615                {
3616                        mul->displayEnergy(indent,is_tdp);
3617                }
3618                cout << indent_str << "Results Broadcast Bus:" << endl;
3619                cout << indent_str_next << "Area Overhead = " << bypass.area.get_area()*1e-6  << " mm^2" << endl;
3620                cout << indent_str_next << "Peak Dynamic = " << bypass.power.readOp.dynamic*clockRate  << " W" << endl;
3621                cout << indent_str_next << "Subthreshold Leakage = "
3622                        << (long_channel? bypass.power.readOp.longer_channel_leakage:bypass.power.readOp.leakage ) << " W" << endl;
3623                cout << indent_str_next << "Gate Leakage = " << bypass.power.readOp.gate_leakage  << " W" << endl;
3624                cout << indent_str_next << "Runtime Dynamic = " << bypass.rt_power.readOp.dynamic/executionTime << " W" << endl;
3625                cout <<endl;
3626        }
3627        else
3628        {
3629                cout << indent_str_next << "Register Files    Peak Dynamic = " << rfu->rt_power.readOp.dynamic*clockRate << " W" << endl;
3630                cout << indent_str_next << "Register Files    Subthreshold Leakage = " << rfu->rt_power.readOp.leakage <<" W" << endl;
3631                cout << indent_str_next << "Register Files    Gate Leakage = " << rfu->rt_power.readOp.gate_leakage << " W" << endl;
3632                cout << indent_str_next << "Instruction Sheduler   Peak Dynamic = " << scheu->rt_power.readOp.dynamic*clockRate  << " W" << endl;
3633                cout << indent_str_next << "Instruction Sheduler   Subthreshold Leakage = " << scheu->rt_power.readOp.leakage  << " W" << endl;
3634                cout << indent_str_next << "Instruction Sheduler   Gate Leakage = " << scheu->rt_power.readOp.gate_leakage  << " W" << endl;
3635                cout << indent_str_next << "Results Broadcast Bus   Peak Dynamic = " << bypass.rt_power.readOp.dynamic*clockRate  << " W" << endl;
3636                cout << indent_str_next << "Results Broadcast Bus   Subthreshold Leakage = " << bypass.rt_power.readOp.leakage  << " W" << endl;
3637                cout << indent_str_next << "Results Broadcast Bus   Gate Leakage = " << bypass.rt_power.readOp.gate_leakage  << " W" << endl;
3638        }
3639
3640}
3641
3642void Core::computeEnergy(bool is_tdp)
3643{
3644        //power_point_product_masks
3645        double pppm_t[4]    = {1,1,1,1};
3646    double rtp_pipeline_coe;
3647    double num_units = 4.0;
3648        if (is_tdp)
3649        {
3650                ifu->computeEnergy(is_tdp);
3651                lsu->computeEnergy(is_tdp);
3652                mmu->computeEnergy(is_tdp);
3653                exu->computeEnergy(is_tdp);
3654
3655                if (coredynp.core_ty==OOO)
3656                {
3657                        num_units = 5.0;
3658                        rnu->computeEnergy(is_tdp);
3659                        set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
3660                        if (rnu->exist)
3661                        {
3662                                rnu->power = rnu->power + corepipe->power*pppm_t;
3663                                power     = power + rnu->power;
3664                        }
3665                }
3666
3667                if (ifu->exist)
3668                {
3669                        set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.IFU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
3670//			cout << "IFU = " << ifu->power.readOp.dynamic*clockRate  << " W" << endl;
3671                        ifu->power = ifu->power + corepipe->power*pppm_t;
3672//			cout << "IFU = " << ifu->power.readOp.dynamic*clockRate  << " W" << endl;
3673//			cout << "1/4 pipe = " << corepipe->power.readOp.dynamic*clockRate/num_units  << " W" << endl;
3674                        power     = power + ifu->power;
3675//			cout << "core = " << power.readOp.dynamic*clockRate  << " W" << endl;
3676                }
3677                if (lsu->exist)
3678                {
3679                        set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.LSU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
3680                        lsu->power = lsu->power + corepipe->power*pppm_t;
3681//			cout << "LSU = " << lsu->power.readOp.dynamic*clockRate  << " W" << endl;
3682                        power     = power + lsu->power;
3683//			cout << "core = " << power.readOp.dynamic*clockRate  << " W" << endl;
3684                }
3685                if (exu->exist)
3686                {
3687                        set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.ALU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
3688                        exu->power = exu->power + corepipe->power*pppm_t;
3689//			cout << "EXE = " << exu->power.readOp.dynamic*clockRate  << " W" << endl;
3690                        power     = power + exu->power;
3691//			cout << "core = " << power.readOp.dynamic*clockRate  << " W" << endl;
3692                }
3693                if (mmu->exist)
3694                {
3695                        set_pppm(pppm_t, coredynp.num_pipelines/num_units*(0.5+0.5*coredynp.LSU_duty_cycle), coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
3696                        mmu->power = mmu->power + corepipe->power*pppm_t;
3697//			cout << "MMU = " << mmu->power.readOp.dynamic*clockRate  << " W" << endl;
3698                        power     = power +  mmu->power;
3699//			cout << "core = " << power.readOp.dynamic*clockRate  << " W" << endl;
3700                }
3701
3702                power     = power +  undiffCore->power;
3703
3704                if (XML->sys.Private_L2)
3705                {
3706
3707                        l2cache->computeEnergy(is_tdp);
3708                        set_pppm(pppm_t,l2cache->cachep.clockRate/clockRate, 1,1,1);
3709                        //l2cache->power = l2cache->power*pppm_t;
3710                        power = power  + l2cache->power*pppm_t;
3711                }
3712        }
3713        else
3714        {
3715                ifu->computeEnergy(is_tdp);
3716                lsu->computeEnergy(is_tdp);
3717                mmu->computeEnergy(is_tdp);
3718                exu->computeEnergy(is_tdp);
3719                if (coredynp.core_ty==OOO)
3720                {
3721                        num_units = 5.0;
3722                        rnu->computeEnergy(is_tdp);
3723                set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
3724                        if (rnu->exist)
3725                        {
3726                rnu->rt_power = rnu->rt_power + corepipe->power*pppm_t;
3727
3728                        rt_power      = rt_power + rnu->rt_power;
3729                        }
3730                }
3731                else
3732                {
3733                        if (XML->sys.homogeneous_cores==1)
3734                        {
3735                                rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores;
3736                        }
3737                        else
3738                        {
3739                                rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles;
3740                        }
3741                set_pppm(pppm_t, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);
3742                }
3743
3744                if (ifu->exist)
3745                {
3746                        ifu->rt_power = ifu->rt_power + corepipe->power*pppm_t;
3747                        rt_power     = rt_power + ifu->rt_power ;
3748                }
3749                if (lsu->exist)
3750                {
3751                        lsu->rt_power = lsu->rt_power + corepipe->power*pppm_t;
3752                        rt_power     = rt_power  + lsu->rt_power;
3753                }
3754                if (exu->exist)
3755                {
3756                        exu->rt_power = exu->rt_power + corepipe->power*pppm_t;
3757                        rt_power     = rt_power  + exu->rt_power;
3758                }
3759                if (mmu->exist)
3760                {
3761                        mmu->rt_power = mmu->rt_power + corepipe->power*pppm_t;
3762                        rt_power     = rt_power +  mmu->rt_power ;
3763                }
3764
3765                rt_power     = rt_power +  undiffCore->power;
3766//		cout << "EXE = " << exu->power.readOp.dynamic*clockRate  << " W" << endl;
3767                if (XML->sys.Private_L2)
3768                {
3769
3770                        l2cache->computeEnergy(is_tdp);
3771                        //set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1);
3772                        //l2cache->rt_power = l2cache->rt_power*pppm_t;
3773                        rt_power = rt_power  + l2cache->rt_power;
3774                }
3775        }
3776
3777}
3778
3779void Core::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
3780{
3781        string indent_str(indent, ' ');
3782        string indent_str_next(indent+2, ' ');
3783        bool long_channel = XML->sys.longer_channel_device;
3784        if (is_tdp)
3785        {
3786                cout << "Core:" << endl;
3787                cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
3788                cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl;
3789                cout << indent_str << "Subthreshold Leakage = "
3790                        << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
3791                //cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
3792                cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
3793                cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl;
3794                cout<<endl;
3795                if (ifu->exist)
3796                {
3797                        cout << indent_str << "Instruction Fetch Unit:" << endl;
3798                        cout << indent_str_next << "Area = " << ifu->area.get_area()*1e-6<< " mm^2" << endl;
3799                        cout << indent_str_next << "Peak Dynamic = " << ifu->power.readOp.dynamic*clockRate << " W" << endl;
3800                        cout << indent_str_next << "Subthreshold Leakage = "
3801                                << (long_channel? ifu->power.readOp.longer_channel_leakage:ifu->power.readOp.leakage) <<" W" << endl;
3802                        //cout << indent_str_next << "Subthreshold Leakage = " << ifu->power.readOp.longer_channel_leakage <<" W" << endl;
3803                        cout << indent_str_next << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" << endl;
3804                        cout << indent_str_next << "Runtime Dynamic = " << ifu->rt_power.readOp.dynamic/executionTime << " W" << endl;
3805                        cout <<endl;
3806                        if (plevel >2){
3807                                ifu->displayEnergy(indent+4,plevel,is_tdp);
3808                        }
3809                }
3810                if (coredynp.core_ty==OOO)
3811                {
3812                        if (rnu->exist)
3813                        {
3814                                cout << indent_str<< "Renaming Unit:" << endl;
3815                                cout << indent_str_next << "Area = " << rnu->area.get_area()*1e-6  << " mm^2" << endl;
3816                                cout << indent_str_next << "Peak Dynamic = " << rnu->power.readOp.dynamic*clockRate  << " W" << endl;
3817                                cout << indent_str_next << "Subthreshold Leakage = "
3818                                        << (long_channel? rnu->power.readOp.longer_channel_leakage:rnu->power.readOp.leakage)  << " W" << endl;
3819                                //cout << indent_str_next << "Subthreshold Leakage = " << rnu->power.readOp.longer_channel_leakage  << " W" << endl;
3820                                cout << indent_str_next << "Gate Leakage = " << rnu->power.readOp.gate_leakage  << " W" << endl;
3821                                cout << indent_str_next << "Runtime Dynamic = " << rnu->rt_power.readOp.dynamic/executionTime << " W" << endl;
3822                                cout <<endl;
3823                                if (plevel >2){
3824                                        rnu->displayEnergy(indent+4,plevel,is_tdp);
3825                                }
3826                        }
3827
3828                }
3829                if (lsu->exist)
3830                {
3831                        cout << indent_str<< "Load Store Unit:" << endl;
3832                        cout << indent_str_next << "Area = " << lsu->area.get_area()*1e-6  << " mm^2" << endl;
3833                        cout << indent_str_next << "Peak Dynamic = " << lsu->power.readOp.dynamic*clockRate  << " W" << endl;
3834                        cout << indent_str_next << "Subthreshold Leakage = "
3835                                << (long_channel? lsu->power.readOp.longer_channel_leakage:lsu->power.readOp.leakage ) << " W" << endl;
3836                        //cout << indent_str_next << "Subthreshold Leakage = " << lsu->power.readOp.longer_channel_leakage  << " W" << endl;
3837                        cout << indent_str_next << "Gate Leakage = " << lsu->power.readOp.gate_leakage  << " W" << endl;
3838                        cout << indent_str_next << "Runtime Dynamic = " << lsu->rt_power.readOp.dynamic/executionTime << " W" << endl;
3839                        cout <<endl;
3840                        if (plevel >2){
3841                                lsu->displayEnergy(indent+4,plevel,is_tdp);
3842                        }
3843                }
3844                if (mmu->exist)
3845                {
3846                        cout << indent_str<< "Memory Management Unit:" << endl;
3847                        cout << indent_str_next << "Area = " << mmu->area.get_area() *1e-6 << " mm^2" << endl;
3848                        cout << indent_str_next << "Peak Dynamic = " << mmu->power.readOp.dynamic*clockRate  << " W" << endl;
3849                        cout << indent_str_next << "Subthreshold Leakage = "
3850                                << (long_channel? mmu->power.readOp.longer_channel_leakage:mmu->power.readOp.leakage)   << " W" << endl;
3851                        //cout << indent_str_next << "Subthreshold Leakage = " << mmu->power.readOp.longer_channel_leakage   << " W" << endl;
3852                        cout << indent_str_next << "Gate Leakage = " << mmu->power.readOp.gate_leakage  << " W" << endl;
3853                        cout << indent_str_next << "Runtime Dynamic = " << mmu->rt_power.readOp.dynamic/executionTime << " W" << endl;
3854                        cout <<endl;
3855                        if (plevel >2){
3856                                mmu->displayEnergy(indent+4,plevel,is_tdp);
3857                        }
3858                }
3859                if (exu->exist)
3860                {
3861                        cout << indent_str<< "Execution Unit:" << endl;
3862                        cout << indent_str_next << "Area = " << exu->area.get_area()  *1e-6<< " mm^2" << endl;
3863                        cout << indent_str_next << "Peak Dynamic = " << exu->power.readOp.dynamic*clockRate  << " W" << endl;
3864                        cout << indent_str_next << "Subthreshold Leakage = "
3865                                << (long_channel? exu->power.readOp.longer_channel_leakage:exu->power.readOp.leakage)   << " W" << endl;
3866                        //cout << indent_str_next << "Subthreshold Leakage = " << exu->power.readOp.longer_channel_leakage << " W" << endl;
3867                        cout << indent_str_next << "Gate Leakage = " << exu->power.readOp.gate_leakage  << " W" << endl;
3868                        cout << indent_str_next << "Runtime Dynamic = " << exu->rt_power.readOp.dynamic/executionTime << " W" << endl;
3869                        cout <<endl;
3870                        if (plevel >2){
3871                                exu->displayEnergy(indent+4,plevel,is_tdp);
3872                        }
3873                }
3874//		if (plevel >2)
3875//		{
3876//			if (undiffCore->exist)
3877//			{
3878//				cout << indent_str << "Undifferentiated Core" << endl;
3879//				cout << indent_str_next << "Area = " << undiffCore->area.get_area()*1e-6<< " mm^2" << endl;
3880//				cout << indent_str_next << "Peak Dynamic = " << undiffCore->power.readOp.dynamic*clockRate << " W" << endl;
3881////				cout << indent_str_next << "Subthreshold Leakage = " << undiffCore->power.readOp.leakage <<" W" << endl;
3882//				cout << indent_str_next << "Subthreshold Leakage = "
3883//								<< (long_channel? undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage)   << " W" << endl;
3884//				cout << indent_str_next << "Gate Leakage = " << undiffCore->power.readOp.gate_leakage << " W" << endl;
3885//				//		cout << indent_str_next << "Runtime Dynamic = " << undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl;
3886//				cout <<endl;
3887//			}
3888//		}
3889                if (XML->sys.Private_L2)
3890                {
3891
3892                        l2cache->displayEnergy(4,is_tdp);
3893                }
3894
3895        }
3896        else
3897        {
3898//		cout << indent_str_next << "Instruction Fetch Unit    Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl;
3899//		cout << indent_str_next << "Instruction Fetch Unit    Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl;
3900//		cout << indent_str_next << "Instruction Fetch Unit    Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl;
3901//		cout << indent_str_next << "Load Store Unit   Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate  << " W" << endl;
3902//		cout << indent_str_next << "Load Store Unit   Subthreshold Leakage = " << lsu->rt_power.readOp.leakage  << " W" << endl;
3903//		cout << indent_str_next << "Load Store Unit   Gate Leakage = " << lsu->rt_power.readOp.gate_leakage  << " W" << endl;
3904//		cout << indent_str_next << "Memory Management Unit   Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate  << " W" << endl;
3905//		cout << indent_str_next << "Memory Management Unit   Subthreshold Leakage = " << mmu->rt_power.readOp.leakage  << " W" << endl;
3906//		cout << indent_str_next << "Memory Management Unit   Gate Leakage = " << mmu->rt_power.readOp.gate_leakage  << " W" << endl;
3907//		cout << indent_str_next << "Execution Unit   Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate  << " W" << endl;
3908//		cout << indent_str_next << "Execution Unit   Subthreshold Leakage = " << exu->rt_power.readOp.leakage  << " W" << endl;
3909//		cout << indent_str_next << "Execution Unit   Gate Leakage = " << exu->rt_power.readOp.gate_leakage  << " W" << endl;
3910        }
3911}
3912InstFetchU ::~InstFetchU(){
3913
3914        if (!exist) return;
3915        if(IB) 	                   {delete IB; IB = 0;}
3916        if(ID_inst) 	           {delete ID_inst; ID_inst = 0;}
3917        if(ID_operand) 	           {delete ID_operand; ID_operand = 0;}
3918        if(ID_misc) 	           {delete ID_misc; ID_misc = 0;}
3919        if (coredynp.predictionW>0)
3920        {
3921                if(BTB) 	               {delete BTB; BTB = 0;}
3922                if(BPT) 	               {delete BPT; BPT = 0;}
3923        }
3924}
3925
3926BranchPredictor ::~BranchPredictor(){
3927
3928        if (!exist) return;
3929        if(globalBPT) 	           {delete globalBPT; globalBPT = 0;}
3930        if(localBPT) 	           {delete localBPT; localBPT = 0;}
3931    if(L1_localBPT) 	       {delete L1_localBPT; L1_localBPT = 0;}
3932    if(L2_localBPT) 	       {delete L2_localBPT; L2_localBPT = 0;}
3933    if(chooser) 	           {delete chooser; chooser = 0;}
3934    if(RAS) 	               {delete RAS; RAS = 0;}
3935        }
3936
3937RENAMINGU ::~RENAMINGU(){
3938
3939        if (!exist) return;
3940        if(iFRAT ) 	               {delete iFRAT; iFRAT = 0;}
3941    if(fFRAT ) 	               {delete fFRAT; fFRAT =0;}
3942    if(iRRAT)                  {delete iRRAT; iRRAT = 0;}
3943    if(iFRAT)                  {delete iFRAT; iFRAT = 0;}
3944    if(ifreeL)                 {delete ifreeL;ifreeL= 0;}
3945    if(ffreeL)                 {delete ffreeL;ffreeL= 0;}
3946    if(idcl)                   {delete idcl;  idcl = 0;}
3947    if(fdcl)                   {delete fdcl;  fdcl = 0;}
3948    if(RAHT)                   {delete RAHT;  RAHT = 0;}
3949        }
3950
3951LoadStoreU ::~LoadStoreU(){
3952
3953        if (!exist) return;
3954        if(LSQ) 	               {delete LSQ; LSQ = 0;}
3955        }
3956
3957MemManU ::~MemManU(){
3958
3959        if (!exist) return;
3960        if(itlb) 	               {delete itlb; itlb = 0;}
3961    if(dtlb) 	               {delete dtlb; dtlb = 0;}
3962        }
3963
3964RegFU ::~RegFU(){
3965
3966        if (!exist) return;
3967        if(IRF) 	               {delete IRF; IRF = 0;}
3968    if(FRF) 	               {delete FRF; FRF = 0;}
3969    if(RFWIN) 	               {delete RFWIN; RFWIN = 0;}
3970        }
3971
3972SchedulerU ::~SchedulerU(){
3973
3974        if (!exist) return;
3975        if(int_inst_window) 	   {delete int_inst_window; int_inst_window = 0;}
3976        if(fp_inst_window) 	       {delete int_inst_window; int_inst_window = 0;}
3977        if(ROB) 	               {delete ROB; ROB = 0;}
3978    if(instruction_selection)  {delete instruction_selection;instruction_selection = 0;}
3979        }
3980
3981EXECU ::~EXECU(){
3982
3983        if (!exist) return;
3984        if(int_bypass) 	           {delete int_bypass; int_bypass = 0;}
3985    if(intTagBypass) 	       {delete intTagBypass; intTagBypass =0;}
3986    if(int_mul_bypass) 	       {delete int_mul_bypass; int_mul_bypass = 0;}
3987    if(intTag_mul_Bypass) 	   {delete intTag_mul_Bypass; intTag_mul_Bypass =0;}
3988    if(fp_bypass) 	           {delete fp_bypass;fp_bypass = 0;}
3989    if(fpTagBypass) 	       {delete fpTagBypass;fpTagBypass = 0;}
3990    if(fp_u)                   {delete fp_u;fp_u = 0;}
3991    if(exeu)                   {delete exeu;exeu = 0;}
3992    if(mul)                    {delete mul;mul = 0;}
3993    if(rfu)                    {delete rfu;rfu = 0;}
3994        if(scheu) 	               {delete scheu; scheu = 0;}
3995        }
3996
3997Core ::~Core(){
3998
3999        if(ifu) 	               {delete ifu; ifu = 0;}
4000        if(lsu) 	               {delete lsu; lsu = 0;}
4001        if(rnu) 	               {delete rnu; rnu = 0;}
4002        if(mmu) 	               {delete mmu; mmu = 0;}
4003        if(exu) 	               {delete exu; exu = 0;}
4004    if(corepipe) 	           {delete corepipe; corepipe = 0;}
4005    if(undiffCore)             {delete undiffCore;undiffCore = 0;}
4006    if(l2cache)                {delete l2cache;l2cache = 0;}
4007        }
4008
4009void Core::set_core_param()
4010{
4011        coredynp.opt_local = XML->sys.core[ithCore].opt_local;
4012        coredynp.x86 = XML->sys.core[ithCore].x86;
4013        coredynp.Embedded = XML->sys.Embedded;
4014        coredynp.core_ty   = (enum Core_type)XML->sys.core[ithCore].machine_type;
4015        coredynp.rm_ty     = (enum Renaming_type)XML->sys.core[ithCore].rename_scheme;
4016    coredynp.fetchW    = XML->sys.core[ithCore].fetch_width;
4017    coredynp.decodeW   = XML->sys.core[ithCore].decode_width;
4018    coredynp.issueW    = XML->sys.core[ithCore].issue_width;
4019    coredynp.peak_issueW   = XML->sys.core[ithCore].peak_issue_width;
4020    coredynp.commitW       = XML->sys.core[ithCore].commit_width;
4021    coredynp.peak_commitW  = XML->sys.core[ithCore].peak_issue_width;
4022    coredynp.predictionW   = XML->sys.core[ithCore].prediction_width;
4023    coredynp.fp_issueW     = XML->sys.core[ithCore].fp_issue_width;
4024    coredynp.fp_decodeW    = XML->sys.core[ithCore].fp_issue_width;
4025    coredynp.num_alus      = XML->sys.core[ithCore].ALU_per_core;
4026    coredynp.num_fpus      = XML->sys.core[ithCore].FPU_per_core;
4027    coredynp.num_muls      = XML->sys.core[ithCore].MUL_per_core;
4028
4029
4030    coredynp.num_hthreads	     = XML->sys.core[ithCore].number_hardware_threads;
4031    coredynp.multithreaded       = coredynp.num_hthreads>1? true:false;
4032    coredynp.instruction_length  = XML->sys.core[ithCore].instruction_length;
4033    coredynp.pc_width            = XML->sys.virtual_address_width;
4034
4035        coredynp.opcode_length       = XML->sys.core[ithCore].opcode_width;
4036    coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width;
4037    coredynp.num_pipelines       = XML->sys.core[ithCore].pipelines_per_core[0];
4038    coredynp.pipeline_stages     = XML->sys.core[ithCore].pipeline_depth[0];
4039    coredynp.num_fp_pipelines    = XML->sys.core[ithCore].pipelines_per_core[1];
4040    coredynp.fp_pipeline_stages  = XML->sys.core[ithCore].pipeline_depth[1];
4041    coredynp.int_data_width      = int(ceil(XML->sys.machine_bits/32.0))*32;
4042    coredynp.fp_data_width       = coredynp.int_data_width;
4043    coredynp.v_address_width     = XML->sys.virtual_address_width;
4044    coredynp.p_address_width     = XML->sys.physical_address_width;
4045
4046        coredynp.scheu_ty         = (enum Scheduler_type)XML->sys.core[ithCore].instruction_window_scheme;
4047        coredynp.arch_ireg_width  =  int(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size)));
4048        coredynp.arch_freg_width  =  int(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size)));
4049        coredynp.num_IRF_entry    = XML->sys.core[ithCore].archi_Regs_IRF_size;
4050        coredynp.num_FRF_entry    = XML->sys.core[ithCore].archi_Regs_FRF_size;
4051        coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle;
4052        coredynp.total_cycles        = XML->sys.core[ithCore].total_cycles;
4053        coredynp.busy_cycles         = XML->sys.core[ithCore].busy_cycles;
4054        coredynp.idle_cycles         = XML->sys.core[ithCore].idle_cycles;
4055
4056        //Max power duty cycle for peak power estimation
4057//	if (coredynp.core_ty==OOO)
4058//	{
4059//		coredynp.IFU_duty_cycle = 1;
4060//		coredynp.LSU_duty_cycle = 1;
4061//		coredynp.MemManU_I_duty_cycle =1;
4062//		coredynp.MemManU_D_duty_cycle =1;
4063//		coredynp.ALU_duty_cycle =1;
4064//		coredynp.MUL_duty_cycle =1;
4065//		coredynp.FPU_duty_cycle =1;
4066//		coredynp.ALU_cdb_duty_cycle =1;
4067//		coredynp.MUL_cdb_duty_cycle =1;
4068//		coredynp.FPU_cdb_duty_cycle =1;
4069//	}
4070//	else
4071//	{
4072                coredynp.IFU_duty_cycle = XML->sys.core[ithCore].IFU_duty_cycle;
4073                coredynp.BR_duty_cycle = XML->sys.core[ithCore].BR_duty_cycle;
4074                coredynp.LSU_duty_cycle = XML->sys.core[ithCore].LSU_duty_cycle;
4075                coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle;
4076                coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle;
4077                coredynp.ALU_duty_cycle = XML->sys.core[ithCore].ALU_duty_cycle;
4078                coredynp.MUL_duty_cycle = XML->sys.core[ithCore].MUL_duty_cycle;
4079                coredynp.FPU_duty_cycle = XML->sys.core[ithCore].FPU_duty_cycle;
4080                coredynp.ALU_cdb_duty_cycle = XML->sys.core[ithCore].ALU_cdb_duty_cycle;
4081                coredynp.MUL_cdb_duty_cycle = XML->sys.core[ithCore].MUL_cdb_duty_cycle;
4082                coredynp.FPU_cdb_duty_cycle = XML->sys.core[ithCore].FPU_cdb_duty_cycle;
4083//	}
4084
4085
4086        if (!((coredynp.core_ty==OOO)||(coredynp.core_ty==Inorder)))
4087        {
4088                cout<<"Invalid Core Type"<<endl;
4089                exit(0);
4090        }
4091//	if (coredynp.core_ty==OOO)
4092//	{
4093//		cout<<"OOO processor models are being updated and will be available in next release"<<endl;
4094//		exit(0);
4095//	}
4096        if (!((coredynp.scheu_ty==PhysicalRegFile)||(coredynp.scheu_ty==ReservationStation)))
4097        {
4098                cout<<"Invalid OOO Scheduler Type"<<endl;
4099                exit(0);
4100        }
4101
4102        if (!((coredynp.rm_ty ==RAMbased)||(coredynp.rm_ty ==CAMbased)))
4103        {
4104                cout<<"Invalid OOO Renaming Type"<<endl;
4105                exit(0);
4106        }
4107
4108if (coredynp.core_ty==OOO)
4109{
4110        if (coredynp.scheu_ty==PhysicalRegFile)
4111        {
4112          coredynp.phy_ireg_width  =  int(ceil(log2(XML->sys.core[ithCore].phy_Regs_IRF_size)));
4113          coredynp.phy_freg_width  =  int(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size)));
4114          coredynp.num_ifreelist_entries = coredynp.num_IRF_entry  = XML->sys.core[ithCore].phy_Regs_IRF_size;
4115          coredynp.num_ffreelist_entries = coredynp.num_FRF_entry  = XML->sys.core[ithCore].phy_Regs_FRF_size;
4116        }
4117        else if (coredynp.scheu_ty==ReservationStation)
4118        {//ROB serves as Phy RF in RS based OOO
4119      coredynp.phy_ireg_width  =  int(ceil(log2(XML->sys.core[ithCore].ROB_size)));
4120          coredynp.phy_freg_width  =  int(ceil(log2(XML->sys.core[ithCore].ROB_size)));
4121          coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size;
4122          coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size;
4123
4124        }
4125
4126}
4127        coredynp.globalCheckpoint   =  32;//best check pointing entries for a 4~8 issue OOO should be 16~48;See TR for reference.
4128        coredynp.perThreadState     =  8;
4129        coredynp.instruction_length = 32;
4130        coredynp.clockRate          =  XML->sys.core[ithCore].clock_rate;
4131        coredynp.clockRate          *= 1e6;
4132        coredynp.regWindowing= (XML->sys.core[ithCore].register_windows_size>0&&coredynp.core_ty==Inorder)?true:false;
4133        coredynp.executionTime = XML->sys.total_cycles/coredynp.clockRate;
4134        set_pppm(coredynp.pppm_lkg_multhread, 0, coredynp.num_hthreads, coredynp.num_hthreads, 0);
4135}
4136