1/*****************************************************************************
2 *                                McPAT
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
6 *                          All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 ***************************************************************************/
32
33#include <algorithm>
34#include <cassert>
35#include <cmath>
36#include <iostream>
37#include <sstream>
38#include <string>
39
40#include "basic_circuit.h"
41#include "basic_components.h"
42#include "common.h"
43#include "const.h"
44#include "core.h"
45#include "io.h"
46#include "parameter.h"
47
48int RegFU::RFWIN_ACCESS_MULTIPLIER = 16;
49
50// The five bits are: busy, Issued, Finished, speculative, valid
51int SchedulerU::ROB_STATUS_BITS = 5;
52
53InstFetchU::InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_,
54                       const CoreParameters & _core_params,
55                       const CoreStatistics & _core_stats, bool exist_)
56    : McPATComponent(_xml_data), icache(NULL), IB(NULL), BTB(NULL),
57      BPT(NULL), ID_inst(NULL), ID_operand(NULL), ID_misc(NULL),
58      interface_ip(*interface_ip_),
59      core_params(_core_params), core_stats(_core_stats), exist(exist_) {
60    if (!exist) return;
61    int idx, tag, data, size, line, assoc, banks;
62    bool is_default = true;
63
64    clockRate = core_params.clockRate;
65    name = "Instruction Fetch Unit";
66    // Check if there is an icache child:
67    int i;
68    icache = NULL;
69    for( i = 0; i < xml_data->nChildNode("component"); i++ ) {
70        XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
71        XMLCSTR type = childXML->getAttribute("type");
72
73        if (!type)
74            warnMissingComponentType(childXML->getAttribute("id"));
75
76        STRCMP(type, "CacheUnit") {
77            XMLCSTR name = childXML->getAttribute("name");
78            if (strcmp(name, "Instruction Cache") == 0 ||
79                strcmp(name, "icache") == 0) {
80                icache = new CacheUnit(childXML, &interface_ip);
81                children.push_back(icache);
82            }
83        }
84    }
85
86    set_params_stats();
87
88    //Instruction buffer
89    data = core_params.instruction_length * core_params.peak_issueW;
90    line = int(ceil(data / BITS_PER_BYTE));
91    size = core_params.num_hthreads * core_params.instruction_buffer_size *
92        line;
93    if (size < MIN_BUFFER_SIZE) {
94        size = MIN_BUFFER_SIZE;
95    }
96
97    interface_ip.cache_sz = size;
98    interface_ip.line_sz = line;
99    interface_ip.assoc = core_params.instruction_buffer_assoc;
100    interface_ip.nbanks = core_params.instruction_buffer_nbanks;
101    interface_ip.out_w = line * BITS_PER_BYTE;
102    interface_ip.specific_tag = core_params.instruction_buffer_tag_width > 0;
103    interface_ip.tag_w = core_params.instruction_buffer_tag_width;
104    interface_ip.access_mode = Normal;
105    interface_ip.obj_func_dyn_energy = 0;
106    interface_ip.obj_func_dyn_power = 0;
107    interface_ip.obj_func_leak_power = 0;
108    interface_ip.obj_func_cycle_t = 1;
109    interface_ip.num_rw_ports =
110        core_params.number_instruction_fetch_ports;
111    interface_ip.num_rd_ports = 0;
112    interface_ip.num_wr_ports = 0;
113    interface_ip.num_se_rd_ports = 0;
114    interface_ip.num_search_ports = 0;
115    interface_ip.is_cache = false;
116    interface_ip.pure_ram = true;
117    interface_ip.pure_cam = false;
118    interface_ip.throughput = 1.0 / clockRate;
119    interface_ip.latency = 1.0 / clockRate;
120
121    IB = new ArrayST(xml_data, &interface_ip, "Instruction Buffer",
122                     Core_device, clockRate, core_params.opt_local,
123                     core_params.core_ty);
124    IB->area.set_area(IB->area.get_area() + IB->local_result.area);
125    area.set_area(area.get_area() + IB->local_result.area);
126
127    if (core_params.predictionW > 0) {
128        /*
129         * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged
130         * It is only a cache without all the buffers in the cache controller since it is more like a
131         * look up table than a cache with cache controller. When access miss, no load from other places
132         * such as main memory (not actively fill the misses), it is passively updated under two circumstances:
133         * 1)  when BPT@ID stage finds out current is a taken branch while BTB missed
134         * 2)  When BPT@ID stage predicts differently than BTB
135         * 3)  When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid)
136         * 4)  when EXEU find out wrong target has been provided from BTB.
137         *
138         */
139        size = inst_fetch_params.btb_size;
140        line = inst_fetch_params.btb_block_size;
141        assoc = inst_fetch_params.btb_assoc;
142        banks = inst_fetch_params.btb_num_banks;
143        idx = int(ceil(log2(size / line / assoc)));
144        tag = virtual_address_width + int(ceil(log2(core_params.num_hthreads)))
145            + EXTRA_TAG_BITS;
146
147        interface_ip.cache_sz = size;
148        interface_ip.line_sz = line;
149        interface_ip.assoc = assoc;
150        interface_ip.nbanks = banks;
151        interface_ip.out_w = line * BITS_PER_BYTE;
152        interface_ip.specific_tag = tag > 0;
153        interface_ip.tag_w = tag;
154        interface_ip.access_mode = Normal;
155        interface_ip.obj_func_dyn_energy = 0;
156        interface_ip.obj_func_dyn_power = 0;
157        interface_ip.obj_func_leak_power = 0;
158        interface_ip.obj_func_cycle_t = 1;
159        interface_ip.num_rw_ports = 1;
160        interface_ip.num_rd_ports = core_params.predictionW;
161        interface_ip.num_wr_ports = core_params.predictionW;
162        interface_ip.num_se_rd_ports = 0;
163        interface_ip.num_search_ports = 0;
164        interface_ip.is_cache = true;
165        interface_ip.pure_ram = false;
166        interface_ip.pure_cam = false;
167        interface_ip.throughput = inst_fetch_params.btb_throughput / clockRate;
168        interface_ip.latency = inst_fetch_params.btb_latency / clockRate;
169
170        BTB = new ArrayST(xml_data, &interface_ip, "Branch Target Buffer",
171                          Core_device, clockRate, core_params.opt_local,
172                          core_params.core_ty);
173        area.set_area(area.get_area() + BTB->local_result.area);
174
175        BPT = new BranchPredictor(xml_data, &interface_ip,
176                                  core_params, core_stats);
177        area.set_area(area.get_area() + BPT->area.get_area());
178    }
179
180    ID_inst = new InstructionDecoder(xml_data, "Instruction Opcode Decoder",
181                                     is_default, &interface_ip,
182                                     core_params.opcode_width,
183                                     core_params.decodeW,
184                                     core_params.x86, clockRate,
185                                     Core_device, core_params.core_ty);
186
187    ID_operand = new InstructionDecoder(xml_data,
188                                        "Instruction Operand Decoder",
189                                        is_default, &interface_ip,
190                                        core_params.arch_ireg_width,
191                                        core_params.decodeW,
192                                        core_params.x86, clockRate,
193                                        Core_device, core_params.core_ty);
194
195    ID_misc = new InstructionDecoder(xml_data, "Instruction Microcode Decoder",
196                                     is_default, &interface_ip,
197                                     core_params.micro_opcode_length,
198                                     core_params.decodeW,
199                                     core_params.x86, clockRate,
200                                     Core_device, core_params.core_ty);
201    area.set_area(area.get_area()+ (ID_inst->area.get_area()
202                                    + ID_operand->area.get_area()
203                                    + ID_misc->area.get_area())
204                  * core_params.decodeW);
205}
206
207void
208InstFetchU::set_params_stats() {
209    int num_children = xml_data->nChildNode("component");
210    int i;
211    memset(&inst_fetch_params,0,sizeof(InstFetchParameters));
212    for (i = 0; i < num_children; i++) {
213        XMLNode* child = xml_data->getChildNodePtr("component", &i);
214        XMLCSTR type = child->getAttribute("type");
215
216        if (!type)
217            warnMissingComponentType(child->getAttribute("id"));
218
219        STRCMP(type, "BranchTargetBuffer") {
220            int sub_num_children = child->nChildNode("param");
221            int j;
222            for (j = 0; j < sub_num_children; j++) {
223                XMLNode* paramNode = child->getChildNodePtr("param", &j);
224                XMLCSTR node_name = paramNode->getAttribute("name");
225                XMLCSTR value = paramNode->getAttribute("value");
226
227                if (!node_name)
228                    warnMissingParamName(paramNode->getAttribute("id"));
229
230                ASSIGN_INT_IF("size", inst_fetch_params.btb_size);
231                ASSIGN_INT_IF("block_size", inst_fetch_params.btb_block_size);
232                ASSIGN_INT_IF("assoc", inst_fetch_params.btb_assoc);
233                ASSIGN_INT_IF("num_banks", inst_fetch_params.btb_num_banks);
234                ASSIGN_INT_IF("latency", inst_fetch_params.btb_latency);
235                ASSIGN_INT_IF("throughput", inst_fetch_params.btb_throughput);
236                ASSIGN_INT_IF("rw_ports", inst_fetch_params.btb_rw_ports);
237
238                else {
239                    warnUnrecognizedParam(node_name);
240                }
241            }
242
243            sub_num_children = child->nChildNode("stat");
244            for (j = 0; j < sub_num_children; j++) {
245                XMLNode* statNode = child->getChildNodePtr("stat", &j);
246                XMLCSTR node_name = statNode->getAttribute("name");
247                XMLCSTR value = statNode->getAttribute("value");
248
249                if (!node_name)
250                    warnMissingStatName(statNode->getAttribute("id"));
251
252                ASSIGN_FP_IF("read_accesses",
253                             inst_fetch_stats.btb_read_accesses);
254                ASSIGN_FP_IF("write_accesses",
255                             inst_fetch_stats.btb_write_accesses);
256                else {
257                    warnUnrecognizedStat(node_name);
258                }
259            }
260        }
261    }
262
263    // Parameter sanity check
264    if (inst_fetch_params.btb_size <= 0) {
265        errorNonPositiveParam("size");
266    }
267
268    if (inst_fetch_params.btb_block_size <= 0) {
269        errorNonPositiveParam("block_size");
270    }
271
272    if (inst_fetch_params.btb_assoc <= 0) {
273        errorNonPositiveParam("assoc");
274    }
275
276    if (inst_fetch_params.btb_num_banks <= 0) {
277        errorNonPositiveParam("num_banks");
278    }
279}
280
281BranchPredictor::BranchPredictor(XMLNode* _xml_data,
282                                 InputParameter* interface_ip_,
283                                 const CoreParameters & _core_params,
284                                 const CoreStatistics & _core_stats,
285                                 bool exist_)
286    : McPATComponent(_xml_data), globalBPT(NULL), localBPT(NULL),
287      L1_localBPT(NULL), L2_localBPT(NULL), chooser(NULL), RAS(NULL),
288      interface_ip(*interface_ip_),
289      core_params(_core_params), core_stats(_core_stats), exist(exist_) {
290    if (!exist) return;
291    int tag;
292    int data;
293    int size;
294
295    clockRate = core_params.clockRate;
296    name = "Branch Predictor";
297
298    // Common interface parameters for the branch predictor structures
299    interface_ip.pure_cam = false;
300
301    if (core_params.multithreaded) {
302        tag = int(log2(core_params.num_hthreads) + EXTRA_TAG_BITS);
303        interface_ip.specific_tag = tag > 0;
304        interface_ip.tag_w = tag;
305        interface_ip.is_cache = true;
306        interface_ip.pure_ram = false;
307    } else {
308        interface_ip.specific_tag = 0;
309        interface_ip.tag_w = 0;
310        interface_ip.is_cache = false;
311        interface_ip.pure_ram = true;
312    }
313
314    // Parse params and stats from XML
315    set_params_stats();
316
317    // Common interface parameters for the branch predictor structures
318    interface_ip.assoc = branch_pred_params.assoc;
319    interface_ip.nbanks = branch_pred_params.nbanks;
320
321    //Global predictor
322    data = int(ceil(branch_pred_params.global_predictor_bits / BITS_PER_BYTE));
323    size = data * branch_pred_params.global_predictor_entries;
324
325    interface_ip.cache_sz = size;
326    interface_ip.line_sz = data;
327    interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
328    interface_ip.access_mode = Fast;
329    interface_ip.obj_func_dyn_energy = 0;
330    interface_ip.obj_func_dyn_power = 0;
331    interface_ip.obj_func_leak_power = 0;
332    interface_ip.obj_func_cycle_t = 1;
333    interface_ip.num_rw_ports = 0;
334    interface_ip.num_rd_ports = core_params.predictionW;
335    interface_ip.num_wr_ports = core_params.predictionW;
336    interface_ip.num_se_rd_ports = 0;
337    interface_ip.num_search_ports = 0;
338    interface_ip.throughput = 1.0 / clockRate;
339    interface_ip.latency = 1.0 / clockRate;
340    globalBPT = new ArrayST(xml_data, &interface_ip, "Global Predictor",
341                            Core_device, clockRate, core_params.opt_local,
342                            core_params.core_ty);
343    area.set_area(area.get_area() + globalBPT->local_result.area);
344
345    //Local BPT (Level 1)
346    data = int(ceil(branch_pred_params.local_l1_predictor_size /
347                    BITS_PER_BYTE));
348    size = data * branch_pred_params.local_predictor_entries;
349
350    interface_ip.cache_sz = size;
351    interface_ip.line_sz = data;
352    interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
353    interface_ip.access_mode = Fast;
354    interface_ip.obj_func_dyn_energy = 0;
355    interface_ip.obj_func_dyn_power = 0;
356    interface_ip.obj_func_leak_power = 0;
357    interface_ip.obj_func_cycle_t = 1;
358    interface_ip.num_rw_ports = 0;
359    interface_ip.num_rd_ports = core_params.predictionW;
360    interface_ip.num_wr_ports = core_params.predictionW;
361    interface_ip.num_se_rd_ports = 0;
362    interface_ip.num_search_ports = 0;
363    interface_ip.throughput = 1.0 / clockRate;
364    interface_ip.latency = 1.0 / clockRate;
365    L1_localBPT = new ArrayST(xml_data, &interface_ip,
366                              "Local Predictor, Level 1",
367                              Core_device, clockRate, core_params.opt_local,
368                              core_params.core_ty);
369    L1_localBPT->area.set_area(L1_localBPT->area.get_area() +
370                               L1_localBPT->local_result.area);
371    area.set_area(area.get_area()+ L1_localBPT->local_result.area);
372
373    //Local BPT (Level 2)
374    data = int(ceil(branch_pred_params.local_l2_predictor_size /
375                    BITS_PER_BYTE));
376    size = data * branch_pred_params.local_predictor_entries;
377
378    interface_ip.cache_sz = size;
379    interface_ip.line_sz = data;
380    interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
381    interface_ip.access_mode = Fast;
382    interface_ip.obj_func_dyn_energy = 0;
383    interface_ip.obj_func_dyn_power = 0;
384    interface_ip.obj_func_leak_power = 0;
385    interface_ip.obj_func_cycle_t = 1;
386    interface_ip.num_rw_ports = 0;
387    interface_ip.num_rd_ports = core_params.predictionW;
388    interface_ip.num_wr_ports = core_params.predictionW;
389    interface_ip.num_se_rd_ports = 0;
390    interface_ip.num_search_ports = 0;
391    interface_ip.throughput = 1.0 / clockRate;
392    interface_ip.latency = 1.0 / clockRate;
393    L2_localBPT = new ArrayST(xml_data, &interface_ip,
394                              "Local Predictor, Level 2",
395                              Core_device, clockRate, core_params.opt_local,
396                              core_params.core_ty);
397    area.set_area(area.get_area() + L2_localBPT->local_result.area);
398
399    //Chooser
400    data = int(ceil(branch_pred_params.chooser_predictor_bits /
401                    BITS_PER_BYTE));
402    size = data * branch_pred_params.chooser_predictor_entries;
403
404    interface_ip.cache_sz = size;
405    interface_ip.line_sz = data;
406    interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
407    interface_ip.access_mode = Fast;
408    interface_ip.obj_func_dyn_energy = 0;
409    interface_ip.obj_func_dyn_power = 0;
410    interface_ip.obj_func_leak_power = 0;
411    interface_ip.obj_func_cycle_t = 1;
412    interface_ip.num_rw_ports = 0;
413    interface_ip.num_rd_ports = core_params.predictionW;
414    interface_ip.num_wr_ports = core_params.predictionW;
415    interface_ip.num_se_rd_ports = 0;
416    interface_ip.num_search_ports = 0;
417    interface_ip.throughput = 1.0 / clockRate;
418    interface_ip.latency = 1.0 / clockRate;
419    chooser = new ArrayST(xml_data, &interface_ip, "Predictor Chooser",
420                          Core_device, clockRate, core_params.opt_local,
421                          core_params.core_ty);
422    area.set_area(area.get_area() + chooser->local_result.area);
423
424    //RAS return address stacks are Duplicated for each thread.
425    data = int(ceil(core_params.pc_width / BITS_PER_BYTE));
426    size = data * core_params.RAS_size;
427
428    interface_ip.cache_sz = size;
429    interface_ip.line_sz = data;
430    interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
431    interface_ip.access_mode = Fast;
432    interface_ip.obj_func_dyn_energy = 0;
433    interface_ip.obj_func_dyn_power = 0;
434    interface_ip.obj_func_leak_power = 0;
435    interface_ip.obj_func_cycle_t = 1;
436    interface_ip.num_rw_ports = 0;
437    interface_ip.num_rd_ports = core_params.predictionW;
438    interface_ip.num_wr_ports = core_params.predictionW;
439    interface_ip.num_se_rd_ports = 0;
440    interface_ip.num_search_ports = 0;
441    interface_ip.is_cache = false;
442    interface_ip.pure_ram = true;
443    interface_ip.throughput = 1.0 / clockRate;
444    interface_ip.latency = 1.0 / clockRate;
445    RAS = new ArrayST(xml_data, &interface_ip, "RAS", Core_device, clockRate,
446                      core_params.opt_local, core_params.core_ty);
447    RAS->output_data.area *= core_params.num_hthreads;
448    area.set_area(area.get_area() + RAS->local_result.area *
449                  core_params.num_hthreads);
450
451}
452
453void
454BranchPredictor::set_params_stats() {
455    int num_children = xml_data->nChildNode("component");
456    int i;
457    for (i = 0; i < num_children; i++) {
458        XMLNode* child = xml_data->getChildNodePtr("component", &i);
459        XMLCSTR type = child->getAttribute("type");
460
461        if (!type)
462            warnMissingComponentType(child->getAttribute("id"));
463
464        STRCMP(type, "BranchPredictor") {
465            int sub_num_children = child->nChildNode("param");
466            int j;
467            for (j = 0; j < sub_num_children; j++) {
468                XMLNode* paramNode = child->getChildNodePtr("param", &j);
469                XMLCSTR node_name = paramNode->getAttribute("name");
470                XMLCSTR value = paramNode->getAttribute("value");
471
472                if (!node_name)
473                    warnMissingParamName(paramNode->getAttribute("id"));
474
475                ASSIGN_INT_IF("assoc", branch_pred_params.assoc);
476                ASSIGN_INT_IF("nbanks", branch_pred_params.nbanks);
477                ASSIGN_INT_IF("local_l1_predictor_size",
478                              branch_pred_params.local_l1_predictor_size);
479                ASSIGN_INT_IF("local_l2_predictor_size",
480                              branch_pred_params.local_l2_predictor_size);
481                ASSIGN_INT_IF("local_predictor_entries",
482                              branch_pred_params.local_predictor_entries);
483                ASSIGN_INT_IF("global_predictor_entries",
484                              branch_pred_params.global_predictor_entries);
485                ASSIGN_INT_IF("global_predictor_bits",
486                              branch_pred_params.global_predictor_bits);
487                ASSIGN_INT_IF("chooser_predictor_entries",
488                              branch_pred_params.chooser_predictor_entries);
489                ASSIGN_INT_IF("chooser_predictor_bits",
490                              branch_pred_params.chooser_predictor_bits);
491
492                else {
493                    warnUnrecognizedParam(node_name);
494                }
495            }
496            // The core reads in the number of branches and the number of
497            // function calls and these values are passed through the
498            // core_stats variable, so we don't need to read them in here
499        }
500    }
501}
502
503SchedulerU::SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_,
504                       const CoreParameters & _core_params,
505                       const CoreStatistics & _core_stats, bool exist_)
506    : McPATComponent(_xml_data), int_inst_window(NULL),
507      fp_inst_window(NULL), ROB(NULL), int_instruction_selection(NULL),
508      fp_instruction_selection(NULL),
509      interface_ip(*interface_ip_),
510      core_params(_core_params), core_stats(_core_stats), exist(exist_) {
511    if (!exist) return;
512    int tag;
513    int data;
514    int size;
515    int line;
516    bool is_default = true;
517    string tmp_name;
518
519    clockRate = core_params.clockRate;
520    name = "Instruction Scheduler";
521    if ((core_params.core_ty == Inorder && core_params.multithreaded)) {
522        //Instruction issue queue, in-order multi-issue or multithreaded
523        //processor also has this structure. Unified window for Inorder
524        //processors
525        //This tag width is the normal thread state bits based on
526        //Niagara Design
527        tag = int(log2(core_params.num_hthreads) * core_params.perThreadState);
528        data = core_params.instruction_length;
529        line = int(ceil(data / BITS_PER_BYTE));
530        size = core_params.instruction_window_size * line;
531        if (size < MIN_BUFFER_SIZE) {
532            size = MIN_BUFFER_SIZE;
533        }
534
535        //NOTE: x86 inst can be very lengthy, up to 15B.
536        //Source: Intel® 64 and IA-32 Architectures
537        //Software Developer’s Manual
538        interface_ip.cache_sz = size;
539        interface_ip.line_sz = line;
540        interface_ip.assoc = core_params.scheduler_assoc;
541        interface_ip.nbanks = core_params.scheduler_nbanks;
542        interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
543        interface_ip.specific_tag = tag > 0;
544        interface_ip.tag_w = tag;
545        interface_ip.access_mode = Sequential;
546        interface_ip.obj_func_dyn_energy = 0;
547        interface_ip.obj_func_dyn_power = 0;
548        interface_ip.obj_func_leak_power = 0;
549        interface_ip.obj_func_cycle_t = 1;
550        interface_ip.num_rw_ports = 0;
551        interface_ip.num_rd_ports = core_params.peak_issueW;
552        interface_ip.num_wr_ports = core_params.peak_issueW;
553        interface_ip.num_se_rd_ports = 0;
554        interface_ip.num_search_ports = core_params.peak_issueW;
555        interface_ip.is_cache = true;
556        interface_ip.pure_cam = false;
557        interface_ip.pure_ram = false;
558        interface_ip.throughput = 1.0 / clockRate;
559        interface_ip.latency = 1.0 / clockRate;
560        int_inst_window = new ArrayST(xml_data, &interface_ip,
561                                      "InstFetchQueue", Core_device, clockRate,
562                                      core_params.opt_local,
563                                      core_params.core_ty);
564        int_inst_window->output_data.area *= core_params.num_pipelines;
565        area.set_area(area.get_area() + int_inst_window->local_result.area *
566                      core_params.num_pipelines);
567        Iw_height = int_inst_window->local_result.cache_ht;
568
569        /*
570         * selection logic
571         * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up
572         * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who
573         * at the issue stage.
574         */
575
576        int_instruction_selection =
577            new selection_logic(xml_data, is_default,
578                                core_params.instruction_window_size,
579                                core_params.peak_issueW *
580                                core_params.num_hthreads,
581                                &interface_ip,
582                                "Int Instruction Selection Logic",
583                                core_stats.inst_window_wakeup_accesses,
584                                clockRate, Core_device, core_params.core_ty);
585
586        if (core_params.fp_instruction_window_size > 0) {
587            fp_instruction_selection =
588                new selection_logic(xml_data, is_default,
589                                    core_params.fp_instruction_window_size,
590                                    core_params.fp_issueW *
591                                    core_params.num_hthreads,
592                                    &interface_ip,
593                                    "FP Instruction Selection Logic",
594                                    core_stats.fp_inst_window_wakeup_accesses,
595                                    clockRate, Core_device,
596                                    core_params.core_ty);
597        }
598    }
599
600    if (core_params.core_ty == OOO) {
601        /*
602         * CAM based instruction window
603         * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored
604         * For RS based OOO it is the Reservation station, where both tags and values of phy regs are stored
605         * It is written once and read twice(two operands) before an instruction can be issued.
606         * X86 instruction can be very long up to 15B. add instruction length in XML
607         */
608        if (core_params.scheu_ty == PhysicalRegFile) {
609            tag = core_params.phy_ireg_width;
610            data = int((ceil((core_params.instruction_length +
611                              NUM_SOURCE_OPERANDS *
612                              (core_params.phy_ireg_width -
613                               core_params.arch_ireg_width)) /
614                             (double)NUM_SOURCE_OPERANDS) /
615                        BITS_PER_BYTE));
616            tmp_name = "Integer Instruction Window";
617        } else {
618            tag = core_params.phy_ireg_width;
619            data = int(ceil(((core_params.instruction_length +
620                              NUM_SOURCE_OPERANDS *
621                              (core_params.phy_ireg_width -
622                               core_params.arch_ireg_width) +
623                               2 * core_params.int_data_width) /
624                                (double)NUM_SOURCE_OPERANDS) /
625                            BITS_PER_BYTE));
626            tmp_name = "Integer Reservation Station";
627        }
628
629        size = data * core_params.instruction_window_size;
630
631        interface_ip.cache_sz = size;
632        interface_ip.line_sz = data;
633        interface_ip.assoc = core_params.scheduler_assoc;
634        interface_ip.nbanks = core_params.scheduler_nbanks;
635        interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
636        interface_ip.specific_tag = tag > 0;
637        interface_ip.tag_w = tag;
638        interface_ip.access_mode = Normal;
639        interface_ip.obj_func_dyn_energy = 0;
640        interface_ip.obj_func_dyn_power = 0;
641        interface_ip.obj_func_leak_power = 0;
642        interface_ip.obj_func_cycle_t = 1;
643        interface_ip.num_rw_ports = 0;
644        interface_ip.num_rd_ports = core_params.peak_issueW;
645        interface_ip.num_wr_ports = core_params.peak_issueW;
646        interface_ip.num_se_rd_ports = 0;
647        interface_ip.num_search_ports = core_params.peak_issueW;
648        interface_ip.is_cache = true;
649        interface_ip.pure_cam = false;
650        interface_ip.pure_ram = false;
651        interface_ip.throughput = NUM_SOURCE_OPERANDS * 1.0 / clockRate;
652        interface_ip.latency = NUM_SOURCE_OPERANDS * 1.0 / clockRate;
653        int_inst_window = new ArrayST(xml_data, &interface_ip, tmp_name,
654                                      Core_device, clockRate,
655                                      core_params.opt_local,
656                                      core_params.core_ty);
657        int_inst_window->output_data.area *= core_params.num_pipelines;
658        area.set_area(area.get_area() + int_inst_window->local_result.area *
659                      core_params.num_pipelines);
660        Iw_height = int_inst_window->local_result.cache_ht;
661
662        //FU inst window
663        if (core_params.scheu_ty == PhysicalRegFile) {
664            tag = NUM_SOURCE_OPERANDS * core_params.phy_freg_width;
665            data = int(ceil((core_params.instruction_length +
666                             NUM_SOURCE_OPERANDS *
667                             (core_params.phy_freg_width -
668                              core_params.arch_freg_width)) / BITS_PER_BYTE));
669            tmp_name = "FP Instruction Window";
670        } else {
671            tag = NUM_SOURCE_OPERANDS * core_params.phy_ireg_width;
672            data = int(ceil((core_params.instruction_length +
673                             NUM_SOURCE_OPERANDS *
674                             (core_params.phy_freg_width -
675                              core_params.arch_freg_width) +
676                             NUM_SOURCE_OPERANDS * core_params.fp_data_width) /
677                            BITS_PER_BYTE));
678            tmp_name = "FP Reservation Station";
679        }
680
681        size = data * core_params.fp_instruction_window_size;
682
683        interface_ip.cache_sz = size;
684        interface_ip.line_sz = data;
685        interface_ip.assoc = core_params.scheduler_assoc;
686        interface_ip.nbanks = core_params.scheduler_nbanks;
687        interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
688        interface_ip.specific_tag = tag > 0;
689        interface_ip.tag_w = tag;
690        interface_ip.access_mode = Normal;
691        interface_ip.obj_func_dyn_energy = 0;
692        interface_ip.obj_func_dyn_power = 0;
693        interface_ip.obj_func_leak_power = 0;
694        interface_ip.obj_func_cycle_t = 1;
695        interface_ip.num_rw_ports = 0;
696        interface_ip.num_rd_ports = core_params.fp_issueW;
697        interface_ip.num_wr_ports = core_params.fp_issueW;
698        interface_ip.num_se_rd_ports = 0;
699        interface_ip.num_search_ports = core_params.fp_issueW;
700        interface_ip.is_cache = true;
701        interface_ip.pure_cam = false;
702        interface_ip.pure_ram = false;
703        interface_ip.throughput = 1.0 / clockRate;
704        interface_ip.latency = 1.0 / clockRate;
705        fp_inst_window =
706            new ArrayST(xml_data, &interface_ip, tmp_name, Core_device,
707                        clockRate, core_params.opt_local, core_params.core_ty);
708        fp_inst_window->output_data.area *= core_params.num_fp_pipelines;
709        area.set_area(area.get_area() + fp_inst_window->local_result.area
710                      *core_params.num_fp_pipelines);
711        fp_Iw_height = fp_inst_window->local_result.cache_ht;
712
713        if (core_params.ROB_size > 0) {
714            /*
715             *  if ROB_size = 0, then the target processor does not support hardware-based
716             *  speculation, i.e. , the processor allow OOO issue as well as OOO completion, which
717             *  means branch must be resolved before instruction issued into instruction window, since
718             *  there is no change to flush miss-predict branch path after instructions are issued in this situation.
719             *
720             *  ROB.ROB size = inflight inst. ROB is unified for int and fp inst.
721             *  One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7.
722             *  However, this approach is abandoned due to its high power and poor scalablility.
723                         *      McPAT uses current implementation of ROB as circular buffer.
724                         *      ROB is written once when instruction is issued and read once when the instruction is committed.         *
725             */
726            int robExtra = int(ceil(ROB_STATUS_BITS +
727                                    log2(core_params.num_hthreads)));
728
729            if (core_params.scheu_ty == PhysicalRegFile) {
730                //PC is to id the instruction for recover exception.
731                //inst is used to map the renamed dest. registers. so that
732                //commit stage can know which reg/RRAT to update
733                data = int(ceil((robExtra + core_params.pc_width +
734                                 core_params.phy_ireg_width) / BITS_PER_BYTE));
735            } else {
736                //in RS based OOO, ROB also contains value of destination reg
737                data  = int(ceil((robExtra + core_params.pc_width +
738                                  core_params.phy_ireg_width +
739                                  core_params.fp_data_width) / BITS_PER_BYTE));
740            }
741
742            interface_ip.cache_sz = data * core_params.ROB_size;
743            interface_ip.line_sz = data;
744            interface_ip.assoc = core_params.ROB_assoc;
745            interface_ip.nbanks = core_params.ROB_nbanks;
746            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
747            interface_ip.specific_tag = core_params.ROB_tag_width > 0;
748            interface_ip.tag_w = core_params.ROB_tag_width;
749            interface_ip.access_mode = Sequential;
750            interface_ip.obj_func_dyn_energy = 0;
751            interface_ip.obj_func_dyn_power = 0;
752            interface_ip.obj_func_leak_power = 0;
753            interface_ip.obj_func_cycle_t = 1;
754            interface_ip.num_rw_ports = 0;
755            interface_ip.num_rd_ports = core_params.peak_commitW;
756            interface_ip.num_wr_ports = core_params.peak_issueW;
757            interface_ip.num_se_rd_ports = 0;
758            interface_ip.num_search_ports    = 0;
759            interface_ip.is_cache = false;
760            interface_ip.pure_cam = false;
761            interface_ip.pure_ram = true;
762            interface_ip.throughput = 1.0 / clockRate;
763            interface_ip.latency = 1.0 / clockRate;
764            ROB = new ArrayST(xml_data, &interface_ip, "Reorder Buffer",
765                              Core_device, clockRate, core_params.opt_local,
766                              core_params.core_ty);
767            ROB->output_data.area *= core_params.num_pipelines;
768            area.set_area(area.get_area() + ROB->local_result.area *
769                          core_params.num_pipelines);
770            ROB_height = ROB->local_result.cache_ht;
771        }
772
773        int_instruction_selection =
774            new selection_logic(xml_data, is_default,
775                                core_params.instruction_window_size,
776                                core_params.peak_issueW, &interface_ip,
777                                "Int Instruction Selection Logic",
778                                core_stats.inst_window_wakeup_accesses,
779                                clockRate, Core_device, core_params.core_ty);
780
781        if (core_params.fp_instruction_window_size > 0) {
782            fp_instruction_selection =
783                new selection_logic(xml_data, is_default,
784                                    core_params.fp_instruction_window_size,
785                                    core_params.fp_issueW, &interface_ip,
786                                    "FP Instruction Selection Logic",
787                                    core_stats.fp_inst_window_wakeup_accesses,
788                                    clockRate, Core_device,
789                                    core_params.core_ty);
790        }
791
792    }
793}
794
795LoadStoreU::LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_,
796                       const CoreParameters & _core_params,
797                       const CoreStatistics & _core_stats, bool exist_)
798    : McPATComponent(_xml_data), dcache(NULL), LSQ(NULL), LoadQ(NULL),
799      interface_ip(*interface_ip_),
800      core_params(_core_params), core_stats(_core_stats), exist(exist_) {
801    if (!exist) return;
802    int  tag;
803    int line;
804    int size;
805    int ldst_opcode = core_params.opcode_width;
806
807    clockRate = core_params.clockRate;
808    name = "Load/Store Unit";
809
810    // Check if there is a dcache child:
811    int i;
812    dcache = NULL;
813    for( i = 0; i < xml_data->nChildNode("component"); i++ ) {
814        XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
815        XMLCSTR type = childXML->getAttribute("type");
816
817        if (!type)
818            warnMissingComponentType(childXML->getAttribute("id"));
819
820        STRCMP(type, "CacheUnit") {
821            XMLCSTR name = childXML->getAttribute("name");
822            if (strcmp(name, "Data Cache") == 0 ||
823                strcmp(name, "dcache") == 0) {
824                dcache = new CacheUnit(childXML, &interface_ip);
825                children.push_back(dcache);
826            }
827        }
828    }
829
830    /*
831     * LSU--in-order processors do not have separate load queue: unified lsq
832     * partitioned among threads
833     * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ
834     */
835    tag = ldst_opcode + virtual_address_width +
836        int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;
837    line = int(ceil(data_path_width / BITS_PER_BYTE));
838    size = core_params.store_buffer_size * line * core_params.num_hthreads;
839
840    interface_ip.cache_sz = size;
841    interface_ip.line_sz = line;
842    interface_ip.assoc = core_params.store_buffer_assoc;
843    interface_ip.nbanks = core_params.store_buffer_nbanks;
844    interface_ip.out_w = line * BITS_PER_BYTE;
845    interface_ip.specific_tag = tag > 0;
846    interface_ip.tag_w = tag;
847    interface_ip.access_mode = Sequential;
848    interface_ip.obj_func_dyn_energy = 0;
849    interface_ip.obj_func_dyn_power = 0;
850    interface_ip.obj_func_leak_power = 0;
851    interface_ip.obj_func_cycle_t = 1;
852    interface_ip.num_rw_ports = 0;
853    interface_ip.num_rd_ports = core_params.memory_ports;
854    interface_ip.num_wr_ports = core_params.memory_ports;
855    interface_ip.num_se_rd_ports = 0;
856    interface_ip.num_search_ports = core_params.memory_ports;
857    interface_ip.is_cache = true;
858    interface_ip.pure_ram = false;
859    interface_ip.pure_cam = false;
860    interface_ip.throughput = 1.0 / clockRate;
861    interface_ip.latency = 1.0 / clockRate;
862    LSQ = new ArrayST(xml_data, &interface_ip, "Store Queue", Core_device,
863                      clockRate, core_params.opt_local, core_params.core_ty);
864    area.set_area(area.get_area() + LSQ->local_result.area);
865    area.set_area(area.get_area()*cdb_overhead);
866    lsq_height = LSQ->local_result.cache_ht * sqrt(cdb_overhead);
867
868    if ((core_params.core_ty == OOO) && (core_params.load_buffer_size > 0)) {
869        tag = ldst_opcode + virtual_address_width +
870            int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;
871        line = int(ceil(data_path_width / BITS_PER_BYTE));
872        size = core_params.load_buffer_size * line * core_params.num_hthreads;
873
874        interface_ip.cache_sz = size;
875        interface_ip.line_sz = line;
876        interface_ip.assoc = core_params.load_buffer_assoc;
877        interface_ip.nbanks = core_params.load_buffer_nbanks;
878        interface_ip.out_w = line * BITS_PER_BYTE;
879        interface_ip.specific_tag = tag > 0;
880        interface_ip.tag_w = tag;
881        interface_ip.access_mode = Sequential;
882        interface_ip.obj_func_dyn_energy = 0;
883        interface_ip.obj_func_dyn_power = 0;
884        interface_ip.obj_func_leak_power = 0;
885        interface_ip.obj_func_cycle_t = 1;
886        interface_ip.num_rw_ports = 0;
887        interface_ip.num_rd_ports = core_params.memory_ports;
888        interface_ip.num_wr_ports = core_params.memory_ports;
889        interface_ip.num_se_rd_ports = 0;
890        interface_ip.num_search_ports = core_params.memory_ports;
891        interface_ip.is_cache = true;
892        interface_ip.pure_ram = false;
893        interface_ip.pure_cam = false;
894        interface_ip.throughput = 1.0 / clockRate;
895        interface_ip.latency = 1.0 / clockRate;
896        LoadQ = new ArrayST(xml_data, &interface_ip, "Load Queue", Core_device,
897                            clockRate, core_params.opt_local,
898                            core_params.core_ty);
899        LoadQ->area.set_area(LoadQ->area.get_area() +
900                             LoadQ->local_result.area);
901        area.set_area(area.get_area()*cdb_overhead);
902        lsq_height = (LSQ->local_result.cache_ht +
903                      LoadQ->local_result.cache_ht) * sqrt(cdb_overhead);
904    }
905
906}
907
908MemManU::MemManU(XMLNode* _xml_data, InputParameter* interface_ip_,
909                 const CoreParameters & _core_params,
910                 const CoreStatistics & _core_stats, bool exist_)
911    : McPATComponent(_xml_data), itlb(NULL), dtlb(NULL),
912      interface_ip(*interface_ip_),
913      core_params(_core_params), core_stats(_core_stats), exist(exist_) {
914    if (!exist) return;
915    int tag;
916    int data;
917    int line;
918
919    clockRate = core_params.clockRate;
920    name = "Memory Management Unit";
921
922    set_params_stats();
923
924    // These are shared between ITLB and DTLB
925    interface_ip.is_cache            = true;
926    interface_ip.pure_cam            = false;
927    interface_ip.pure_ram            = false;
928    //Itlb TLBs are partioned among threads according to Nigara and Nehalem
929    tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) +
930        int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;
931    data = physical_address_width - int(floor(log2(virtual_memory_page_size)));
932    line = int(ceil(data / BITS_PER_BYTE));
933
934    interface_ip.cache_sz = mem_man_params.itlb_number_entries * line;
935    interface_ip.line_sz = line;
936    interface_ip.assoc = mem_man_params.itlb_assoc;
937    interface_ip.nbanks = mem_man_params.itlb_nbanks;
938    interface_ip.out_w = line * BITS_PER_BYTE;
939    interface_ip.specific_tag = tag > 0;
940    interface_ip.tag_w = tag;
941    interface_ip.access_mode = Normal;
942    interface_ip.obj_func_dyn_energy = 0;
943    interface_ip.obj_func_dyn_power = 0;
944    interface_ip.obj_func_leak_power = 0;
945    interface_ip.obj_func_cycle_t = 1;
946    interface_ip.num_rw_ports = core_params.number_instruction_fetch_ports;
947    interface_ip.num_rd_ports = 0;
948    interface_ip.num_wr_ports = 0;
949    interface_ip.num_se_rd_ports = 0;
950    interface_ip.num_search_ports = core_params.number_instruction_fetch_ports;
951    interface_ip.throughput = mem_man_params.itlb_throughput / clockRate;
952    interface_ip.latency = mem_man_params.itlb_latency / clockRate;
953    itlb = new ArrayST(xml_data, &interface_ip, "Instruction TLB", Core_device,
954                       clockRate, core_params.opt_local, core_params.core_ty);
955    area.set_area(area.get_area() + itlb->local_result.area);
956
957    //dtlb
958    tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) +
959        int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;
960    data = physical_address_width - int(floor(log2(virtual_memory_page_size)));
961    line = int(ceil(data / BITS_PER_BYTE));
962
963    interface_ip.cache_sz = mem_man_params.dtlb_number_entries * line;
964    interface_ip.line_sz = line;
965    interface_ip.assoc = mem_man_params.dtlb_assoc;
966    interface_ip.nbanks = mem_man_params.dtlb_nbanks;
967    interface_ip.out_w = line * BITS_PER_BYTE;
968    interface_ip.specific_tag = tag > 0;
969    interface_ip.tag_w = tag;
970    interface_ip.access_mode = Normal;
971    interface_ip.obj_func_dyn_energy = 0;
972    interface_ip.obj_func_dyn_power = 0;
973    interface_ip.obj_func_leak_power = 0;
974    interface_ip.obj_func_cycle_t = 1;
975    interface_ip.num_rw_ports = 0;
976    interface_ip.num_rd_ports = core_params.memory_ports;
977    interface_ip.num_wr_ports = core_params.memory_ports;
978    interface_ip.num_se_rd_ports = 0;
979    interface_ip.num_search_ports = core_params.memory_ports;
980    interface_ip.throughput = mem_man_params.dtlb_throughput / clockRate;
981    interface_ip.latency = mem_man_params.dtlb_latency / clockRate;
982    dtlb = new ArrayST(xml_data, &interface_ip, "Data TLB", Core_device,
983                       clockRate, core_params.opt_local, core_params.core_ty);
984    area.set_area(area.get_area() + dtlb->local_result.area);
985
986}
987
/**
 * Populate mem_man_params and mem_man_stats from this unit's XML children.
 *
 * Both structures are zero-filled first. Each "component" child of xml_data
 * is then inspected by its "type" attribute: an "InstructionTLB" child
 * supplies the itlb_* fields and a "DataTLB" child supplies the dtlb_*
 * fields, read from the child's "param" and "stat" sub-nodes. A child with
 * no "type" attribute is reported through warnMissingComponentType().
 *
 * NOTE(review): STRCMP, ASSIGN_INT_IF and ASSIGN_FP_IF are macros defined
 * outside this function. The locals node_name and value are never referenced
 * explicitly below, so presumably the ASSIGN_* macros use them by name and
 * chain onto the preceding `if`; confirm against the macro definitions before
 * restructuring or renaming anything here.
 */
void
MemManU::set_params_stats() {
    // Start from all-zero parameters and statistics.
    memset(&mem_man_params, 0, sizeof(MemoryManagementParams));
    memset(&mem_man_stats, 0, sizeof(MemoryManagementStats));
    int num_children = xml_data->nChildNode("component");
    int i;
    for (i = 0; i < num_children; i++) {
        // The loop index is passed by address to the XML lookup.
        XMLNode* child = xml_data->getChildNodePtr("component", &i);
        XMLCSTR type = child->getAttribute("type");

        if (!type)
            warnMissingComponentType(child->getAttribute("id"));

        STRCMP(type, "InstructionTLB") {
            // ---- ITLB parameters ----
            int sub_num_children = child->nChildNode("param");
            int j;
            for (j = 0; j < sub_num_children; j++) {
                XMLNode* paramNode = child->getChildNodePtr("param", &j);
                XMLCSTR node_name = paramNode->getAttribute("name");
                XMLCSTR value = paramNode->getAttribute("value");

                if (!node_name)
                    warnMissingParamName(paramNode->getAttribute("id"));

                ASSIGN_INT_IF("number_entries",
                              mem_man_params.itlb_number_entries);
                ASSIGN_FP_IF("latency", mem_man_params.itlb_latency);
                ASSIGN_FP_IF("throughput", mem_man_params.itlb_throughput);
                ASSIGN_FP_IF("assoc", mem_man_params.itlb_assoc);
                ASSIGN_FP_IF("nbanks", mem_man_params.itlb_nbanks);

                // Names not handled by any case above are reported.
                else {
                    warnUnrecognizedParam(node_name);
                }
            }
            // ---- ITLB statistics ----
            sub_num_children = child->nChildNode("stat");
            for (j = 0; j < sub_num_children; j++) {
                XMLNode* statNode = child->getChildNodePtr("stat", &j);
                XMLCSTR node_name = statNode->getAttribute("name");
                XMLCSTR value = statNode->getAttribute("value");

                if (!node_name)
                    warnMissingStatName(statNode->getAttribute("id"));

                ASSIGN_FP_IF("total_accesses",
                             mem_man_stats.itlb_total_accesses);
                ASSIGN_FP_IF("total_misses", mem_man_stats.itlb_total_misses);
                ASSIGN_FP_IF("conflicts", mem_man_stats.itlb_conflicts);

                // Names not handled by any case above are reported.
                else {
                    warnUnrecognizedStat(node_name);
                }
            }
        } STRCMP(type, "DataTLB") {
            // ---- DTLB parameters ----
            int sub_num_children = child->nChildNode("param");
            int j;
            for (j = 0; j < sub_num_children; j++) {
                XMLNode* paramNode = child->getChildNodePtr("param", &j);
                XMLCSTR node_name = paramNode->getAttribute("name");
                XMLCSTR value = paramNode->getAttribute("value");

                if (!node_name)
                    warnMissingParamName(paramNode->getAttribute("id"));

                ASSIGN_INT_IF("number_entries",
                              mem_man_params.dtlb_number_entries);
                ASSIGN_FP_IF("latency", mem_man_params.dtlb_latency);
                ASSIGN_FP_IF("throughput", mem_man_params.dtlb_throughput);
                ASSIGN_FP_IF("assoc", mem_man_params.dtlb_assoc);
                ASSIGN_FP_IF("nbanks", mem_man_params.dtlb_nbanks);

                // Names not handled by any case above are reported.
                else {
                    warnUnrecognizedParam(node_name);
                }
            }
            // ---- DTLB statistics ----
            sub_num_children = child->nChildNode("stat");
            for (j = 0; j < sub_num_children; j++) {
                XMLNode* statNode = child->getChildNodePtr("stat", &j);
                XMLCSTR node_name = statNode->getAttribute("name");
                XMLCSTR value = statNode->getAttribute("value");

                if (!node_name)
                    warnMissingStatName(statNode->getAttribute("id"));

                ASSIGN_FP_IF("read_accesses",
                             mem_man_stats.dtlb_read_accesses);
                ASSIGN_FP_IF("read_misses", mem_man_stats.dtlb_read_misses);
                ASSIGN_FP_IF("write_accesses",
                             mem_man_stats.dtlb_write_accesses);
                ASSIGN_FP_IF("write_misses", mem_man_stats.dtlb_write_misses);
                ASSIGN_FP_IF("conflicts", mem_man_stats.dtlb_conflicts);

                // Names not handled by any case above are reported.
                else {
                    warnUnrecognizedStat(node_name);
                }
            }
        }
    }
}
1087
1088RegFU::RegFU(XMLNode* _xml_data, InputParameter* interface_ip_,
1089             const CoreParameters & _core_params,
1090             const CoreStatistics & _core_stats, bool exist_)
1091        : McPATComponent(_xml_data), IRF(NULL), FRF(NULL), RFWIN(NULL),
1092          interface_ip(*interface_ip_),
1093          core_params(_core_params), core_stats(_core_stats), exist(exist_) {
1094    /*
1095     * processors have separate architectural register files for each thread.
1096     * therefore, the bypass buses need to travel across all the register files.
1097     */
1098    if (!exist) return;
1099    int data;
1100    int line;
1101
1102    clockRate = core_params.clockRate;
1103    name = "Register File Unit";
1104
1105    //**********************************IRF************************************
1106    data = core_params.int_data_width;
1107    line = int(ceil(data / BITS_PER_BYTE));
1108
1109    interface_ip.cache_sz = core_params.num_IRF_entry * line;
1110    interface_ip.line_sz = line;
1111    interface_ip.assoc = core_params.phy_Regs_IRF_assoc;
1112    interface_ip.nbanks = core_params.phy_Regs_IRF_nbanks;
1113    interface_ip.out_w = line * BITS_PER_BYTE;
1114    interface_ip.specific_tag = core_params.phy_Regs_IRF_tag_width > 0;
1115    interface_ip.tag_w = core_params.phy_Regs_IRF_tag_width;
1116    interface_ip.access_mode = Sequential;
1117    interface_ip.obj_func_dyn_energy = 0;
1118    interface_ip.obj_func_dyn_power = 0;
1119    interface_ip.obj_func_leak_power = 0;
1120    interface_ip.obj_func_cycle_t = 1;
1121    interface_ip.num_rw_ports = 0;
1122    interface_ip.num_rd_ports = core_params.phy_Regs_IRF_rd_ports;
1123    interface_ip.num_wr_ports = core_params.phy_Regs_IRF_wr_ports;
1124    interface_ip.num_se_rd_ports = 0;
1125    interface_ip.num_search_ports = 0;
1126    interface_ip.is_cache = false;
1127    interface_ip.pure_cam = false;
1128    interface_ip.pure_ram = true;
1129    interface_ip.throughput = 1.0 / clockRate;
1130    interface_ip.latency = 1.0 / clockRate;
1131    IRF = new ArrayST(xml_data, &interface_ip, "Integer Register File",
1132                      Core_device, clockRate, core_params.opt_local,
1133                      core_params.core_ty);
1134    IRF->output_data.area *= core_params.num_hthreads *
1135        core_params.num_pipelines * cdb_overhead;
1136    area.set_area(area.get_area() + IRF->local_result.area *
1137                  core_params.num_hthreads * core_params.num_pipelines *
1138                  cdb_overhead);
1139
1140    //**********************************FRF************************************
1141    data = core_params.fp_data_width;
1142    line = int(ceil(data / BITS_PER_BYTE));
1143
1144    interface_ip.cache_sz = core_params.num_FRF_entry * line;
1145    interface_ip.line_sz = line;
1146    interface_ip.assoc = core_params.phy_Regs_FRF_assoc;
1147    interface_ip.nbanks = core_params.phy_Regs_FRF_nbanks;
1148    interface_ip.out_w = line * BITS_PER_BYTE;
1149    interface_ip.specific_tag = core_params.phy_Regs_FRF_tag_width > 0;
1150    interface_ip.tag_w = core_params.phy_Regs_FRF_tag_width;
1151    interface_ip.access_mode = Sequential;
1152    interface_ip.obj_func_dyn_energy = 0;
1153    interface_ip.obj_func_dyn_power = 0;
1154    interface_ip.obj_func_leak_power = 0;
1155    interface_ip.obj_func_cycle_t = 1;
1156    interface_ip.num_rw_ports = 0;
1157    interface_ip.num_rd_ports = core_params.phy_Regs_FRF_rd_ports;
1158    interface_ip.num_wr_ports = core_params.phy_Regs_FRF_wr_ports;
1159    interface_ip.num_se_rd_ports = 0;
1160    interface_ip.num_search_ports = 0;
1161    interface_ip.is_cache = false;
1162    interface_ip.pure_cam = false;
1163    interface_ip.pure_ram = true;
1164    interface_ip.throughput = 1.0 / clockRate;
1165    interface_ip.latency = 1.0 / clockRate;
1166    FRF = new ArrayST(xml_data, &interface_ip, "FP Register File", Core_device,
1167                      clockRate, core_params.opt_local, core_params.core_ty);
1168    FRF->output_data.area *= core_params.num_hthreads *
1169        core_params.num_fp_pipelines * cdb_overhead;
1170    area.set_area(area.get_area() + FRF->local_result.area *
1171                  core_params.num_hthreads * core_params.num_fp_pipelines *
1172                  cdb_overhead);
1173    int_regfile_height = IRF->local_result.cache_ht *
1174        core_params.num_hthreads * sqrt(cdb_overhead);
1175    fp_regfile_height = FRF->local_result.cache_ht * core_params.num_hthreads *
1176        sqrt(cdb_overhead);
1177    //since a EXU is associated with each pipeline, the cdb should not have
1178    //longer length.
1179
1180    if (core_params.regWindowing) {
1181        //*********************************REG_WIN*****************************
1182        //ECC, and usually 2 regs are transfered together during window
1183        //shifting.Niagara Mega cell
1184        data = core_params.int_data_width;
1185        line = int(ceil(data / BITS_PER_BYTE));
1186
1187        interface_ip.cache_sz = core_params.register_window_size *
1188            IRF->l_ip.cache_sz * core_params.num_hthreads;
1189        interface_ip.line_sz = line;
1190        interface_ip.assoc = core_params.register_window_assoc;
1191        interface_ip.nbanks = core_params.register_window_nbanks;
1192        interface_ip.out_w = line * BITS_PER_BYTE;
1193        interface_ip.specific_tag = core_params.register_window_tag_width > 0;
1194        interface_ip.tag_w = core_params.register_window_tag_width;
1195        interface_ip.access_mode = Sequential;
1196        interface_ip.obj_func_dyn_energy = 0;
1197        interface_ip.obj_func_dyn_power = 0;
1198        interface_ip.obj_func_leak_power = 0;
1199        interface_ip.obj_func_cycle_t = 1;
1200        interface_ip.num_rw_ports = core_params.register_window_rw_ports;
1201        interface_ip.num_rd_ports = 0;
1202        interface_ip.num_wr_ports = 0;
1203        interface_ip.num_se_rd_ports = 0;
1204        interface_ip.num_search_ports = 0;
1205        interface_ip.is_cache = false;
1206        interface_ip.pure_cam = false;
1207        interface_ip.pure_ram = true;
1208        interface_ip.throughput =
1209            core_params.register_window_throughput / clockRate;
1210        interface_ip.latency =
1211            core_params.register_window_latency / clockRate;
1212        RFWIN = new ArrayST(xml_data, &interface_ip, "RegWindow", Core_device,
1213                            clockRate, core_params.opt_local,
1214                            core_params.core_ty);
1215        RFWIN->output_data.area *= core_params.num_pipelines;
1216        area.set_area(area.get_area() + RFWIN->local_result.area *
1217                      core_params.num_pipelines);
1218    }
1219}
1220
1221EXECU::EXECU(XMLNode* _xml_data,
1222             InputParameter* interface_ip_, double lsq_height_,
1223             const CoreParameters & _core_params,
1224             const CoreStatistics & _core_stats, bool exist_)
1225    : McPATComponent(_xml_data), rfu(NULL), scheu(NULL), fp_u(NULL),
1226      exeu(NULL), mul(NULL), int_bypass(NULL), intTagBypass(NULL),
1227      int_mul_bypass(NULL), intTag_mul_Bypass(NULL), fp_bypass(NULL),
1228      fpTagBypass(NULL), interface_ip(*interface_ip_),
1229      lsq_height(lsq_height_), core_params(_core_params),
1230      core_stats(_core_stats), exist(exist_) {
1231    if (!exist) return;
1232    double fu_height = 0.0;
1233    clockRate = core_params.clockRate;
1234    name = "Execution Unit";
1235    rfu = new RegFU(xml_data, &interface_ip, core_params, core_stats);
1236    if (core_params.core_ty == OOO ||
1237        (core_params.core_ty == Inorder && core_params.multithreaded)) {
1238        scheu = new SchedulerU(xml_data, &interface_ip, core_params,
1239                               core_stats);
1240        area.set_area(area.get_area() + scheu->area.get_area() );
1241    }
1242    exeu  = new FunctionalUnit(xml_data, &interface_ip, core_params,
1243                               core_stats, ALU);
1244    area.set_area(area.get_area() + exeu->area.get_area() +
1245                  rfu->area.get_area());
1246    fu_height = exeu->FU_height;
1247    if (core_params.num_fpus > 0) {
1248        fp_u  = new FunctionalUnit(xml_data, &interface_ip,
1249                                   core_params, core_stats, FPU);
1250        area.set_area(area.get_area() + fp_u->area.get_area());
1251    }
1252    if (core_params.num_muls > 0) {
1253        mul   = new FunctionalUnit(xml_data, &interface_ip,
1254                                   core_params, core_stats, MUL);
1255        area.set_area(area.get_area() + mul->area.get_area());
1256        fu_height +=  mul->FU_height;
1257    }
1258    /*
1259     * broadcast logic, including int-broadcast; int_tag-broadcast;
1260     * fp-broadcast; fp_tag-broadcast
1261     * integer by pass has two paths and fp has 3 paths.
1262     * on the same bus there are multiple tri-state drivers and muxes that go
1263     * to different components on the same bus
1264     */
1265    interface_ip.wt = core_params.execu_broadcast_wt;
1266    interface_ip.wire_is_mat_type = core_params.execu_wire_mat_type;
1267    interface_ip.wire_os_mat_type = core_params.execu_wire_mat_type;
1268    interface_ip.throughput = core_params.broadcast_numerator / clockRate;
1269    interface_ip.latency = core_params.broadcast_numerator / clockRate;
1270    double scheu_Iw_height = 0.0;
1271    double scheu_ROB_height = 0.0;
1272    double scheu_fp_Iw_height = 0.0;
1273    if (scheu) {
1274        scheu_Iw_height = scheu->Iw_height;
1275        scheu_ROB_height = scheu->ROB_height;
1276        scheu_fp_Iw_height = scheu->fp_Iw_height;
1277    }
1278
1279    // Common bypass logic parameters
1280    double base_w = core_params.execu_bypass_base_width;
1281    double base_h = core_params.execu_bypass_base_height;
1282    int level = core_params.execu_bypass_start_wiring_level;
1283    double route_over_perc = core_params.execu_bypass_route_over_perc;
1284    Wire_type wire_type = core_params.execu_bypass_wire_type;
1285    int data_w;
1286    double len;
1287
1288    if (core_params.core_ty == Inorder) {
1289        data_w = int(ceil(data_path_width / 32.0)*32);
1290        len = rfu->int_regfile_height + exeu->FU_height + lsq_height;
1291        int_bypass = new Interconnect(xml_data, "Int Bypass Data", Core_device,
1292                                      base_w, base_h, data_w, len,
1293                                      &interface_ip, level, clockRate, false,
1294                                      route_over_perc, core_params.opt_local,
1295                                      core_params.core_ty, wire_type);
1296
1297        data_w = core_params.perThreadState;
1298        len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
1299            scheu_Iw_height;
1300        intTagBypass = new Interconnect(xml_data, "Int Bypass Tag",
1301                                        Core_device,
1302                                        base_w, base_h, data_w, len,
1303                                        &interface_ip, level, clockRate, false,
1304                                        route_over_perc, core_params.opt_local,
1305                                        core_params.core_ty, wire_type);
1306
1307        if (core_params.num_muls > 0) {
1308            data_w = int(ceil(data_path_width / 32.0)*32*1.5);
1309            len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height +
1310                lsq_height;
1311            int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data",
1312                                              Core_device, base_w, base_h,
1313                                              data_w, len, &interface_ip,
1314                                              level, clockRate, false,
1315                                              route_over_perc,
1316                                              core_params.opt_local,
1317                                              core_params.core_ty, wire_type);
1318
1319            data_w = core_params.perThreadState;
1320            len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height +
1321                lsq_height + scheu_Iw_height;
1322            intTag_mul_Bypass = new Interconnect(xml_data, "Mul Bypass Tag",
1323                                                 Core_device, base_w, base_h,
1324                                                 data_w, len, &interface_ip,
1325                                                 level, clockRate, false,
1326                                                 route_over_perc,
1327                                                 core_params.opt_local,
1328                                                 core_params.core_ty,
1329                                                 wire_type);
1330        }
1331
1332        if (core_params.num_fpus > 0) {
1333            data_w = int(ceil(data_path_width / 32.0)*32*1.5);
1334            len = rfu->fp_regfile_height + fp_u->FU_height;
1335            fp_bypass = new Interconnect(xml_data, "FP Bypass Data",
1336                                         Core_device,
1337                                         base_w, base_h, data_w, len,
1338                                         &interface_ip, level, clockRate,
1339                                         false, route_over_perc,
1340                                         core_params.opt_local,
1341                                         core_params.core_ty, wire_type);
1342
1343            data_w = core_params.perThreadState;
1344            len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height +
1345                scheu_Iw_height;
1346            fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag",
1347                                           Core_device, base_w, base_h, data_w,
1348                                           len, &interface_ip, level,
1349                                           clockRate, false, route_over_perc,
1350                                           core_params.opt_local,
1351                                           core_params.core_ty, wire_type);
1352        }
1353    } else {//OOO
1354        if (core_params.scheu_ty == PhysicalRegFile) {
1355            /* For physical register based OOO,
1356             * data broadcast interconnects cover across functional units, lsq,
1357             * inst windows and register files,
1358             * while tag broadcast interconnects also cover across ROB
1359             */
1360            data_w = int(ceil(core_params.int_data_width));
1361            len = rfu->int_regfile_height + exeu->FU_height + lsq_height;
1362            int_bypass = new Interconnect(xml_data, "Int Bypass Data",
1363                                          Core_device, base_w, base_h, data_w,
1364                                          len, &interface_ip, level, clockRate,
1365                                          false, route_over_perc,
1366                                          core_params.opt_local,
1367                                          core_params.core_ty, wire_type);
1368
1369            data_w = core_params.phy_ireg_width;
1370            len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
1371                scheu_Iw_height + scheu_ROB_height;
1372            intTagBypass = new Interconnect(xml_data, "Int Bypass Tag",
1373                                            Core_device, base_w, base_h,
1374                                            data_w, len, &interface_ip, level,
1375                                            clockRate, false, route_over_perc,
1376                                            core_params.opt_local,
1377                                            core_params.core_ty, wire_type);
1378
1379            if (core_params.num_muls > 0) {
1380                data_w = int(ceil(core_params.int_data_width));
1381                len = rfu->int_regfile_height + exeu->FU_height +
1382                    mul->FU_height + lsq_height;
1383                int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data",
1384                                                  Core_device, base_w, base_h,
1385                                                  data_w, len, &interface_ip,
1386                                                  level, clockRate, false,
1387                                                  route_over_perc,
1388                                                  core_params.opt_local,
1389                                                  core_params.core_ty,
1390                                                  wire_type);
1391
1392                data_w = core_params.phy_ireg_width;
1393                len = rfu->int_regfile_height + exeu->FU_height +
1394                    mul->FU_height + lsq_height + scheu_Iw_height +
1395                    scheu_ROB_height;
1396                intTag_mul_Bypass = new Interconnect(xml_data,
1397                                                     "Mul Bypass Tag",
1398                                                     Core_device, base_w,
1399                                                     base_h, data_w, len,
1400                                                     &interface_ip, level,
1401                                                     clockRate, false,
1402                                                     route_over_perc,
1403                                                     core_params.opt_local,
1404                                                     core_params.core_ty,
1405                                                     wire_type);
1406            }
1407
1408            if (core_params.num_fpus > 0) {
1409                data_w = int(ceil(core_params.fp_data_width));
1410                len = rfu->fp_regfile_height + fp_u->FU_height;
1411                fp_bypass = new Interconnect(xml_data, "FP Bypass Data",
1412                                             Core_device, base_w, base_h,
1413                                             data_w, len, &interface_ip, level,
1414                                             clockRate, false, route_over_perc,
1415                                             core_params.opt_local,
1416                                             core_params.core_ty, wire_type);
1417
1418                data_w = core_params.phy_freg_width;
1419                len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height +
1420                    scheu_fp_Iw_height + scheu_ROB_height;
1421                fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag",
1422                                               Core_device, base_w, base_h,
1423                                               data_w, len, &interface_ip,
1424                                               level, clockRate, false,
1425                                               route_over_perc,
1426                                               core_params.opt_local,
1427                                               core_params.core_ty, wire_type);
1428            }
1429        } else {
1430            /*
             * In an RS-based processor both data and tag are broadcast together,
             * covering functional units, lsq, inst windows, register files, and ROBs
1433             */
1434            data_w = int(ceil(core_params.int_data_width));
1435            len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
1436                scheu_Iw_height + scheu_ROB_height;
1437            int_bypass = new Interconnect(xml_data, "Int Bypass Data",
1438                                          Core_device, base_w, base_h, data_w,
1439                                          len, &interface_ip, level, clockRate,
1440                                          false, route_over_perc,
1441                                          core_params.opt_local,
1442                                          core_params.core_ty, wire_type);
1443
1444            data_w = core_params.phy_ireg_width;
1445            len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
1446                scheu_Iw_height + scheu_ROB_height;
1447            intTagBypass = new Interconnect(xml_data, "Int Bypass Tag",
1448                                            Core_device, base_w, base_h,
1449                                            data_w, len, &interface_ip, level,
1450                                            clockRate, false, route_over_perc,
1451                                            core_params.opt_local,
1452                                            core_params.core_ty, wire_type);
1453            if (core_params.num_muls > 0) {
1454                data_w = int(ceil(core_params.int_data_width));
1455                len = rfu->int_regfile_height + exeu->FU_height +
1456                    mul->FU_height + lsq_height + scheu_Iw_height +
1457                    scheu_ROB_height;
1458                int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data",
1459                                                  Core_device, base_w, base_h,
1460                                                  data_w, len, &interface_ip,
1461                                                  level, clockRate, false,
1462                                                  route_over_perc,
1463                                                  core_params.opt_local,
1464                                                  core_params.core_ty,
1465                                                  wire_type);
1466
1467                data_w = core_params.phy_ireg_width;
1468                len = rfu->int_regfile_height + exeu->FU_height +
1469                    mul->FU_height + lsq_height + scheu_Iw_height +
1470                    scheu_ROB_height;
1471                intTag_mul_Bypass = new Interconnect(xml_data,
1472                                                     "Mul Bypass Tag",
1473                                                     Core_device, base_w,
1474                                                     base_h, data_w, len,
1475                                                     &interface_ip, level,
1476                                                     clockRate, false,
1477                                                     route_over_perc,
1478                                                     core_params.opt_local,
1479                                                     core_params.core_ty,
1480                                                     wire_type);
1481            }
1482
1483            if (core_params.num_fpus > 0) {
1484                data_w = int(ceil(core_params.fp_data_width));
1485                len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height +
1486                    scheu_fp_Iw_height + scheu_ROB_height;
1487                fp_bypass = new Interconnect(xml_data, "FP Bypass Data",
1488                                             Core_device, base_w, base_h,
1489                                             data_w, len, &interface_ip, level,
1490                                             clockRate, false, route_over_perc,
1491                                             core_params.opt_local,
1492                                             core_params.core_ty, wire_type);
1493
1494                data_w = core_params.phy_freg_width;
1495                len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height +
1496                    scheu_fp_Iw_height + scheu_ROB_height;
1497                fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag",
1498                                               Core_device, base_w, base_h,
1499                                               data_w, len, &interface_ip,
1500                                               level, clockRate, false,
1501                                               route_over_perc,
1502                                               core_params.opt_local,
1503                                               core_params.core_ty, wire_type);
1504            }
1505        }
1506    }
1507    if (int_bypass) {
1508        children.push_back(int_bypass);
1509    }
1510    if (intTagBypass) {
1511        children.push_back(intTagBypass);
1512    }
1513    if (int_mul_bypass) {
1514        children.push_back(int_mul_bypass);
1515    }
1516    if (intTag_mul_Bypass) {
1517        children.push_back(intTag_mul_Bypass);
1518    }
1519    if (fp_bypass) {
1520        children.push_back(fp_bypass);
1521    }
1522    if (fpTagBypass) {
1523        children.push_back(fpTagBypass);
1524    }
1525
1526    area.set_area(area.get_area() + int_bypass->area.get_area() +
1527                  intTagBypass->area.get_area());
1528    if (core_params.num_muls > 0) {
1529        area.set_area(area.get_area() + int_mul_bypass->area.get_area() +
1530                      intTag_mul_Bypass->area.get_area());
1531    }
1532    if (core_params.num_fpus > 0) {
1533        area.set_area(area.get_area() + fp_bypass->area.get_area() +
1534                      fpTagBypass->area.get_area());
1535    }
1536}
1537
1538RENAMINGU::RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_,
1539                     const CoreParameters & _core_params,
1540                     const CoreStatistics & _core_stats, bool exist_)
1541    : McPATComponent(_xml_data), iFRAT(NULL), fFRAT(NULL), iRRAT(NULL),
1542      fRRAT(NULL), ifreeL(NULL), ffreeL(NULL), idcl(NULL), fdcl(NULL),
1543      RAHT(NULL), interface_ip(*interface_ip_),
1544      core_params(_core_params), core_stats(_core_stats), exist(exist_) {
1545    if (!exist) return;
1546    int tag;
1547    int data;
1548    int out_w;
1549    int size;
1550
1551    // Assumption:
1552    //   We make an implicit design assumption based on the specific structure
1553    //   that is being modeled.
1554    //   1. RAM-based RATs are direct mapped. However, if the associated
1555    //      scheduler is a reservation station style, the RATs are fully
1556    //      associative.
1557    //   2. Non-CAM based RATs and free lists do not have tags.
1558    //   3. Free lists are direct mapped.
1559
1560    const int RAM_BASED_RAT_ASSOC = 1;
1561    const int RS_RAT_ASSOC = 0;
1562    const int NON_CAM_BASED_TAG_WIDTH = 0;
1563    const int FREELIST_ASSOC = 1;
1564
1565    clockRate = core_params.clockRate;
1566    name = "Rename Unit";
1567    if (core_params.core_ty == OOO) {
1568        //integer pipeline
1569        if (core_params.scheu_ty == PhysicalRegFile) {
1570            if (core_params.rm_ty == RAMbased) {
                //FRAT with global checkpointing (GCs); please see the paper/tech
                //report for detailed explanations
1573
1574                data = int(ceil(core_params.phy_ireg_width *
1575                                (1 + core_params.globalCheckpoint) /
1576                                BITS_PER_BYTE));
1577                out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
1578
1579                size = data * core_params.archi_Regs_IRF_size;
1580
1581                interface_ip.cache_sz = size;
1582                interface_ip.line_sz = data;
1583                interface_ip.assoc = RAM_BASED_RAT_ASSOC;
1584                interface_ip.nbanks = core_params.front_rat_nbanks;
1585                interface_ip.out_w = out_w * BITS_PER_BYTE;
1586                interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
1587                interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
1588                interface_ip.access_mode = Fast;
1589                interface_ip.obj_func_dyn_energy = 0;
1590                interface_ip.obj_func_dyn_power = 0;
1591                interface_ip.obj_func_leak_power = 0;
1592                interface_ip.obj_func_cycle_t = 1;
1593                interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
1594                interface_ip.num_rd_ports =
1595                    NUM_SOURCE_OPERANDS * core_params.decodeW;
1596                interface_ip.num_wr_ports = core_params.decodeW;
1597                interface_ip.num_se_rd_ports = 0;
1598                interface_ip.num_search_ports = 0;
1599                interface_ip.is_cache = false;
1600                interface_ip.pure_cam = false;
1601                interface_ip.pure_ram = true;
1602                interface_ip.throughput = 1.0 / clockRate;
1603                interface_ip.latency = 1.0 / clockRate;
1604                iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
1605                                    Core_device, clockRate,
1606                                    core_params.opt_local,
1607                                    core_params.core_ty);
1608                iFRAT->output_data.area *= core_params.num_hthreads;
1609                area.set_area(area.get_area() + iFRAT->area.get_area());
1610
1611                //FRAT floating point
1612                data = int(ceil(core_params.phy_freg_width *
1613                                (1 + core_params.globalCheckpoint) /
1614                                BITS_PER_BYTE));
1615                out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
1616                size = data * core_params.archi_Regs_FRF_size;
1617
1618                interface_ip.cache_sz = size;
1619                interface_ip.line_sz = data;
1620                interface_ip.assoc = RAM_BASED_RAT_ASSOC;
1621                interface_ip.nbanks = core_params.front_rat_nbanks;
1622                interface_ip.out_w = out_w * BITS_PER_BYTE;
1623                interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
1624                interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
1625                interface_ip.access_mode = Fast;
1626                interface_ip.obj_func_dyn_energy = 0;
1627                interface_ip.obj_func_dyn_power = 0;
1628                interface_ip.obj_func_leak_power = 0;
1629                interface_ip.obj_func_cycle_t = 1;
1630                interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
1631                interface_ip.num_rd_ports =
1632                    NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
1633                interface_ip.num_wr_ports = core_params.fp_decodeW;
1634                interface_ip.num_se_rd_ports = 0;
1635                interface_ip.num_search_ports = 0;
1636                interface_ip.is_cache = false;
1637                interface_ip.pure_cam = false;
1638                interface_ip.pure_ram = true;
1639                interface_ip.throughput = 1.0 / clockRate;
1640                interface_ip.latency = 1.0 / clockRate;
1641                fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
1642                                    Core_device, clockRate,
1643                                    core_params.opt_local,
1644                                    core_params.core_ty);
1645                fFRAT->output_data.area *= core_params.num_hthreads;
1646                area.set_area(area.get_area() + fFRAT->area.get_area());
1647
1648            } else if ((core_params.rm_ty == CAMbased)) {
1649                //IRAT
1650                tag = core_params.arch_ireg_width;
                //the address of the CAM needs to be sent out
1652                data = int(ceil((core_params.arch_ireg_width + 1 *
1653                                 core_params.globalCheckpoint) /
1654                                BITS_PER_BYTE));
1655                out_w = int(ceil(core_params.arch_ireg_width / BITS_PER_BYTE));
1656                size = data * core_params.phy_Regs_IRF_size;
1657
1658                interface_ip.cache_sz = size;
1659                interface_ip.line_sz = data;
1660                interface_ip.assoc = CAM_ASSOC;
1661                interface_ip.nbanks = core_params.front_rat_nbanks;
1662                interface_ip.out_w  = out_w * BITS_PER_BYTE;
1663                interface_ip.specific_tag = tag > 0;
1664                interface_ip.tag_w = tag;
1665                interface_ip.access_mode = Fast;
1666                interface_ip.obj_func_dyn_energy = 0;
1667                interface_ip.obj_func_dyn_power = 0;
1668                interface_ip.obj_func_leak_power = 0;
1669                interface_ip.obj_func_cycle_t = 1;
1670                interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
1671                interface_ip.num_rd_ports = core_params.decodeW;
1672                interface_ip.num_wr_ports = core_params.decodeW;
1673                interface_ip.num_se_rd_ports = 0;
1674                interface_ip.num_search_ports =
1675                    NUM_SOURCE_OPERANDS * core_params.decodeW;
1676                interface_ip.is_cache = true;
1677                interface_ip.pure_cam = false;
1678                interface_ip.pure_ram = false;
1679                interface_ip.throughput = 1.0 / clockRate;
1680                interface_ip.latency = 1.0 / clockRate;
1681                iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
1682                                    Core_device, clockRate,
1683                                    core_params.opt_local,
1684                                    core_params.core_ty);
1685                iFRAT->output_data.area *= core_params.num_hthreads;
1686                area.set_area(area.get_area() + iFRAT->area.get_area());
1687
1688                //FRAT for FP
1689                tag = core_params.arch_freg_width;
                //the address of the CAM needs to be sent out
1691                data = int(ceil((core_params.arch_freg_width + 1 *
1692                                 core_params.globalCheckpoint) /
1693                                BITS_PER_BYTE));
1694                out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE));
1695                size = data * core_params.phy_Regs_FRF_size;
1696
1697                interface_ip.cache_sz = size;
1698                interface_ip.line_sz = data;
1699                interface_ip.assoc = CAM_ASSOC;
1700                interface_ip.nbanks = core_params.front_rat_nbanks;
1701                interface_ip.out_w = out_w * BITS_PER_BYTE;
1702                interface_ip.specific_tag = tag > 0;
1703                interface_ip.tag_w = tag;
1704                interface_ip.access_mode = Fast;
1705                interface_ip.obj_func_dyn_energy = 0;
1706                interface_ip.obj_func_dyn_power = 0;
1707                interface_ip.obj_func_leak_power = 0;
1708                interface_ip.obj_func_cycle_t = 1;
1709                interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
1710                interface_ip.num_rd_ports = core_params.fp_decodeW;
1711                interface_ip.num_wr_ports = core_params.fp_decodeW;
1712                interface_ip.num_se_rd_ports = 0;
1713                interface_ip.num_search_ports =
1714                    NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
1715                interface_ip.is_cache = true;
1716                interface_ip.pure_cam = false;
1717                interface_ip.pure_ram = false;
1718                interface_ip.throughput = 1.0 / clockRate;
1719                interface_ip.latency = 1.0 / clockRate;
1720                fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
1721                                    Core_device, clockRate,
1722                                    core_params.opt_local,
1723                                    core_params.core_ty);
1724                fFRAT->output_data.area *= core_params.num_hthreads;
1725                area.set_area(area.get_area() + fFRAT->area.get_area());
1726            }
1727
            //RRAT is always RAM based, does not have GCs, and is used only to
            //record the latest non-speculative mapping
1730            data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
1731            size = data * core_params.archi_Regs_IRF_size *
1732                NUM_SOURCE_OPERANDS;
1733
1734            interface_ip.cache_sz = size;
1735            interface_ip.line_sz = data;
1736            interface_ip.assoc = RAM_BASED_RAT_ASSOC;
1737            interface_ip.nbanks = core_params.retire_rat_nbanks;
1738            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
1739            interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
1740            interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
1741            interface_ip.access_mode = Sequential;
1742            interface_ip.obj_func_dyn_energy = 0;
1743            interface_ip.obj_func_dyn_power = 0;
1744            interface_ip.obj_func_leak_power = 0;
1745            interface_ip.obj_func_cycle_t = 1;
1746            interface_ip.num_rw_ports = core_params.retire_rat_rw_ports;
1747            interface_ip.num_rd_ports = core_params.commitW;
1748            interface_ip.num_wr_ports = core_params.commitW;
1749            interface_ip.num_se_rd_ports = 0;
1750            interface_ip.num_search_ports = 0;
1751            interface_ip.is_cache = false;
1752            interface_ip.pure_cam = false;
1753            interface_ip.pure_ram = true;
1754            interface_ip.throughput = 1.0 / clockRate;
1755            interface_ip.latency = 1.0 / clockRate;
1756            iRRAT = new ArrayST(xml_data, &interface_ip, "Int Retire RAT",
1757                                Core_device, clockRate, core_params.opt_local,
1758                                core_params.core_ty);
1759            iRRAT->output_data.area *= core_params.num_hthreads;
1760            area.set_area(area.get_area() + iRRAT->area.get_area());
1761
1762            //RRAT for FP
1763            data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
1764            size = data * core_params.archi_Regs_FRF_size *
1765                NUM_SOURCE_OPERANDS;
1766
1767            interface_ip.cache_sz = size;
1768            interface_ip.line_sz = data;
1769            interface_ip.assoc = RAM_BASED_RAT_ASSOC;
1770            interface_ip.nbanks = core_params.retire_rat_nbanks;
1771            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
1772            interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
1773            interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
1774            interface_ip.access_mode = Sequential;
1775            interface_ip.obj_func_dyn_energy = 0;
1776            interface_ip.obj_func_dyn_power = 0;
1777            interface_ip.obj_func_leak_power = 0;
1778            interface_ip.obj_func_cycle_t = 1;
1779            interface_ip.num_rw_ports = core_params.retire_rat_rw_ports;
1780            interface_ip.num_rd_ports = core_params.fp_decodeW;
1781            interface_ip.num_wr_ports = core_params.fp_decodeW;
1782            interface_ip.num_se_rd_ports = 0;
1783            interface_ip.num_search_ports = 0;
1784            interface_ip.is_cache = false;
1785            interface_ip.pure_cam = false;
1786            interface_ip.pure_ram = true;
1787            interface_ip.throughput = 1.0 / clockRate;
1788            interface_ip.latency = 1.0 / clockRate;
1789            fRRAT = new ArrayST(xml_data, &interface_ip, "FP Retire RAT",
1790                                Core_device, clockRate, core_params.opt_local,
1791                                core_params.core_ty);
1792            fRRAT->output_data.area *= core_params.num_hthreads;
1793            area.set_area(area.get_area() + fRRAT->area.get_area());
1794
            //The freelist of the renaming unit is always RAM based.
            //Recycling happens in two places:
            // 1) when the DCL check finds a WAW, the phy register/ROB entry is
            //    recycled directly into the freelist;
            // 2) when an instruction commits, the phy register/ROB entry needs
            //    to be recycled.
            //Therefore num_wr_ports = decode width - 1 (-1 because at least
            //one phy reg will be used for the current renaming group) +
            //commit width.
1799            data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
1800            size = data * core_params.num_ifreelist_entries;
1801
1802            interface_ip.cache_sz = size;
1803            interface_ip.line_sz = data;
1804            interface_ip.assoc = FREELIST_ASSOC;
1805            interface_ip.nbanks = core_params.freelist_nbanks;
1806            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
1807            interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
1808            interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
1809            interface_ip.access_mode = Sequential;
1810            interface_ip.obj_func_dyn_energy = 0;
1811            interface_ip.obj_func_dyn_power = 0;
1812            interface_ip.obj_func_leak_power = 0;
1813            interface_ip.obj_func_cycle_t = 1;
1814            interface_ip.num_rw_ports = core_params.freelist_rw_ports;
1815            interface_ip.num_rd_ports = core_params.decodeW;
1816            interface_ip.num_wr_ports =
1817                core_params.decodeW - 1 + core_params.commitW;
1818            interface_ip.num_se_rd_ports = 0;
1819            interface_ip.num_search_ports = 0;
1820            interface_ip.is_cache = false;
1821            interface_ip.pure_cam = false;
1822            interface_ip.pure_ram = true;
1823            interface_ip.throughput = 1.0 / clockRate;
1824            interface_ip.latency = 1.0 / clockRate;
1825            ifreeL = new ArrayST(xml_data, &interface_ip, "Integer Free List",
1826                                 Core_device, clockRate, core_params.opt_local,
1827                                 core_params.core_ty);
1828            ifreeL->output_data.area *= core_params.num_hthreads;
1829            area.set_area(area.get_area() + ifreeL->area.get_area());
1830
1831            //freelist for FP
1832            data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
1833            size = data * core_params.num_ffreelist_entries;
1834
1835            interface_ip.cache_sz = size;
1836            interface_ip.line_sz = data;
1837            interface_ip.assoc = FREELIST_ASSOC;
1838            interface_ip.nbanks = core_params.freelist_nbanks;
1839            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
1840            interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
1841            interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
1842            interface_ip.access_mode = Sequential;
1843            interface_ip.obj_func_dyn_energy = 0;
1844            interface_ip.obj_func_dyn_power = 0;
1845            interface_ip.obj_func_leak_power = 0;
1846            interface_ip.obj_func_cycle_t = 1;
1847            interface_ip.num_rw_ports = core_params.freelist_rw_ports;
1848            interface_ip.num_rd_ports = core_params.fp_decodeW;
1849            interface_ip.num_wr_ports =
1850                core_params.fp_decodeW - 1 + core_params.commitW;
1851            interface_ip.num_se_rd_ports = 0;
1852            interface_ip.num_search_ports = 0;
1853            interface_ip.is_cache = false;
1854            interface_ip.pure_cam = false;
1855            interface_ip.pure_ram = true;
1856            interface_ip.throughput = 1.0 / clockRate;
1857            interface_ip.latency = 1.0 / clockRate;
1858            ffreeL = new ArrayST(xml_data, &interface_ip, "FP Free List",
1859                                 Core_device, clockRate, core_params.opt_local,
1860                                 core_params.core_ty);
1861            ffreeL->output_data.area *= core_params.num_hthreads;
1862            area.set_area(area.get_area() + ffreeL->area.get_area());
1863
1864        } else if (core_params.scheu_ty == ReservationStation) {
1865            if (core_params.rm_ty == RAMbased) {
1866                tag = core_params.phy_ireg_width;
1867                data = int(ceil(core_params.phy_ireg_width *
1868                                (1 + core_params.globalCheckpoint) /
1869                                BITS_PER_BYTE));
1870                out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
1871                size = data * core_params.archi_Regs_IRF_size;
1872
1873                interface_ip.cache_sz = size;
1874                interface_ip.line_sz = data;
1875                interface_ip.assoc = RS_RAT_ASSOC;
1876                interface_ip.nbanks = core_params.front_rat_nbanks;
1877                interface_ip.out_w = out_w * BITS_PER_BYTE;
1878                interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
1879                interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
1880                interface_ip.access_mode = Fast;
1881                interface_ip.obj_func_dyn_energy = 0;
1882                interface_ip.obj_func_dyn_power = 0;
1883                interface_ip.obj_func_leak_power = 0;
1884                interface_ip.obj_func_cycle_t = 1;
1885                interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
1886                interface_ip.num_rd_ports =
1887                    NUM_SOURCE_OPERANDS * core_params.decodeW;
1888                interface_ip.num_wr_ports = core_params.decodeW;
1889                interface_ip.num_se_rd_ports = 0;
1890                interface_ip.num_search_ports = core_params.commitW;
1891                interface_ip.is_cache = true;
1892                interface_ip.pure_cam = false;
1893                interface_ip.pure_ram = false;
1894                interface_ip.throughput = 1.0 / clockRate;
1895                interface_ip.latency = 1.0 / clockRate;
1896                iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
1897                                    Core_device, clockRate,
1898                                    core_params.opt_local,
1899                                    core_params.core_ty);
1900                iFRAT->local_result.adjust_area();
1901                iFRAT->output_data.area *= core_params.num_hthreads;
1902                area.set_area(area.get_area() + iFRAT->area.get_area());
1903
1904                //FP
1905                tag = core_params.phy_freg_width;
1906                data = int(ceil(core_params.phy_freg_width *
1907                                (1 + core_params.globalCheckpoint) /
1908                                BITS_PER_BYTE));
1909                out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
1910                size = data * core_params.archi_Regs_FRF_size;
1911
1912                interface_ip.cache_sz = size;
1913                interface_ip.line_sz = data;
1914                interface_ip.assoc = RS_RAT_ASSOC;
1915                interface_ip.nbanks = core_params.front_rat_nbanks;
1916                interface_ip.out_w = out_w * BITS_PER_BYTE;
1917                interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
1918                interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
1919                interface_ip.access_mode = Fast;
1920                interface_ip.obj_func_dyn_energy = 0;
1921                interface_ip.obj_func_dyn_power = 0;
1922                interface_ip.obj_func_leak_power = 0;
1923                interface_ip.obj_func_cycle_t = 1;
1924                interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
1925                interface_ip.num_rd_ports =
1926                    NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
1927                interface_ip.num_wr_ports = core_params.fp_decodeW;
1928                interface_ip.num_se_rd_ports = 0;
1929                interface_ip.num_search_ports = core_params.fp_issueW;
1930                interface_ip.is_cache = true;
1931                interface_ip.pure_cam = false;
1932                interface_ip.pure_ram = false;
1933                interface_ip.throughput = 1.0 / clockRate;
1934                interface_ip.latency = 1.0 / clockRate;
1935                fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
1936                                    Core_device, clockRate,
1937                                    core_params.opt_local,
1938                                    core_params.core_ty);
1939                fFRAT->local_result.adjust_area();
1940                fFRAT->output_data.area *= core_params.num_hthreads;
1941                area.set_area(area.get_area() + fFRAT->area.get_area());
1942
1943            } else if ((core_params.rm_ty == CAMbased)) {
1944                //FRAT
1945                //the address of CAM needed to be sent out
1946                tag = core_params.arch_ireg_width;
1947                data = int(ceil (core_params.arch_ireg_width +
1948                                 1 * core_params.globalCheckpoint /
1949                                 BITS_PER_BYTE));
1950                out_w = int(ceil (core_params.arch_ireg_width /
1951                                  BITS_PER_BYTE));
1952                size = data * core_params.phy_Regs_IRF_size;
1953
1954                interface_ip.cache_sz = size;
1955                interface_ip.line_sz = data;
1956                interface_ip.assoc = CAM_ASSOC;
1957                interface_ip.nbanks = core_params.front_rat_nbanks;
1958                interface_ip.out_w = out_w * BITS_PER_BYTE;
1959                interface_ip.specific_tag = tag > 0;
1960                interface_ip.tag_w = tag;
1961                interface_ip.access_mode = Fast;
1962                interface_ip.obj_func_dyn_energy = 0;
1963                interface_ip.obj_func_dyn_power = 0;
1964                interface_ip.obj_func_leak_power = 0;
1965                interface_ip.obj_func_cycle_t = 1;
1966                interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
1967                interface_ip.num_rd_ports = core_params.decodeW;
1968                interface_ip.num_wr_ports = core_params.decodeW;
1969                interface_ip.num_se_rd_ports = 0;
1970                interface_ip.num_search_ports =
1971                    NUM_SOURCE_OPERANDS * core_params.decodeW;
1972                interface_ip.is_cache = true;
1973                interface_ip.pure_cam = false;
1974                interface_ip.pure_ram = false;
1975                interface_ip.throughput = 1.0 / clockRate;
1976                interface_ip.latency = 1.0 / clockRate;
1977                iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
1978                                    Core_device, clockRate,
1979                                    core_params.opt_local,
1980                                    core_params.core_ty);
1981                iFRAT->output_data.area *= core_params.num_hthreads;
1982                area.set_area(area.get_area() + iFRAT->area.get_area());
1983
1984                //FRAT
1985                tag = core_params.arch_freg_width;
1986                //the address of CAM needed to be sent out
1987                data = int(ceil(core_params.arch_freg_width +
1988                                1 * core_params.globalCheckpoint /
1989                               BITS_PER_BYTE));
1990                out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE));
1991                size = data * core_params.phy_Regs_FRF_size;
1992
1993                interface_ip.cache_sz = size;
1994                interface_ip.line_sz = data;
1995                interface_ip.assoc = CAM_ASSOC;
1996                interface_ip.nbanks = core_params.front_rat_nbanks;
1997                interface_ip.out_w = out_w * BITS_PER_BYTE;
1998                interface_ip.specific_tag = tag > 0;
1999                interface_ip.tag_w = tag;
2000                interface_ip.access_mode = Fast;
2001                interface_ip.obj_func_dyn_energy = 0;
2002                interface_ip.obj_func_dyn_power = 0;
2003                interface_ip.obj_func_leak_power = 0;
2004                interface_ip.obj_func_cycle_t = 1;
2005                interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
2006                interface_ip.num_rd_ports = core_params.decodeW;
2007                interface_ip.num_wr_ports = core_params.fp_decodeW;
2008                interface_ip.num_se_rd_ports = 0;
2009                interface_ip.num_search_ports =
2010                    NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
2011                interface_ip.is_cache = true;
2012                interface_ip.pure_cam = false;
2013                interface_ip.pure_ram = false;
2014                interface_ip.throughput = 1.0 / clockRate;
2015                interface_ip.latency = 1.0 / clockRate;
2016                fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
2017                                    Core_device, clockRate,
2018                                    core_params.opt_local,
2019                                    core_params.core_ty);
2020                fFRAT->output_data.area *= core_params.num_hthreads;
2021                area.set_area(area.get_area() + fFRAT->area.get_area());
2022
2023            }
2024            //No RRAT for RS based OOO
2025            //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified
2026            data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
2027            size = data * core_params.num_ifreelist_entries;
2028
2029            interface_ip.cache_sz = size;
2030            interface_ip.line_sz = data;
2031            interface_ip.assoc = FREELIST_ASSOC;
2032            interface_ip.nbanks = core_params.freelist_nbanks;
2033            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
2034            interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
2035            interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
2036            interface_ip.access_mode = Fast;
2037            interface_ip.obj_func_dyn_energy = 0;
2038            interface_ip.obj_func_dyn_power = 0;
2039            interface_ip.obj_func_leak_power = 0;
2040            interface_ip.obj_func_cycle_t = 1;
2041            interface_ip.num_rw_ports = core_params.freelist_rw_ports;
2042            interface_ip.num_rd_ports = core_params.decodeW;
2043            interface_ip.num_wr_ports =
2044                core_params.decodeW - 1 + core_params.commitW;
2045            interface_ip.num_se_rd_ports = 0;
2046            interface_ip.num_search_ports = 0;
2047            interface_ip.is_cache = false;
2048            interface_ip.pure_cam = false;
2049            interface_ip.pure_ram = true;
2050            interface_ip.throughput = 1.0 / clockRate;
2051            interface_ip.latency = 1.0 / clockRate;
2052            ifreeL = new ArrayST(xml_data, &interface_ip, "Unified Free List",
2053                                 Core_device, clockRate, core_params.opt_local,
2054                                 core_params.core_ty);
2055            ifreeL->output_data.area *= core_params.num_hthreads;
2056            area.set_area(area.get_area() + ifreeL->area.get_area());
2057        }
2058
2059    }
2060    idcl =
2061        new dep_resource_conflict_check(xml_data,
2062                                        "Instruction Dependency Check?",
2063                                        &interface_ip, core_params,
2064                                        core_params.phy_ireg_width,
2065                                        clockRate);
2066    fdcl =
2067        new dep_resource_conflict_check(xml_data,
2068                                        "FP Dependency Check?", &interface_ip,
2069                                        core_params,
2070                                        core_params.phy_freg_width, clockRate);
2071}
2072
2073Core::Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_)
2074    : McPATComponent(_xml_data), ifu(NULL), lsu(NULL), mmu(NULL),
2075      exu(NULL), rnu(NULL), corepipe (NULL), undiffCore(NULL), l2cache (NULL),
2076      ithCore(_ithCore), interface_ip(*interface_ip_) {
2077
2078    ostringstream os;
2079    os << ithCore;
2080    name = "Core " + os.str();
2081
2082    int i = 0;
2083    XMLNode* childXML;
2084    for (i = 0; i < xml_data->nChildNode("component"); i++) {
2085        childXML = xml_data->getChildNodePtr("component", &i);
2086        XMLCSTR type = childXML->getAttribute("type");
2087        if (!type)
2088            warnMissingComponentType(childXML->getAttribute("id"));
2089
2090        STRCMP(type, "CacheUnit") {
2091            XMLCSTR comp_name = childXML->getAttribute("id");
2092            if (!comp_name)
2093                continue;
2094
2095            STRCMP(comp_name, "system.L20") {
2096                l2cache = new CacheUnit(childXML, &interface_ip);
2097                children.push_back(l2cache);
2098            }
2099        }
2100    }
2101
2102    set_core_param();
2103    clockRate = core_params.clockRate;
2104
2105    ifu = new InstFetchU(xml_data, &interface_ip, core_params,
2106                         core_stats);
2107    children.push_back(ifu);
2108    lsu = new LoadStoreU(xml_data, &interface_ip, core_params,
2109                         core_stats);
2110    children.push_back(lsu);
2111    mmu = new MemManU(xml_data, &interface_ip, core_params,
2112                      core_stats);
2113    children.push_back(mmu);
2114    exu = new EXECU(xml_data, &interface_ip, lsu->lsq_height,
2115                    core_params, core_stats);
2116    children.push_back(exu);
2117    undiffCore = new UndiffCore(xml_data, &interface_ip, core_params);
2118    children.push_back(undiffCore);
2119    if (core_params.core_ty == OOO) {
2120        rnu = new RENAMINGU(xml_data, &interface_ip, core_params,
2121                            core_stats);
2122        children.push_back(rnu);
2123    }
2124    corepipe = new Pipeline(xml_data, &interface_ip, core_params);
2125    children.push_back(corepipe);
2126
2127    double pipeline_area_per_unit;
2128    if (core_params.core_ty == OOO) {
2129        pipeline_area_per_unit = (corepipe->area.get_area() *
2130                                  core_params.num_pipelines) / 5.0;
2131        if (rnu->exist) {
2132            rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit);
2133        }
2134    } else {
2135        pipeline_area_per_unit = (corepipe->area.get_area() *
2136                                  core_params.num_pipelines) / 4.0;
2137    }
2138
2139    // Move all of this to computeArea
2140    //area.set_area(area.get_area()+ corepipe->area.get_area());
2141    if (ifu->exist) {
2142        ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit);
2143        area.set_area(area.get_area() + ifu->area.get_area());
2144    }
2145    if (lsu->exist) {
2146        lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit);
2147        area.set_area(area.get_area() + lsu->area.get_area());
2148    }
2149    if (exu->exist) {
2150        exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit);
2151        area.set_area(area.get_area() + exu->area.get_area());
2152    }
2153    if (mmu->exist) {
2154        mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit);
2155        area.set_area(area.get_area() + mmu->area.get_area());
2156    }
2157
2158    if (core_params.core_ty == OOO) {
2159        if (rnu->exist) {
2160
2161            area.set_area(area.get_area() + rnu->area.get_area());
2162        }
2163    }
2164
2165    if (undiffCore->exist) {
2166        area.set_area(area.get_area() + undiffCore->area.get_area());
2167    }
2168
2169    if (l2cache) {
2170        area.set_area(area.get_area() + l2cache->area.get_area());
2171    }
2172}
2173
2174
2175void BranchPredictor::computeEnergy() {
2176    if (!exist) return;
2177
2178    // ASSUMPTION: All instructions access the branch predictors at Fetch and
2179    //             only branch instrucions update the predictors regardless
2180    //             of the correctness of the prediction.
2181    double tdp_read_accesses =
2182        core_params.predictionW * core_stats.BR_duty_cycle;
2183    globalBPT->tdp_stats.reset();
2184    globalBPT->tdp_stats.readAc.access  = tdp_read_accesses;
2185    globalBPT->tdp_stats.writeAc.access = 0;
2186    globalBPT->rtp_stats.reset();
2187    globalBPT->rtp_stats.readAc.access  = core_stats.total_instructions;
2188    globalBPT->rtp_stats.writeAc.access = core_stats.branch_instructions;
2189    globalBPT->power_t.reset();
2190    globalBPT->power_t.readOp.dynamic +=
2191        globalBPT->local_result.power.readOp.dynamic *
2192        globalBPT->tdp_stats.readAc.access +
2193        globalBPT->local_result.power.writeOp.dynamic *
2194        globalBPT->tdp_stats.writeAc.access;
2195    globalBPT->power_t = globalBPT->power_t +
2196        globalBPT->local_result.power * pppm_lkg;
2197    globalBPT->rt_power.reset();
2198    globalBPT->rt_power.readOp.dynamic +=
2199        globalBPT->local_result.power.readOp.dynamic *
2200        globalBPT->rtp_stats.readAc.access +
2201        globalBPT->local_result.power.writeOp.dynamic *
2202        globalBPT->rtp_stats.writeAc.access;
2203
2204    L1_localBPT->tdp_stats.reset();
2205    L1_localBPT->tdp_stats.readAc.access  = tdp_read_accesses;
2206    L1_localBPT->tdp_stats.writeAc.access = 0;
2207    L1_localBPT->rtp_stats.reset();
2208    L1_localBPT->rtp_stats.readAc.access  = core_stats.total_instructions;
2209    L1_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions;
2210    L1_localBPT->power_t.reset();
2211    L1_localBPT->power_t.readOp.dynamic +=
2212        L1_localBPT->local_result.power.readOp.dynamic *
2213        L1_localBPT->tdp_stats.readAc.access +
2214        L1_localBPT->local_result.power.writeOp.dynamic *
2215        L1_localBPT->tdp_stats.writeAc.access;
2216    L1_localBPT->power_t = L1_localBPT->power_t +
2217        L1_localBPT->local_result.power * pppm_lkg;
2218    L1_localBPT->rt_power.reset();
2219    L1_localBPT->rt_power.readOp.dynamic +=
2220        L1_localBPT->local_result.power.readOp.dynamic *
2221        L1_localBPT->rtp_stats.readAc.access +
2222        L1_localBPT->local_result.power.writeOp.dynamic *
2223        L1_localBPT->rtp_stats.writeAc.access;
2224
2225    L2_localBPT->tdp_stats.reset();
2226    L2_localBPT->tdp_stats.readAc.access  = tdp_read_accesses;
2227    L2_localBPT->tdp_stats.writeAc.access = 0;
2228    L2_localBPT->rtp_stats.reset();
2229    L2_localBPT->rtp_stats.readAc.access  = core_stats.branch_instructions;
2230    L2_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions;
2231    L2_localBPT->power_t.reset();
2232    L2_localBPT->power_t.readOp.dynamic +=
2233        L2_localBPT->local_result.power.readOp.dynamic *
2234        L2_localBPT->tdp_stats.readAc.access +
2235        L2_localBPT->local_result.power.writeOp.dynamic *
2236        L2_localBPT->tdp_stats.writeAc.access;
2237    L2_localBPT->power_t = L2_localBPT->power_t +
2238        L2_localBPT->local_result.power * pppm_lkg;
2239    L2_localBPT->rt_power.reset();
2240    L2_localBPT->rt_power.readOp.dynamic +=
2241        L2_localBPT->local_result.power.readOp.dynamic *
2242        L2_localBPT->rtp_stats.readAc.access +
2243        L2_localBPT->local_result.power.writeOp.dynamic *
2244        L2_localBPT->rtp_stats.writeAc.access;
2245
2246    chooser->tdp_stats.reset();
2247    chooser->tdp_stats.readAc.access  = tdp_read_accesses;
2248    chooser->tdp_stats.writeAc.access = 0;
2249    chooser->rtp_stats.reset();
2250    chooser->rtp_stats.readAc.access  = core_stats.total_instructions;
2251    chooser->rtp_stats.writeAc.access = core_stats.branch_instructions;
2252    chooser->power_t.reset();
2253    chooser->power_t.readOp.dynamic +=
2254        chooser->local_result.power.readOp.dynamic *
2255        chooser->tdp_stats.readAc.access +
2256        chooser->local_result.power.writeOp.dynamic *
2257        chooser->tdp_stats.writeAc.access;
2258    chooser->power_t =
2259        chooser->power_t + chooser->local_result.power * pppm_lkg;
2260    chooser->rt_power.reset();
2261    chooser->rt_power.readOp.dynamic +=
2262        chooser->local_result.power.readOp.dynamic *
2263        chooser->rtp_stats.readAc.access +
2264        chooser->local_result.power.writeOp.dynamic *
2265        chooser->rtp_stats.writeAc.access;
2266
2267    RAS->tdp_stats.reset();
2268    RAS->tdp_stats.readAc.access  = tdp_read_accesses;
2269    RAS->tdp_stats.writeAc.access = 0;
2270    RAS->rtp_stats.reset();
2271    RAS->rtp_stats.readAc.access  = core_stats.function_calls;
2272    RAS->rtp_stats.writeAc.access = core_stats.function_calls;
2273    RAS->power_t.reset();
2274    RAS->power_t.readOp.dynamic +=
2275        RAS->local_result.power.readOp.dynamic * RAS->tdp_stats.readAc.access +
2276        RAS->local_result.power.writeOp.dynamic *
2277        RAS->tdp_stats.writeAc.access;
2278    RAS->power_t = RAS->power_t + RAS->local_result.power *
2279        core_params.pppm_lkg_multhread;
2280    RAS->rt_power.reset();
2281    RAS->rt_power.readOp.dynamic += RAS->local_result.power.readOp.dynamic *
2282        RAS->rtp_stats.readAc.access +
2283        RAS->local_result.power.writeOp.dynamic *
2284        RAS->rtp_stats.writeAc.access;
2285
2286    output_data.reset();
2287    if (globalBPT) {
2288        globalBPT->output_data.peak_dynamic_power =
2289            globalBPT->power_t.readOp.dynamic * clockRate;
2290        globalBPT->output_data.runtime_dynamic_energy =
2291            globalBPT->rt_power.readOp.dynamic;
2292        output_data += globalBPT->output_data;
2293    }
2294    if (L1_localBPT) {
2295        L1_localBPT->output_data.peak_dynamic_power =
2296            L1_localBPT->power_t.readOp.dynamic * clockRate;
2297        L1_localBPT->output_data.runtime_dynamic_energy =
2298            L1_localBPT->rt_power.readOp.dynamic;
2299        output_data += L1_localBPT->output_data;
2300    }
2301    if (L2_localBPT) {
2302        L2_localBPT->output_data.peak_dynamic_power =
2303            L2_localBPT->power_t.readOp.dynamic * clockRate;
2304        L2_localBPT->output_data.runtime_dynamic_energy =
2305            L2_localBPT->rt_power.readOp.dynamic;
2306        output_data += L2_localBPT->output_data;
2307    }
2308    if (chooser) {
2309        chooser->output_data.peak_dynamic_power =
2310            chooser->power_t.readOp.dynamic * clockRate;
2311        chooser->output_data.runtime_dynamic_energy =
2312            chooser->rt_power.readOp.dynamic;
2313        output_data += chooser->output_data;
2314    }
2315    if (RAS) {
2316        RAS->output_data.peak_dynamic_power =
2317            RAS->power_t.readOp.dynamic * clockRate;
2318        RAS->output_data.subthreshold_leakage_power =
2319            RAS->power_t.readOp.leakage * core_params.num_hthreads;
2320        RAS->output_data.gate_leakage_power =
2321            RAS->power_t.readOp.gate_leakage * core_params.num_hthreads;
2322        RAS->output_data.runtime_dynamic_energy = RAS->rt_power.readOp.dynamic;
2323        output_data += RAS->output_data;
2324    }
2325}
2326
2327void BranchPredictor::displayData(uint32_t indent, int plevel) {
2328    if (!exist) return;
2329
2330    McPATComponent::displayData(indent, plevel);
2331
2332    globalBPT->displayData(indent + 4, plevel);
2333    L1_localBPT->displayData(indent + 4, plevel);
2334    L2_localBPT->displayData(indent + 4, plevel);
2335    chooser->displayData(indent + 4, plevel);
2336    RAS->displayData(indent + 4, plevel);
2337}
2338
2339void InstFetchU::computeEnergy() {
2340    if (!exist) return;
2341
2342    if (BPT) {
2343        BPT->computeEnergy();
2344    }
2345
2346    IB->tdp_stats.reset();
2347    IB->tdp_stats.readAc.access = core_params.peak_issueW;
2348    IB->tdp_stats.writeAc.access = core_params.peak_issueW;
2349    IB->rtp_stats.reset();
2350    IB->rtp_stats.readAc.access = core_stats.total_instructions;
2351    IB->rtp_stats.writeAc.access = core_stats.total_instructions;
2352    IB->power_t.reset();
2353    IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic *
2354        IB->tdp_stats.readAc.access +
2355        IB->local_result.power.writeOp.dynamic * IB->tdp_stats.writeAc.access;
2356    IB->power_t = IB->power_t + IB->local_result.power * pppm_lkg;
2357    IB->rt_power.reset();
2358    IB->rt_power.readOp.dynamic += IB->local_result.power.readOp.dynamic *
2359        IB->rtp_stats.readAc.access +
2360        IB->local_result.power.writeOp.dynamic * IB->rtp_stats.writeAc.access;
2361
2362    if (core_params.predictionW > 0) {
2363        BTB->tdp_stats.reset();
2364        BTB->tdp_stats.readAc.access = core_params.predictionW;
2365        BTB->tdp_stats.writeAc.access = 0;
2366        BTB->rtp_stats.reset();
2367        BTB->rtp_stats.readAc.access = inst_fetch_stats.btb_read_accesses;
2368        BTB->rtp_stats.writeAc.access = inst_fetch_stats.btb_write_accesses;
2369        BTB->power_t.reset();
2370        BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic *
2371            BTB->tdp_stats.readAc.access +
2372            BTB->local_result.power.writeOp.dynamic *
2373            BTB->tdp_stats.writeAc.access;
2374        BTB->rt_power.reset();
2375        BTB->rt_power.readOp.dynamic +=
2376            BTB->local_result.power.readOp.dynamic *
2377            BTB->rtp_stats.readAc.access +
2378            BTB->local_result.power.writeOp.dynamic *
2379            BTB->rtp_stats.writeAc.access;
2380    }
2381
2382    ID_inst->tdp_stats.reset();
2383    ID_inst->tdp_stats.readAc.access = core_params.decodeW;
2384    ID_inst->power_t.reset();
2385    ID_inst->power_t = ID_misc->power;
2386    ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic *
2387        ID_inst->tdp_stats.readAc.access;
2388    ID_inst->rtp_stats.reset();
2389    ID_inst->rtp_stats.readAc.access = core_stats.total_instructions;
2390    ID_inst->rt_power.reset();
2391    ID_inst->rt_power.readOp.dynamic = ID_inst->power.readOp.dynamic *
2392        ID_inst->rtp_stats.readAc.access;
2393
2394    ID_operand->tdp_stats.reset();
2395    ID_operand->tdp_stats.readAc.access = core_params.decodeW;
2396    ID_operand->power_t.reset();
2397    ID_operand->power_t = ID_misc->power;
2398    ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic *
2399        ID_operand->tdp_stats.readAc.access;
2400    ID_operand->rtp_stats.reset();
2401    ID_operand->rtp_stats.readAc.access = core_stats.total_instructions;
2402    ID_operand->rt_power.reset();
2403    ID_operand->rt_power.readOp.dynamic = ID_operand->power.readOp.dynamic *
2404        ID_operand->rtp_stats.readAc.access;
2405
2406    ID_misc->tdp_stats.reset();
2407    ID_misc->tdp_stats.readAc.access = core_params.decodeW;
2408    ID_misc->power_t.reset();
2409    ID_misc->power_t = ID_misc->power;
2410    ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic *
2411        ID_misc->tdp_stats.readAc.access;
2412    ID_misc->rtp_stats.reset();
2413    ID_misc->rtp_stats.readAc.access = core_stats.total_instructions;
2414    ID_misc->rt_power.reset();
2415    ID_misc->rt_power.readOp.dynamic = ID_misc->power.readOp.dynamic *
2416        ID_misc->rtp_stats.readAc.access;
2417
2418    power.reset();
2419    rt_power.reset();
2420    McPATComponent::computeEnergy();
2421
2422    output_data.reset();
2423    if (icache) {
2424        output_data += icache->output_data;
2425    }
2426    if (IB) {
2427        IB->output_data.peak_dynamic_power =
2428            IB->power_t.readOp.dynamic * clockRate;
2429        IB->output_data.runtime_dynamic_energy = IB->rt_power.readOp.dynamic;
2430        output_data += IB->output_data;
2431    }
2432    if (BTB) {
2433        BTB->output_data.peak_dynamic_power =
2434            BTB->power_t.readOp.dynamic * clockRate;
2435        BTB->output_data.runtime_dynamic_energy = BTB->rt_power.readOp.dynamic;
2436        output_data += BTB->output_data;
2437    }
2438    if (BPT) {
2439        output_data += BPT->output_data;
2440    }
2441    if (ID_inst) {
2442        ID_inst->output_data.peak_dynamic_power =
2443            ID_inst->power_t.readOp.dynamic * clockRate;
2444        ID_inst->output_data.runtime_dynamic_energy =
2445            ID_inst->rt_power.readOp.dynamic;
2446        output_data += ID_inst->output_data;
2447    }
2448    if (ID_operand) {
2449        ID_operand->output_data.peak_dynamic_power =
2450            ID_operand->power_t.readOp.dynamic * clockRate;
2451        ID_operand->output_data.runtime_dynamic_energy =
2452            ID_operand->rt_power.readOp.dynamic;
2453        output_data += ID_operand->output_data;
2454    }
2455    if (ID_misc) {
2456        ID_misc->output_data.peak_dynamic_power =
2457            ID_misc->power_t.readOp.dynamic * clockRate;
2458        ID_misc->output_data.runtime_dynamic_energy =
2459            ID_misc->rt_power.readOp.dynamic;
2460        output_data += ID_misc->output_data;
2461    }
2462}
2463
2464void InstFetchU::displayData(uint32_t indent, int plevel) {
2465    if (!exist) return;
2466
2467    McPATComponent::displayData(indent, plevel);
2468
2469    if (core_params.predictionW > 0) {
2470        BTB->displayData(indent + 4, plevel);
2471        if (BPT->exist) {
2472            BPT->displayData(indent + 4, plevel);
2473        }
2474    }
2475    IB->displayData(indent + 4, plevel);
2476    ID_inst->displayData(indent + 4, plevel);
2477    ID_operand->displayData(indent + 4, plevel);
2478    ID_misc->displayData(indent + 4, plevel);
2479}
2480
2481void RENAMINGU::computeEnergy() {
2482    if (!exist) return;
2483
2484    idcl->tdp_stats.reset();
2485    idcl->rtp_stats.reset();
2486    idcl->power_t.reset();
2487    idcl->rt_power.reset();
2488    if (core_params.core_ty == OOO) {
2489        idcl->tdp_stats.readAc.access = core_params.decodeW;
2490        idcl->rtp_stats.readAc.access = 3 * core_params.decodeW *
2491            core_params.decodeW * core_stats.rename_reads;
2492    } else if (core_params.issueW > 1) {
2493        idcl->tdp_stats.readAc.access = core_params.decodeW;
2494        idcl->rtp_stats.readAc.access = 2 * core_stats.int_instructions;
2495    }
2496    idcl->power_t.readOp.dynamic = idcl->tdp_stats.readAc.access *
2497        idcl->power.readOp.dynamic;
2498    idcl->power_t.readOp.leakage = idcl->power.readOp.leakage *
2499        core_params.num_hthreads;
2500    idcl->power_t.readOp.gate_leakage = idcl->power.readOp.gate_leakage *
2501        core_params.num_hthreads;
2502    idcl->rt_power.readOp.dynamic = idcl->rtp_stats.readAc.access *
2503        idcl->power.readOp.dynamic;
2504
2505    fdcl->tdp_stats.reset();
2506    fdcl->rtp_stats.reset();
2507    fdcl->power_t.reset();
2508    fdcl->rt_power.reset();
2509    if (core_params.core_ty == OOO) {
2510        fdcl->tdp_stats.readAc.access = core_params.decodeW;
2511        fdcl->rtp_stats.readAc.access = 3 * core_params.fp_issueW *
2512            core_params.fp_issueW * core_stats.fp_rename_writes;
2513    } else if (core_params.issueW > 1) {
2514        fdcl->tdp_stats.readAc.access = core_params.decodeW;
2515        fdcl->rtp_stats.readAc.access = core_stats.fp_instructions;
2516    }
2517    fdcl->power_t.readOp.dynamic = fdcl->tdp_stats.readAc.access *
2518        fdcl->power.readOp.dynamic;
2519    fdcl->power_t.readOp.leakage = fdcl->power.readOp.leakage *
2520        core_params.num_hthreads;
2521    fdcl->power_t.readOp.gate_leakage = fdcl->power.readOp.gate_leakage *
2522        core_params.num_hthreads;
2523    fdcl->rt_power.readOp.dynamic = fdcl->rtp_stats.readAc.access *
2524        fdcl->power.readOp.dynamic;
2525
2526    if (iRRAT) {
2527        iRRAT->tdp_stats.reset();
2528        iRRAT->tdp_stats.readAc.access = iRRAT->l_ip.num_rd_ports;
2529        iRRAT->tdp_stats.writeAc.access = iRRAT->l_ip.num_wr_ports;
2530        iRRAT->rtp_stats.reset();
2531        iRRAT->rtp_stats.readAc.access = core_stats.rename_writes;
2532        iRRAT->rtp_stats.writeAc.access = core_stats.rename_writes;
2533        iRRAT->power_t.reset();
2534        iRRAT->power_t.readOp.dynamic +=
2535            iRRAT->tdp_stats.readAc.access * iRRAT->power.readOp.dynamic +
2536            iRRAT->tdp_stats.writeAc.access * iRRAT->power.writeOp.dynamic;
2537        iRRAT->rt_power.reset();
2538        iRRAT->rt_power.readOp.dynamic +=
2539            iRRAT->rtp_stats.readAc.access * iRRAT->power.readOp.dynamic +
2540            iRRAT->rtp_stats.writeAc.access * iRRAT->power.writeOp.dynamic;
2541        iRRAT->power_t.readOp.leakage =
2542            iRRAT->power.readOp.leakage * core_params.num_hthreads;
2543        iRRAT->power_t.readOp.gate_leakage =
2544            iRRAT->power.readOp.gate_leakage * core_params.num_hthreads;
2545    }
2546
2547    if (ifreeL) {
2548        ifreeL->tdp_stats.reset();
2549        ifreeL->tdp_stats.readAc.access = core_params.decodeW;
2550        ifreeL->tdp_stats.writeAc.access = core_params.decodeW;
2551        ifreeL->rtp_stats.reset();
2552        if (core_params.scheu_ty == PhysicalRegFile) {
2553            ifreeL->rtp_stats.readAc.access = core_stats.rename_reads;
2554            ifreeL->rtp_stats.writeAc.access = 2 * core_stats.rename_writes;
2555        } else if (core_params.scheu_ty == ReservationStation) {
2556            ifreeL->rtp_stats.readAc.access =
2557                core_stats.rename_reads + core_stats.fp_rename_reads;
2558            ifreeL->rtp_stats.writeAc.access =
2559                2 * (core_stats.rename_writes + core_stats.fp_rename_writes);
2560        }
2561        ifreeL->power_t.reset();
2562        ifreeL->power_t.readOp.dynamic +=
2563            ifreeL->tdp_stats.readAc.access * ifreeL->power.readOp.dynamic +
2564            ifreeL->tdp_stats.writeAc.access * ifreeL->power.writeOp.dynamic;
2565        ifreeL->rt_power.reset();
2566        ifreeL->rt_power.readOp.dynamic +=
2567            ifreeL->rtp_stats.readAc.access * ifreeL->power.readOp.dynamic +
2568            ifreeL->rtp_stats.writeAc.access * ifreeL->power.writeOp.dynamic;
2569        ifreeL->power_t.readOp.leakage =
2570            ifreeL->power.readOp.leakage * core_params.num_hthreads;
2571        ifreeL->power_t.readOp.gate_leakage =
2572            ifreeL->power.readOp.gate_leakage * core_params.num_hthreads;
2573    }
2574
2575    if (fRRAT) {
2576        fRRAT->tdp_stats.reset();
2577        fRRAT->tdp_stats.readAc.access = fRRAT->l_ip.num_rd_ports;
2578        fRRAT->tdp_stats.writeAc.access = fRRAT->l_ip.num_wr_ports;
2579        fRRAT->rtp_stats.reset();
2580        fRRAT->rtp_stats.readAc.access = core_stats.fp_rename_writes;
2581        fRRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes;
2582        fRRAT->power_t.reset();
2583        fRRAT->power_t.readOp.dynamic +=
2584            fRRAT->tdp_stats.readAc.access * fRRAT->power.readOp.dynamic +
2585            fRRAT->tdp_stats.writeAc.access * fRRAT->power.writeOp.dynamic;
2586        fRRAT->rt_power.reset();
2587        fRRAT->rt_power.readOp.dynamic +=
2588            fRRAT->rtp_stats.readAc.access * fRRAT->power.readOp.dynamic +
2589            fRRAT->rtp_stats.writeAc.access * fRRAT->power.writeOp.dynamic;
2590        fRRAT->power_t.readOp.leakage =
2591            fRRAT->power.readOp.leakage * core_params.num_hthreads;
2592        fRRAT->power_t.readOp.gate_leakage =
2593            fRRAT->power.readOp.gate_leakage * core_params.num_hthreads;
2594    }
2595
2596    if (ffreeL) {
2597        ffreeL->tdp_stats.reset();
2598        ffreeL->tdp_stats.readAc.access = core_params.decodeW;
2599        ffreeL->tdp_stats.writeAc.access = core_params.decodeW;
2600        ffreeL->rtp_stats.reset();
2601        ffreeL->rtp_stats.readAc.access = core_stats.fp_rename_reads;
2602        ffreeL->rtp_stats.writeAc.access = 2 * core_stats.fp_rename_writes;
2603        ffreeL->power_t.reset();
2604        ffreeL->power_t.readOp.dynamic +=
2605            ffreeL->tdp_stats.readAc.access * ffreeL->power.readOp.dynamic +
2606            ffreeL->tdp_stats.writeAc.access * ffreeL->power.writeOp.dynamic;
2607        ffreeL->rt_power.reset();
2608        ffreeL->rt_power.readOp.dynamic +=
2609            ffreeL->rtp_stats.readAc.access * ffreeL->power.readOp.dynamic +
2610            ffreeL->rtp_stats.writeAc.access * ffreeL->power.writeOp.dynamic;
2611        ffreeL->power_t.readOp.leakage =
2612            ffreeL->power.readOp.leakage * core_params.num_hthreads;
2613        ffreeL->power_t.readOp.gate_leakage =
2614            ffreeL->power.readOp.gate_leakage * core_params.num_hthreads;
2615    }
2616
2617    if (iFRAT) {
2618        tdp_stats.reset();
2619        if (core_params.rm_ty == RAMbased) {
2620            iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_rd_ports;
2621            iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports;
2622            iFRAT->tdp_stats.searchAc.access = iFRAT->l_ip.num_search_ports;
2623        } else if ((core_params.rm_ty == CAMbased)) {
2624            iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_search_ports;
2625            iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports;
2626        }
2627        rtp_stats.reset();
2628        iFRAT->rtp_stats.readAc.access = core_stats.rename_reads;
2629        iFRAT->rtp_stats.writeAc.access = core_stats.rename_writes;
2630        if (core_params.scheu_ty == ReservationStation &&
2631            core_params.rm_ty == RAMbased) {
2632            iFRAT->rtp_stats.searchAc.access =
2633                core_stats.committed_int_instructions;
2634        }
2635        iFRAT->power_t.reset();
2636        iFRAT->power_t.readOp.dynamic += iFRAT->tdp_stats.readAc.access
2637            * (iFRAT->local_result.power.readOp.dynamic
2638               + idcl->power.readOp.dynamic)
2639            + iFRAT->tdp_stats.writeAc.access
2640            * iFRAT->local_result.power.writeOp.dynamic
2641            + iFRAT->tdp_stats.searchAc.access
2642            * iFRAT->local_result.power.searchOp.dynamic;
2643        iFRAT->power_t.readOp.leakage =
2644            iFRAT->power.readOp.leakage * core_params.num_hthreads;
2645        iFRAT->power_t.readOp.gate_leakage =
2646            iFRAT->power.readOp.gate_leakage * core_params.num_hthreads;
2647        iFRAT->rt_power.reset();
2648        iFRAT->rt_power.readOp.dynamic += iFRAT->rtp_stats.readAc.access
2649            * (iFRAT->local_result.power.readOp.dynamic
2650               + idcl->power.readOp.dynamic)
2651            + iFRAT->rtp_stats.writeAc.access
2652            * iFRAT->local_result.power.writeOp.dynamic
2653            + iFRAT->rtp_stats.searchAc.access
2654            * iFRAT->local_result.power.searchOp.dynamic;
2655    }
2656
2657    if (fFRAT) {
2658        tdp_stats.reset();
2659        fFRAT->tdp_stats.writeAc.access = fFRAT->l_ip.num_wr_ports;
2660        if ((core_params.rm_ty == CAMbased)) {
2661            fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_search_ports;
2662        } else if (core_params.rm_ty == RAMbased) {
2663            fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_rd_ports;
2664            if (core_params.scheu_ty == ReservationStation) {
2665                fFRAT->tdp_stats.searchAc.access = fFRAT->l_ip.num_search_ports;
2666            }
2667        }
2668        rtp_stats.reset();
2669        fFRAT->rtp_stats.readAc.access = core_stats.fp_rename_reads;
2670        fFRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes;
2671        if (core_params.scheu_ty == ReservationStation &&
2672            core_params.rm_ty == RAMbased) {
2673            fFRAT->rtp_stats.searchAc.access =
2674                core_stats.committed_fp_instructions;
2675        }
2676        fFRAT->power_t.reset();
2677        fFRAT->power_t.readOp.dynamic += fFRAT->tdp_stats.readAc.access
2678            * (fFRAT->local_result.power.readOp.dynamic
2679               + fdcl->power.readOp.dynamic)
2680            + fFRAT->tdp_stats.writeAc.access
2681            * fFRAT->local_result.power.writeOp.dynamic
2682            + fFRAT->tdp_stats.searchAc.access
2683            * fFRAT->local_result.power.searchOp.dynamic;
2684        fFRAT->power_t.readOp.leakage =
2685            fFRAT->power.readOp.leakage * core_params.num_hthreads;
2686        fFRAT->power_t.readOp.gate_leakage =
2687            fFRAT->power.readOp.gate_leakage * core_params.num_hthreads;
2688        fFRAT->rt_power.reset();
2689        fFRAT->rt_power.readOp.dynamic += fFRAT->rtp_stats.readAc.access
2690            * (fFRAT->local_result.power.readOp.dynamic
2691               + fdcl->power.readOp.dynamic)
2692            + fFRAT->rtp_stats.writeAc.access
2693            * fFRAT->local_result.power.writeOp.dynamic
2694            + fFRAT->rtp_stats.searchAc.access
2695            * fFRAT->local_result.power.searchOp.dynamic;
2696    }
2697
2698    output_data.reset();
2699    if (iFRAT) {
2700        iFRAT->output_data.peak_dynamic_power =
2701            iFRAT->power_t.readOp.dynamic * clockRate;
2702        iFRAT->output_data.subthreshold_leakage_power =
2703            iFRAT->power_t.readOp.leakage;
2704        iFRAT->output_data.gate_leakage_power =
2705            iFRAT->power_t.readOp.gate_leakage;
2706        iFRAT->output_data.runtime_dynamic_energy =
2707            iFRAT->rt_power.readOp.dynamic;
2708        output_data += iFRAT->output_data;
2709    }
2710    if (fFRAT) {
2711        fFRAT->output_data.peak_dynamic_power =
2712            fFRAT->power_t.readOp.dynamic * clockRate;
2713        fFRAT->output_data.subthreshold_leakage_power =
2714            fFRAT->power_t.readOp.leakage;
2715        fFRAT->output_data.gate_leakage_power =
2716            fFRAT->power_t.readOp.gate_leakage;
2717        fFRAT->output_data.runtime_dynamic_energy =
2718            fFRAT->rt_power.readOp.dynamic;
2719        output_data += fFRAT->output_data;
2720    }
2721    if (iRRAT) {
2722        iRRAT->output_data.peak_dynamic_power =
2723            iRRAT->power_t.readOp.dynamic * clockRate;
2724        iRRAT->output_data.subthreshold_leakage_power =
2725            iRRAT->power_t.readOp.leakage;
2726        iRRAT->output_data.gate_leakage_power =
2727            iRRAT->power_t.readOp.gate_leakage;
2728        iRRAT->output_data.runtime_dynamic_energy =
2729            iRRAT->rt_power.readOp.dynamic;
2730        output_data += iRRAT->output_data;
2731    }
2732    if (fRRAT) {
2733        fRRAT->output_data.peak_dynamic_power =
2734            fRRAT->power_t.readOp.dynamic * clockRate;
2735        fRRAT->output_data.subthreshold_leakage_power =
2736            fRRAT->power_t.readOp.leakage;
2737        fRRAT->output_data.gate_leakage_power =
2738            fRRAT->power_t.readOp.gate_leakage;
2739        fRRAT->output_data.runtime_dynamic_energy =
2740            fRRAT->rt_power.readOp.dynamic;
2741        output_data += fRRAT->output_data;
2742    }
2743    if (ifreeL) {
2744        ifreeL->output_data.peak_dynamic_power =
2745            ifreeL->power_t.readOp.dynamic * clockRate;
2746        ifreeL->output_data.subthreshold_leakage_power =
2747            ifreeL->power_t.readOp.leakage;
2748        ifreeL->output_data.gate_leakage_power =
2749            ifreeL->power_t.readOp.gate_leakage;
2750        ifreeL->output_data.runtime_dynamic_energy =
2751            ifreeL->rt_power.readOp.dynamic;
2752        output_data += ifreeL->output_data;
2753    }
2754    if (ffreeL) {
2755        ffreeL->output_data.peak_dynamic_power =
2756            ffreeL->power_t.readOp.dynamic * clockRate;
2757        ffreeL->output_data.subthreshold_leakage_power =
2758            ffreeL->power_t.readOp.leakage;
2759        ffreeL->output_data.gate_leakage_power =
2760            ffreeL->power_t.readOp.gate_leakage;
2761        ffreeL->output_data.runtime_dynamic_energy =
2762            ffreeL->rt_power.readOp.dynamic;
2763        output_data += ffreeL->output_data;
2764    }
2765    if (idcl) {
2766        idcl->output_data.peak_dynamic_power =
2767            idcl->power_t.readOp.dynamic * clockRate;
2768        idcl->output_data.subthreshold_leakage_power =
2769            idcl->power_t.readOp.leakage;
2770        idcl->output_data.gate_leakage_power =
2771            idcl->power_t.readOp.gate_leakage;
2772        idcl->output_data.runtime_dynamic_energy =
2773            idcl->rt_power.readOp.dynamic;
2774        output_data += idcl->output_data;
2775    }
2776    if (fdcl) {
2777        fdcl->output_data.peak_dynamic_power =
2778            fdcl->power_t.readOp.dynamic * clockRate;
2779        fdcl->output_data.subthreshold_leakage_power =
2780            fdcl->power_t.readOp.leakage;
2781        fdcl->output_data.gate_leakage_power =
2782            fdcl->power_t.readOp.gate_leakage;
2783        fdcl->output_data.runtime_dynamic_energy =
2784            fdcl->rt_power.readOp.dynamic;
2785        output_data += fdcl->output_data;
2786    }
2787    if (RAHT) {
2788        output_data += RAHT->output_data;
2789    }
2790}
2791
2792void RENAMINGU::displayData(uint32_t indent, int plevel) {
2793    if (!exist) return;
2794
2795    McPATComponent::displayData(indent, plevel);
2796
2797    if (core_params.core_ty == OOO) {
2798        iFRAT->displayData(indent + 4, plevel);
2799        fFRAT->displayData(indent + 4, plevel);
2800        ifreeL->displayData(indent + 4, plevel);
2801
2802        if (core_params.scheu_ty == PhysicalRegFile) {
2803            iRRAT->displayData(indent + 4, plevel);
2804            fRRAT->displayData(indent + 4, plevel);
2805            ffreeL->displayData(indent + 4, plevel);
2806        }
2807    }
2808    idcl->displayData(indent + 4, plevel);
2809    fdcl->displayData(indent + 4, plevel);
2810}
2811
2812void SchedulerU::computeEnergy() {
2813    if (!exist) return;
2814
2815    double ROB_duty_cycle;
2816    ROB_duty_cycle = 1;
2817
2818    if (int_instruction_selection) {
2819        int_instruction_selection->computeEnergy();
2820    }
2821
2822    if (fp_instruction_selection) {
2823        fp_instruction_selection->computeEnergy();
2824    }
2825
2826    if (int_inst_window) {
2827        int_inst_window->tdp_stats.reset();
2828        int_inst_window->rtp_stats.reset();
2829        int_inst_window->power_t.reset();
2830        int_inst_window->rt_power.reset();
2831        if (core_params.core_ty == OOO) {
2832            int_inst_window->tdp_stats.readAc.access =
2833                core_params.issueW * core_params.num_pipelines;
2834            int_inst_window->tdp_stats.writeAc.access =
2835                core_params.issueW * core_params.num_pipelines;
2836            int_inst_window->tdp_stats.searchAc.access =
2837                core_params.issueW * core_params.num_pipelines;
2838
2839            int_inst_window->power_t.readOp.dynamic +=
2840                int_inst_window->local_result.power.readOp.dynamic *
2841                int_inst_window->tdp_stats.readAc.access +
2842                int_inst_window->local_result.power.searchOp.dynamic *
2843                int_inst_window->tdp_stats.searchAc.access +
2844                int_inst_window->local_result.power.writeOp.dynamic *
2845                int_inst_window->tdp_stats.writeAc.access;
2846
2847            int_inst_window->rtp_stats.readAc.access =
2848                core_stats.inst_window_reads;
2849            int_inst_window->rtp_stats.writeAc.access =
2850                core_stats.inst_window_writes;
2851            int_inst_window->rtp_stats.searchAc.access =
2852                core_stats.inst_window_wakeup_accesses;
2853
2854            int_inst_window->rt_power.readOp.dynamic +=
2855                int_inst_window->local_result.power.readOp.dynamic *
2856                int_inst_window->rtp_stats.readAc.access +
2857                int_inst_window->local_result.power.searchOp.dynamic *
2858                int_inst_window->rtp_stats.searchAc.access +
2859                int_inst_window->local_result.power.writeOp.dynamic *
2860                int_inst_window->rtp_stats.writeAc.access;
2861        } else if (core_params.multithreaded) {
2862            int_inst_window->tdp_stats.readAc.access =
2863                core_params.issueW * core_params.num_pipelines;
2864            int_inst_window->tdp_stats.writeAc.access =
2865                core_params.issueW * core_params.num_pipelines;
2866            int_inst_window->tdp_stats.searchAc.access =
2867                core_params.issueW * core_params.num_pipelines;
2868
2869            int_inst_window->power_t.readOp.dynamic +=
2870                int_inst_window->local_result.power.readOp.dynamic *
2871                int_inst_window->tdp_stats.readAc.access +
2872                int_inst_window->local_result.power.searchOp.dynamic *
2873                int_inst_window->tdp_stats.searchAc.access +
2874                int_inst_window->local_result.power.writeOp.dynamic *
2875                int_inst_window->tdp_stats.writeAc.access;
2876
2877            int_inst_window->rtp_stats.readAc.access =
2878                core_stats.int_instructions + core_stats.fp_instructions;
2879            int_inst_window->rtp_stats.writeAc.access =
2880                core_stats.int_instructions + core_stats.fp_instructions;
2881            int_inst_window->rtp_stats.searchAc.access =
2882                2 * (core_stats.int_instructions + core_stats.fp_instructions);
2883
2884            int_inst_window->rt_power.readOp.dynamic  +=
2885                int_inst_window->local_result.power.readOp.dynamic *
2886                int_inst_window->rtp_stats.readAc.access +
2887                int_inst_window->local_result.power.searchOp.dynamic *
2888                int_inst_window->rtp_stats.searchAc.access +
2889                int_inst_window->local_result.power.writeOp.dynamic *
2890                int_inst_window->rtp_stats.writeAc.access;
2891        }
2892    }
2893
2894    if (fp_inst_window) {
2895        fp_inst_window->tdp_stats.reset();
2896        fp_inst_window->tdp_stats.readAc.access =
2897            fp_inst_window->l_ip.num_rd_ports * core_params.num_fp_pipelines;
2898        fp_inst_window->tdp_stats.writeAc.access =
2899            fp_inst_window->l_ip.num_wr_ports * core_params.num_fp_pipelines;
2900        fp_inst_window->tdp_stats.searchAc.access =
2901            fp_inst_window->l_ip.num_search_ports *
2902            core_params.num_fp_pipelines;
2903
2904        fp_inst_window->rtp_stats.reset();
2905        fp_inst_window->rtp_stats.readAc.access =
2906            core_stats.fp_inst_window_reads;
2907        fp_inst_window->rtp_stats.writeAc.access =
2908            core_stats.fp_inst_window_writes;
2909        fp_inst_window->rtp_stats.searchAc.access =
2910            core_stats.fp_inst_window_wakeup_accesses;
2911
2912        fp_inst_window->power_t.reset();
2913        fp_inst_window->power_t.readOp.dynamic +=
2914            fp_inst_window->power.readOp.dynamic *
2915            fp_inst_window->tdp_stats.readAc.access +
2916            fp_inst_window->power.searchOp.dynamic *
2917            fp_inst_window->tdp_stats.searchAc.access +
2918            fp_inst_window->power.writeOp.dynamic *
2919            fp_inst_window->tdp_stats.writeAc.access;
2920
2921        fp_inst_window->rt_power.reset();
2922        fp_inst_window->rt_power.readOp.dynamic +=
2923            fp_inst_window->power.readOp.dynamic *
2924            fp_inst_window->rtp_stats.readAc.access +
2925            fp_inst_window->power.searchOp.dynamic *
2926            fp_inst_window->rtp_stats.searchAc.access +
2927            fp_inst_window->power.writeOp.dynamic *
2928            fp_inst_window->rtp_stats.writeAc.access;
2929    }
2930
2931    if (ROB) {
2932        ROB->tdp_stats.reset();
2933        ROB->tdp_stats.readAc.access = core_params.commitW *
2934            core_params.num_pipelines * ROB_duty_cycle;
2935        ROB->tdp_stats.writeAc.access = core_params.issueW *
2936            core_params.num_pipelines * ROB_duty_cycle;
2937        ROB->rtp_stats.reset();
2938        ROB->rtp_stats.readAc.access = core_stats.ROB_reads;
2939        ROB->rtp_stats.writeAc.access = core_stats.ROB_writes;
2940        ROB->power_t.reset();
2941        ROB->power_t.readOp.dynamic +=
2942            ROB->local_result.power.readOp.dynamic *
2943            ROB->tdp_stats.readAc.access +
2944            ROB->local_result.power.writeOp.dynamic *
2945            ROB->tdp_stats.writeAc.access;
2946        ROB->rt_power.reset();
2947        ROB->rt_power.readOp.dynamic +=
2948            ROB->local_result.power.readOp.dynamic *
2949            ROB->rtp_stats.readAc.access +
2950            ROB->local_result.power.writeOp.dynamic *
2951            ROB->rtp_stats.writeAc.access;
2952    }
2953
2954    output_data.reset();
2955    if (int_inst_window) {
2956        int_inst_window->output_data.subthreshold_leakage_power =
2957            int_inst_window->power_t.readOp.leakage;
2958        int_inst_window->output_data.gate_leakage_power =
2959            int_inst_window->power_t.readOp.gate_leakage;
2960        int_inst_window->output_data.peak_dynamic_power =
2961            int_inst_window->power_t.readOp.dynamic * clockRate;
2962        int_inst_window->output_data.runtime_dynamic_energy =
2963            int_inst_window->rt_power.readOp.dynamic;
2964        output_data += int_inst_window->output_data;
2965    }
2966    if (fp_inst_window) {
2967        fp_inst_window->output_data.subthreshold_leakage_power =
2968            fp_inst_window->power_t.readOp.leakage;
2969        fp_inst_window->output_data.gate_leakage_power =
2970            fp_inst_window->power_t.readOp.gate_leakage;
2971        fp_inst_window->output_data.peak_dynamic_power =
2972            fp_inst_window->power_t.readOp.dynamic * clockRate;
2973        fp_inst_window->output_data.runtime_dynamic_energy =
2974            fp_inst_window->rt_power.readOp.dynamic;
2975        output_data += fp_inst_window->output_data;
2976    }
2977    if (ROB) {
2978        ROB->output_data.peak_dynamic_power =
2979            ROB->power_t.readOp.dynamic * clockRate;
2980        ROB->output_data.runtime_dynamic_energy =
2981            ROB->rt_power.readOp.dynamic;
2982        output_data += ROB->output_data;
2983    }
2984
2985    // Integer and FP instruction selection logic is not included in the
2986    // roll-up due to the uninitialized area
2987    /*
2988    if (int_instruction_selection) {
2989        output_data += int_instruction_selection->output_data;
2990    }
2991    if (fp_instruction_selection) {
2992        output_data += fp_instruction_selection->output_data;
2993    }
2994    */
2995}
2996
2997void SchedulerU::displayData(uint32_t indent, int plevel) {
2998    if (!exist) return;
2999
3000    McPATComponent::displayData(indent, plevel);
3001
3002    if (core_params.core_ty == OOO) {
3003        int_inst_window->displayData(indent + 4, plevel);
3004        fp_inst_window->displayData(indent + 4, plevel);
3005        if (core_params.ROB_size > 0) {
3006            ROB->displayData(indent + 4, plevel);
3007        }
3008    } else if (core_params.multithreaded) {
3009        int_inst_window->displayData(indent + 4, plevel);
3010    }
3011
3012    // Integer and FP instruction selection logic is not included in the
3013    // roll-up due to the uninitialized area
3014    /*
3015    if (int_instruction_selection) {
3016        int_instruction_selection->displayData(indent + 4, plevel);
3017    }
3018    if (fp_instruction_selection) {
3019        fp_instruction_selection->displayData(indent + 4, plevel);
3020    }
3021    */
3022}
3023
3024void LoadStoreU::computeEnergy() {
3025    if (!exist) return;
3026
3027    LSQ->tdp_stats.reset();
3028    LSQ->tdp_stats.readAc.access = LSQ->l_ip.num_search_ports *
3029        core_stats.LSU_duty_cycle;
3030    LSQ->tdp_stats.writeAc.access = LSQ->l_ip.num_search_ports *
3031        core_stats.LSU_duty_cycle;
3032    LSQ->rtp_stats.reset();
3033    // Flush overhead conidered
3034    LSQ->rtp_stats.readAc.access  = (core_stats.load_instructions +
3035                                     core_stats.store_instructions) * 2;
3036    LSQ->rtp_stats.writeAc.access = (core_stats.load_instructions +
3037                                     core_stats.store_instructions) * 2;
3038    LSQ->power_t.reset();
3039    //every memory access invloves at least two operations on LSQ
3040    LSQ->power_t.readOp.dynamic += LSQ->tdp_stats.readAc.access *
3041        (LSQ->local_result.power.searchOp.dynamic +
3042         LSQ->local_result.power.readOp.dynamic) +
3043        LSQ->tdp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic;
3044    LSQ->rt_power.reset();
3045    //every memory access invloves at least two operations on LSQ
3046    LSQ->rt_power.readOp.dynamic += LSQ->rtp_stats.readAc.access *
3047        (LSQ->local_result.power.searchOp.dynamic +
3048         LSQ->local_result.power.readOp.dynamic) +
3049        LSQ->rtp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic;
3050
3051    if (LoadQ) {
3052        LoadQ->tdp_stats.reset();
3053        LoadQ->tdp_stats.readAc.access = LoadQ->l_ip.num_search_ports *
3054            core_stats.LSU_duty_cycle;
3055        LoadQ->tdp_stats.writeAc.access = LoadQ->l_ip.num_search_ports *
3056            core_stats.LSU_duty_cycle;
3057        LoadQ->rtp_stats.reset();
3058        LoadQ->rtp_stats.readAc.access = core_stats.load_instructions +
3059            core_stats.store_instructions;
3060        LoadQ->rtp_stats.writeAc.access = core_stats.load_instructions +
3061            core_stats.store_instructions;
3062        LoadQ->power_t.reset();
3063        //every memory access invloves at least two operations on LoadQ
3064        LoadQ->power_t.readOp.dynamic +=
3065            LoadQ->tdp_stats.readAc.access *
3066            (LoadQ->local_result.power.searchOp.dynamic +
3067             LoadQ->local_result.power.readOp.dynamic) +
3068            LoadQ->tdp_stats.writeAc.access *
3069            LoadQ->local_result.power.writeOp.dynamic;
3070        LoadQ->rt_power.reset();
3071        //every memory access invloves at least two operations on LoadQ
3072        LoadQ->rt_power.readOp.dynamic += LoadQ->rtp_stats.readAc.access *
3073            (LoadQ->local_result.power.searchOp.dynamic +
3074             LoadQ->local_result.power.readOp.dynamic) +
3075            LoadQ->rtp_stats.writeAc.access *
3076            LoadQ->local_result.power.writeOp.dynamic;
3077    }
3078
3079    McPATComponent::computeEnergy();
3080
3081    output_data.reset();
3082    if (dcache) {
3083        output_data += dcache->output_data;
3084    }
3085    if (LSQ) {
3086        LSQ->output_data.peak_dynamic_power =
3087            LSQ->power_t.readOp.dynamic * clockRate;
3088        LSQ->output_data.runtime_dynamic_energy = LSQ->rt_power.readOp.dynamic;
3089        output_data += LSQ->output_data;
3090    }
3091    if (LoadQ) {
3092        LoadQ->output_data.peak_dynamic_power =
3093            LoadQ->power_t.readOp.dynamic * clockRate;
3094        LoadQ->output_data.runtime_dynamic_energy =
3095            LoadQ->rt_power.readOp.dynamic;
3096        output_data += LoadQ->output_data;
3097    }
3098}
3099
3100void LoadStoreU::displayData(uint32_t indent, int plevel) {
3101    if (!exist) return;
3102
3103    McPATComponent::displayData(indent, plevel);
3104
3105    if (LoadQ) {
3106        LoadQ->displayData(indent + 4, plevel);
3107    }
3108    LSQ->displayData(indent + 4, plevel);
3109
3110}
3111
3112void MemManU::computeEnergy() {
3113    if (!exist) return;
3114
3115    itlb->tdp_stats.reset();
3116    itlb->tdp_stats.readAc.access = itlb->l_ip.num_search_ports;
3117    itlb->tdp_stats.readAc.miss = 0;
3118    itlb->tdp_stats.readAc.hit = itlb->tdp_stats.readAc.access -
3119        itlb->tdp_stats.readAc.miss;
3120    itlb->rtp_stats.reset();
3121    itlb->rtp_stats.readAc.access = mem_man_stats.itlb_total_accesses;
3122    itlb->rtp_stats.writeAc.access = mem_man_stats.itlb_total_misses;
3123
3124    itlb->power_t.reset();
3125    //FA spent most power in tag, so use total access not hits
3126    itlb->power_t.readOp.dynamic += itlb->tdp_stats.readAc.access *
3127        itlb->local_result.power.searchOp.dynamic +
3128        itlb->tdp_stats.readAc.miss *
3129        itlb->local_result.power.writeOp.dynamic;
3130    itlb->rt_power.reset();
3131    //FA spent most power in tag, so use total access not hits
3132    itlb->rt_power.readOp.dynamic += itlb->rtp_stats.readAc.access *
3133        itlb->local_result.power.searchOp.dynamic +
3134        itlb->rtp_stats.writeAc.access *
3135        itlb->local_result.power.writeOp.dynamic;
3136
3137    dtlb->tdp_stats.reset();
3138    dtlb->tdp_stats.readAc.access = dtlb->l_ip.num_search_ports *
3139        core_stats.LSU_duty_cycle;
3140    dtlb->tdp_stats.readAc.miss = 0;
3141    dtlb->tdp_stats.readAc.hit = dtlb->tdp_stats.readAc.access -
3142        dtlb->tdp_stats.readAc.miss;
3143    dtlb->rtp_stats.reset();
3144    dtlb->rtp_stats.readAc.access = mem_man_stats.dtlb_read_accesses +
3145        mem_man_stats.dtlb_write_misses;
3146    dtlb->rtp_stats.writeAc.access = mem_man_stats.dtlb_write_accesses +
3147        mem_man_stats.dtlb_read_misses;
3148
3149    dtlb->power_t.reset();
3150    //FA spent most power in tag, so use total access not hits
3151    dtlb->power_t.readOp.dynamic += dtlb->tdp_stats.readAc.access *
3152        dtlb->local_result.power.searchOp.dynamic +
3153        dtlb->tdp_stats.readAc.miss *
3154        dtlb->local_result.power.writeOp.dynamic;
3155    dtlb->rt_power.reset();
3156    //FA spent most power in tag, so use total access not hits
3157    dtlb->rt_power.readOp.dynamic += dtlb->rtp_stats.readAc.access *
3158        dtlb->local_result.power.searchOp.dynamic +
3159        dtlb->rtp_stats.writeAc.access *
3160        dtlb->local_result.power.writeOp.dynamic;
3161
3162    output_data.reset();
3163    if (itlb) {
3164        itlb->output_data.peak_dynamic_power = itlb->power_t.readOp.dynamic *
3165            clockRate;
3166        itlb->output_data.runtime_dynamic_energy =
3167            itlb->rt_power.readOp.dynamic;
3168        output_data += itlb->output_data;
3169    }
3170    if (dtlb) {
3171        dtlb->output_data.peak_dynamic_power =
3172            dtlb->power_t.readOp.dynamic * clockRate;
3173        dtlb->output_data.runtime_dynamic_energy =
3174            dtlb->rt_power.readOp.dynamic;
3175        output_data += dtlb->output_data;
3176    }
3177}
3178
3179void MemManU::displayData(uint32_t indent, int plevel) {
3180    if (!exist) return;
3181
3182    McPATComponent::displayData(indent, plevel);
3183
3184    itlb->displayData(indent + 4, plevel);
3185    dtlb->displayData(indent + 4, plevel);
3186}
3187
3188void RegFU::computeEnergy() {
3189    /*
3190     * Architecture RF and physical RF cannot be present at the same time.
3191     * Therefore, the RF stats can only refer to either ARF or PRF;
3192     * And the same stats can be used for both.
3193     */
3194    if (!exist) return;
3195
3196    IRF->tdp_stats.reset();
3197    IRF->tdp_stats.readAc.access =
3198        core_params.issueW * NUM_INT_INST_SOURCE_OPERANDS *
3199        (core_stats.ALU_duty_cycle * 1.1 +
3200         (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) *
3201        core_params.num_pipelines;
3202    IRF->tdp_stats.writeAc.access =
3203        core_params.issueW *
3204        (core_stats.ALU_duty_cycle * 1.1 +
3205         (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) *
3206        core_params.num_pipelines;
3207    IRF->rtp_stats.reset();
3208    IRF->rtp_stats.readAc.access  = core_stats.int_regfile_reads;
3209    IRF->rtp_stats.writeAc.access  = core_stats.int_regfile_writes;
3210    if (core_params.regWindowing) {
3211        IRF->rtp_stats.readAc.access += core_stats.function_calls *
3212            RFWIN_ACCESS_MULTIPLIER;
3213        IRF->rtp_stats.writeAc.access += core_stats.function_calls *
3214            RFWIN_ACCESS_MULTIPLIER;
3215    }
3216    IRF->power_t.reset();
3217    IRF->power_t.readOp.dynamic += IRF->tdp_stats.readAc.access *
3218        IRF->local_result.power.readOp.dynamic +
3219        IRF->tdp_stats.writeAc.access *
3220        IRF->local_result.power.writeOp.dynamic;
3221    IRF->rt_power.reset();
3222    IRF->rt_power.readOp.dynamic +=
3223        IRF->rtp_stats.readAc.access * IRF->local_result.power.readOp.dynamic +
3224        IRF->rtp_stats.writeAc.access * IRF->local_result.power.writeOp.dynamic;
3225
3226    FRF->tdp_stats.reset();
3227    FRF->tdp_stats.readAc.access  =
3228        FRF->l_ip.num_rd_ports * core_stats.FPU_duty_cycle * 1.05 *
3229        core_params.num_fp_pipelines;
3230    FRF->tdp_stats.writeAc.access  =
3231        FRF->l_ip.num_wr_ports * core_stats.FPU_duty_cycle * 1.05 *
3232        core_params.num_fp_pipelines;
3233    FRF->rtp_stats.reset();
3234    FRF->rtp_stats.readAc.access = core_stats.float_regfile_reads;
3235    FRF->rtp_stats.writeAc.access = core_stats.float_regfile_writes;
3236    if (core_params.regWindowing) {
3237        FRF->rtp_stats.readAc.access += core_stats.function_calls *
3238            RFWIN_ACCESS_MULTIPLIER;
3239        FRF->rtp_stats.writeAc.access += core_stats.function_calls *
3240            RFWIN_ACCESS_MULTIPLIER;
3241    }
3242    FRF->power_t.reset();
3243    FRF->power_t.readOp.dynamic +=
3244        FRF->tdp_stats.readAc.access * FRF->local_result.power.readOp.dynamic +
3245        FRF->tdp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic;
3246    FRF->rt_power.reset();
3247    FRF->rt_power.readOp.dynamic +=
3248        FRF->rtp_stats.readAc.access * FRF->local_result.power.readOp.dynamic +
3249        FRF->rtp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic;
3250
3251    if (core_params.regWindowing) {
3252        RFWIN->tdp_stats.reset();
3253        RFWIN->tdp_stats.readAc.access = 0;
3254        RFWIN->tdp_stats.writeAc.access = 0;
3255        RFWIN->rtp_stats.reset();
3256        RFWIN->rtp_stats.readAc.access =
3257            core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
3258        RFWIN->rtp_stats.writeAc.access =
3259            core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
3260        RFWIN->power_t.reset();
3261        RFWIN->power_t.readOp.dynamic +=
3262            RFWIN->tdp_stats.readAc.access *
3263            RFWIN->local_result.power.readOp.dynamic +
3264            RFWIN->tdp_stats.writeAc.access *
3265            RFWIN->local_result.power.writeOp.dynamic;
3266        RFWIN->rt_power.reset();
3267        RFWIN->rt_power.readOp.dynamic +=
3268            RFWIN->rtp_stats.readAc.access *
3269            RFWIN->local_result.power.readOp.dynamic +
3270            RFWIN->rtp_stats.writeAc.access *
3271            RFWIN->local_result.power.writeOp.dynamic;
3272    }
3273
3274    output_data.reset();
3275    if (IRF) {
3276        IRF->output_data.peak_dynamic_power =
3277            IRF->power_t.readOp.dynamic * clockRate;
3278        IRF->output_data.subthreshold_leakage_power *=
3279            core_params.num_hthreads;
3280        IRF->output_data.gate_leakage_power *= core_params.num_hthreads;
3281        IRF->output_data.runtime_dynamic_energy = IRF->rt_power.readOp.dynamic;
3282        output_data += IRF->output_data;
3283    }
3284    if (FRF) {
3285        FRF->output_data.peak_dynamic_power =
3286            FRF->power_t.readOp.dynamic * clockRate;
3287        FRF->output_data.subthreshold_leakage_power *=
3288            core_params.num_hthreads;
3289        FRF->output_data.gate_leakage_power *= core_params.num_hthreads;
3290        FRF->output_data.runtime_dynamic_energy = FRF->rt_power.readOp.dynamic;
3291        output_data += FRF->output_data;
3292    }
3293    if (RFWIN) {
3294        RFWIN->output_data.peak_dynamic_power =
3295            RFWIN->power_t.readOp.dynamic * clockRate;
3296        RFWIN->output_data.runtime_dynamic_energy =
3297            RFWIN->rt_power.readOp.dynamic;
3298        output_data += RFWIN->output_data;
3299    }
3300}
3301
3302void RegFU::displayData(uint32_t indent, int plevel) {
3303    if (!exist) return;
3304
3305    McPATComponent::displayData(indent, plevel);
3306
3307    IRF->displayData(indent + 4, plevel);
3308    FRF->displayData(indent + 4, plevel);
3309    if (core_params.regWindowing) {
3310        RFWIN->displayData(indent + 4, plevel);
3311    }
3312}
3313
3314void EXECU::computeEnergy() {
3315    if (!exist) return;
3316
3317    int_bypass->set_params_stats(core_params.execu_int_bypass_ports,
3318                                 core_stats.ALU_cdb_duty_cycle,
3319                                 core_stats.cdb_alu_accesses);
3320
3321    intTagBypass->set_params_stats(core_params.execu_int_bypass_ports,
3322                                   core_stats.ALU_cdb_duty_cycle,
3323                                   core_stats.cdb_alu_accesses);
3324
3325    if (core_params.num_muls > 0) {
3326        int_mul_bypass->set_params_stats(core_params.execu_mul_bypass_ports,
3327                                         core_stats.MUL_cdb_duty_cycle,
3328                                         core_stats.cdb_mul_accesses);
3329
3330        intTag_mul_Bypass->set_params_stats(core_params.execu_mul_bypass_ports,
3331                                            core_stats.MUL_cdb_duty_cycle,
3332                                            core_stats.cdb_mul_accesses);
3333    }
3334
3335    if (core_params.num_fpus > 0) {
3336        fp_bypass->set_params_stats(core_params.execu_fp_bypass_ports,
3337                                    core_stats.FPU_cdb_duty_cycle,
3338                                    core_stats.cdb_fpu_accesses);
3339
3340        fpTagBypass->set_params_stats(core_params.execu_fp_bypass_ports,
3341                                      core_stats.FPU_cdb_duty_cycle,
3342                                      core_stats.cdb_fpu_accesses);
3343    }
3344
3345    McPATComponent::computeEnergy();
3346
3347    if (rfu) {
3348        rfu->computeEnergy();
3349        output_data += rfu->output_data;
3350    }
3351    if (scheu) {
3352        scheu->computeEnergy();
3353        output_data += scheu->output_data;
3354    }
3355    if (fp_u) {
3356        fp_u->computeEnergy();
3357        output_data += fp_u->output_data;
3358    }
3359    if (exeu) {
3360        exeu->computeEnergy();
3361        output_data += exeu->output_data;
3362    }
3363    if (mul) {
3364        mul->computeEnergy();
3365        output_data += mul->output_data;
3366    }
3367}
3368
3369void EXECU::displayData(uint32_t indent, int plevel) {
3370    if (!exist) return;
3371
3372    McPATComponent::displayData(indent, plevel);
3373
3374    rfu->displayData(indent + 4, plevel);
3375    if (scheu) {
3376        scheu->displayData(indent + 4, plevel);
3377    }
3378    exeu->displayData(indent + 4, plevel);
3379    if (core_params.num_fpus > 0) {
3380        fp_u->displayData(indent + 4, plevel);
3381    }
3382    if (core_params.num_muls > 0) {
3383        mul->displayData(indent + 4, plevel);
3384    }
3385}
3386
3387void Core::computeEnergy() {
3388    ifu->computeEnergy();
3389    lsu->computeEnergy();
3390    mmu->computeEnergy();
3391    exu->computeEnergy();
3392    if (core_params.core_ty == OOO) {
3393        rnu->computeEnergy();
3394    }
3395
3396    output_data.reset();
3397    if (ifu) {
3398        output_data += ifu->output_data;
3399    }
3400    if (lsu) {
3401        output_data += lsu->output_data;
3402    }
3403    if (mmu) {
3404        output_data += mmu->output_data;
3405    }
3406    if (exu) {
3407        output_data += exu->output_data;
3408    }
3409    if (rnu) {
3410        output_data += rnu->output_data;
3411    }
3412    if (corepipe) {
3413        output_data += corepipe->output_data;
3414    }
3415    if (undiffCore) {
3416        output_data += undiffCore->output_data;
3417    }
3418    if (l2cache) {
3419        output_data += l2cache->output_data;
3420    }
3421}
3422
3423InstFetchU ::~InstFetchU() {
3424
3425    if (!exist) return;
3426    if (IB) {
3427        delete IB;
3428        IB = NULL;
3429    }
3430    if (ID_inst) {
3431        delete ID_inst;
3432        ID_inst = NULL;
3433    }
3434    if (ID_operand) {
3435        delete ID_operand;
3436        ID_operand = NULL;
3437    }
3438    if (ID_misc) {
3439        delete ID_misc;
3440        ID_misc = NULL;
3441    }
3442    if (core_params.predictionW > 0) {
3443        if (BTB) {
3444            delete BTB;
3445            BTB = NULL;
3446        }
3447        if (BPT) {
3448            delete BPT;
3449            BPT = NULL;
3450        }
3451    }
3452    if (icache) {
3453        delete icache;
3454    }
3455}
3456
3457BranchPredictor ::~BranchPredictor() {
3458
3459    if (!exist) return;
3460    if (globalBPT) {
3461        delete globalBPT;
3462        globalBPT = NULL;
3463    }
3464    if (localBPT) {
3465        delete localBPT;
3466        localBPT = NULL;
3467    }
3468    if (L1_localBPT) {
3469        delete L1_localBPT;
3470        L1_localBPT = NULL;
3471    }
3472    if (L2_localBPT) {
3473        delete L2_localBPT;
3474        L2_localBPT = NULL;
3475    }
3476    if (chooser) {
3477        delete chooser;
3478        chooser = NULL;
3479    }
3480    if (RAS) {
3481        delete RAS;
3482        RAS = NULL;
3483    }
3484}
3485
3486RENAMINGU ::~RENAMINGU() {
3487
3488    if (!exist) return;
3489    if (iFRAT) {
3490        delete iFRAT;
3491        iFRAT = NULL;
3492    }
3493    if (fFRAT) {
3494        delete fFRAT;
3495        fFRAT = NULL;
3496    }
3497    if (iRRAT) {
3498        delete iRRAT;
3499        iRRAT = NULL;
3500    }
3501    if (iFRAT) {
3502        delete iFRAT;
3503        iFRAT = NULL;
3504    }
3505    if (ifreeL) {
3506        delete ifreeL;
3507        ifreeL = NULL;
3508    }
3509    if (ffreeL) {
3510        delete ffreeL;
3511        ffreeL = NULL;
3512    }
3513    if (idcl) {
3514        delete idcl;
3515        idcl = NULL;
3516    }
3517    if (fdcl) {
3518        delete fdcl;
3519        fdcl = NULL;
3520    }
3521    if (RAHT) {
3522        delete RAHT;
3523        RAHT = NULL;
3524    }
3525}
3526
3527LoadStoreU ::~LoadStoreU() {
3528
3529    if (!exist) return;
3530    if (LSQ) {
3531        delete LSQ;
3532        LSQ = NULL;
3533    }
3534    if (dcache) {
3535        delete dcache;
3536        dcache = NULL;
3537    }
3538}
3539
3540MemManU ::~MemManU() {
3541
3542    if (!exist) return;
3543    if (itlb) {
3544        delete itlb;
3545        itlb = NULL;
3546    }
3547    if (dtlb) {
3548        delete dtlb;
3549        dtlb = NULL;
3550    }
3551}
3552
3553RegFU ::~RegFU() {
3554
3555    if (!exist) return;
3556    if (IRF) {
3557        delete IRF;
3558        IRF = NULL;
3559    }
3560    if (FRF) {
3561        delete FRF;
3562        FRF = NULL;
3563    }
3564    if (RFWIN) {
3565        delete RFWIN;
3566        RFWIN = NULL;
3567    }
3568}
3569
3570SchedulerU ::~SchedulerU() {
3571
3572    if (!exist) return;
3573    if (int_inst_window) {
3574        delete int_inst_window;
3575        int_inst_window = NULL;
3576    }
3577    if (fp_inst_window) {
3578        delete int_inst_window;
3579        int_inst_window = NULL;
3580    }
3581    if (ROB) {
3582        delete ROB;
3583        ROB = NULL;
3584    }
3585    if (int_instruction_selection) {
3586        delete int_instruction_selection;
3587        int_instruction_selection = NULL;
3588    }
3589    if (fp_instruction_selection) {
3590        delete fp_instruction_selection;
3591        fp_instruction_selection = NULL;
3592    }
3593}
3594
3595EXECU ::~EXECU() {
3596
3597    if (!exist) return;
3598    if (int_bypass) {
3599        delete int_bypass;
3600        int_bypass = NULL;
3601    }
3602    if (intTagBypass) {
3603        delete intTagBypass;
3604        intTagBypass = NULL;
3605    }
3606    if (int_mul_bypass) {
3607        delete int_mul_bypass;
3608        int_mul_bypass = NULL;
3609    }
3610    if (intTag_mul_Bypass) {
3611        delete intTag_mul_Bypass;
3612        intTag_mul_Bypass = NULL;
3613    }
3614    if (fp_bypass) {
3615        delete fp_bypass;
3616        fp_bypass = NULL;
3617    }
3618    if (fpTagBypass) {
3619        delete fpTagBypass;
3620        fpTagBypass = NULL;
3621    }
3622    if (fp_u) {
3623        delete fp_u;
3624        fp_u = NULL;
3625    }
3626    if (exeu) {
3627        delete exeu;
3628        exeu = NULL;
3629    }
3630    if (mul) {
3631        delete mul;
3632        mul = NULL;
3633    }
3634    if (rfu) {
3635        delete rfu;
3636        rfu = NULL;
3637    }
3638    if (scheu) {
3639        delete scheu;
3640        scheu = NULL;
3641    }
3642}
3643
3644Core::~Core() {
3645
3646    if (ifu) {
3647        delete ifu;
3648        ifu = NULL;
3649    }
3650    if (lsu) {
3651        delete lsu;
3652        lsu = NULL;
3653    }
3654    if (rnu) {
3655        delete rnu;
3656        rnu = NULL;
3657    }
3658    if (mmu) {
3659        delete mmu;
3660        mmu = NULL;
3661    }
3662    if (exu) {
3663        delete exu;
3664        exu = NULL;
3665    }
3666    if (corepipe) {
3667        delete corepipe;
3668        corepipe = NULL;
3669    }
3670    if (undiffCore) {
3671        delete undiffCore;
3672        undiffCore = NULL;
3673    }
3674    if (l2cache) {
3675        delete l2cache;
3676        l2cache = NULL;
3677    }
3678}
3679
3680void Core::initialize_params() {
3681    memset(&core_params, 0, sizeof(CoreParameters));
3682    core_params.peak_issueW = -1;
3683    core_params.peak_commitW = -1;
3684}
3685
3686void Core::initialize_stats() {
3687    memset(&core_stats, 0, sizeof(CoreStatistics));
3688    core_stats.IFU_duty_cycle = 1.0;
3689    core_stats.ALU_duty_cycle = 1.0;
3690    core_stats.FPU_duty_cycle = 1.0;
3691    core_stats.MUL_duty_cycle = 1.0;
3692    core_stats.ALU_cdb_duty_cycle = 1.0;
3693    core_stats.FPU_cdb_duty_cycle = 1.0;
3694    core_stats.MUL_cdb_duty_cycle = 1.0;
3695    core_stats.pipeline_duty_cycle = 1.0;
3696    core_stats.IFU_duty_cycle = 1.0;
3697    core_stats.LSU_duty_cycle = 1.0;
3698    core_stats.MemManU_D_duty_cycle = 1.0;
3699    core_stats.MemManU_I_duty_cycle = 1.0;
3700}
3701
/*
 * Populate core_params and core_stats from this core's XML node (xml_data).
 *
 * Steps, in order:
 *  - reset both structures via initialize_params() / initialize_stats();
 *  - visit every "param" child and run its "name"/"value" attributes through
 *    the ASSIGN_*_IF macro chain; the trailing else of that chain calls
 *    warnUnrecognizedParam();
 *  - convert the clock rate from MHz to Hz and copy the issue widths into
 *    peak_commitW and fp_decodeW;
 *  - visit every "stat" child the same way (trailing else calls
 *    warnUnrecognizedStat());
 *  - derive address/data widths, register index widths and register-file
 *    entry counts;
 *  - check required parameters and the core / scheduler / renaming types.
 *
 * NOTE(review): the ASSIGN_*_IF macros are defined outside this block; they
 * presumably compare node_name with the given key and, on a match, parse
 * value into the given target -- confirm against their definition.
 */
void Core::set_core_param() {
    initialize_params();
    initialize_stats();

    int num_children = xml_data->nChildNode("param");
    // NOTE(review): i is passed by address to getChildNodePtr() below while
    // also being the loop counter -- confirm whether the callee modifies it.
    int i;
    for (i = 0; i < num_children; i++) {
        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
        XMLCSTR node_name = paramNode->getAttribute("name");
        XMLCSTR value = paramNode->getAttribute("value");

        // A missing name is reported using the node's "id" attribute.
        // NOTE(review): execution continues into the macro chain afterwards
        // unless warnMissingParamName() does not return -- confirm.
        if (!node_name)
            warnMissingParamName(paramNode->getAttribute("id"));

        ASSIGN_STR_IF("name", name);
        ASSIGN_INT_IF("opt_local", core_params.opt_local);
        ASSIGN_FP_IF("clock_rate", core_params.clockRate);
        ASSIGN_INT_IF("instruction_length", core_params.instruction_length);
        ASSIGN_INT_IF("opcode_width", core_params.opcode_width);
        ASSIGN_INT_IF("x86", core_params.x86);
        ASSIGN_INT_IF("Embedded", core_params.Embedded);
        ASSIGN_ENUM_IF("machine_type", core_params.core_ty, Core_type);
        ASSIGN_INT_IF("micro_opcode_width", core_params.micro_opcode_length);
        ASSIGN_INT_IF("number_hardware_threads", core_params.num_hthreads);
        ASSIGN_INT_IF("fetch_width", core_params.fetchW);
        ASSIGN_INT_IF("decode_width", core_params.decodeW);
        ASSIGN_INT_IF("issue_width", core_params.issueW);
        ASSIGN_INT_IF("peak_issue_width", core_params.peak_issueW);
        ASSIGN_INT_IF("commit_width", core_params.commitW);
        ASSIGN_INT_IF("prediction_width", core_params.predictionW);
        ASSIGN_INT_IF("ALU_per_core", core_params.num_alus);
        ASSIGN_INT_IF("FPU_per_core", core_params.num_fpus);
        ASSIGN_INT_IF("MUL_per_core", core_params.num_muls);
        ASSIGN_INT_IF("fp_issue_width", core_params.fp_issueW);
        ASSIGN_ENUM_IF("instruction_window_scheme", core_params.scheu_ty,
                       Scheduler_type);
        ASSIGN_ENUM_IF("rename_scheme", core_params.rm_ty, Renaming_type);
        ASSIGN_INT_IF("archi_Regs_IRF_size", core_params.archi_Regs_IRF_size);
        ASSIGN_INT_IF("archi_Regs_FRF_size", core_params.archi_Regs_FRF_size);
        ASSIGN_INT_IF("ROB_size", core_params.ROB_size);
        ASSIGN_INT_IF("ROB_assoc", core_params.ROB_assoc);
        ASSIGN_INT_IF("ROB_nbanks", core_params.ROB_nbanks);
        ASSIGN_INT_IF("ROB_tag_width", core_params.ROB_tag_width);
        ASSIGN_INT_IF("scheduler_assoc", core_params.scheduler_assoc);
        ASSIGN_INT_IF("scheduler_nbanks", core_params.scheduler_nbanks);
        ASSIGN_INT_IF("register_window_size",
                      core_params.register_window_size);
        ASSIGN_INT_IF("register_window_throughput",
                      core_params.register_window_throughput);
        ASSIGN_INT_IF("register_window_latency",
                      core_params.register_window_latency);
        ASSIGN_INT_IF("register_window_assoc",
                      core_params.register_window_assoc);
        ASSIGN_INT_IF("register_window_nbanks",
                      core_params.register_window_nbanks);
        ASSIGN_INT_IF("register_window_tag_width",
                      core_params.register_window_tag_width);
        ASSIGN_INT_IF("register_window_rw_ports",
                      core_params.register_window_rw_ports);
        ASSIGN_INT_IF("phy_Regs_IRF_size", core_params.phy_Regs_IRF_size);
        ASSIGN_INT_IF("phy_Regs_IRF_assoc", core_params.phy_Regs_IRF_assoc);
        ASSIGN_INT_IF("phy_Regs_IRF_nbanks", core_params.phy_Regs_IRF_nbanks);
        ASSIGN_INT_IF("phy_Regs_IRF_tag_width",
                      core_params.phy_Regs_IRF_tag_width);
        ASSIGN_INT_IF("phy_Regs_IRF_rd_ports",
                      core_params.phy_Regs_IRF_rd_ports);
        ASSIGN_INT_IF("phy_Regs_IRF_wr_ports",
                      core_params.phy_Regs_IRF_wr_ports);
        ASSIGN_INT_IF("phy_Regs_FRF_size", core_params.phy_Regs_FRF_size);
        ASSIGN_INT_IF("phy_Regs_FRF_assoc", core_params.phy_Regs_FRF_assoc);
        ASSIGN_INT_IF("phy_Regs_FRF_nbanks", core_params.phy_Regs_FRF_nbanks);
        ASSIGN_INT_IF("phy_Regs_FRF_tag_width",
                      core_params.phy_Regs_FRF_tag_width);
        ASSIGN_INT_IF("phy_Regs_FRF_rd_ports",
                      core_params.phy_Regs_FRF_rd_ports);
        ASSIGN_INT_IF("phy_Regs_FRF_wr_ports",
                      core_params.phy_Regs_FRF_wr_ports);
        ASSIGN_INT_IF("front_rat_nbanks", core_params.front_rat_nbanks);
        ASSIGN_INT_IF("front_rat_rw_ports", core_params.front_rat_rw_ports);
        ASSIGN_INT_IF("retire_rat_nbanks", core_params.retire_rat_nbanks);
        ASSIGN_INT_IF("retire_rat_rw_ports", core_params.retire_rat_rw_ports);
        ASSIGN_INT_IF("freelist_nbanks", core_params.freelist_nbanks);
        ASSIGN_INT_IF("freelist_rw_ports", core_params.freelist_rw_ports);
        ASSIGN_INT_IF("memory_ports", core_params.memory_ports);
        ASSIGN_INT_IF("load_buffer_size", core_params.load_buffer_size);
        ASSIGN_INT_IF("load_buffer_assoc", core_params.load_buffer_assoc);
        ASSIGN_INT_IF("load_buffer_nbanks", core_params.load_buffer_nbanks);
        ASSIGN_INT_IF("store_buffer_size", core_params.store_buffer_size);
        ASSIGN_INT_IF("store_buffer_assoc", core_params.store_buffer_assoc);
        ASSIGN_INT_IF("store_buffer_nbanks", core_params.store_buffer_nbanks);
        ASSIGN_INT_IF("instruction_window_size",
                      core_params.instruction_window_size);
        ASSIGN_INT_IF("fp_instruction_window_size",
                      core_params.fp_instruction_window_size);
        ASSIGN_INT_IF("instruction_buffer_size",
                      core_params.instruction_buffer_size);
        ASSIGN_INT_IF("instruction_buffer_assoc",
                      core_params.instruction_buffer_assoc);
        ASSIGN_INT_IF("instruction_buffer_nbanks",
                      core_params.instruction_buffer_nbanks);
        ASSIGN_INT_IF("instruction_buffer_tag_width",
                      core_params.instruction_buffer_tag_width);
        ASSIGN_INT_IF("number_instruction_fetch_ports",
                      core_params.number_instruction_fetch_ports);
        ASSIGN_INT_IF("RAS_size", core_params.RAS_size);
        ASSIGN_ENUM_IF("execu_broadcast_wt", core_params.execu_broadcast_wt,
                       Wire_type);
        ASSIGN_INT_IF("execu_wire_mat_type", core_params.execu_wire_mat_type);
        ASSIGN_INT_IF("execu_int_bypass_ports",
                      core_params.execu_int_bypass_ports);
        ASSIGN_INT_IF("execu_mul_bypass_ports",
                      core_params.execu_mul_bypass_ports);
        ASSIGN_INT_IF("execu_fp_bypass_ports",
                      core_params.execu_fp_bypass_ports);
        ASSIGN_ENUM_IF("execu_bypass_wire_type",
                       core_params.execu_bypass_wire_type, Wire_type);
        ASSIGN_FP_IF("execu_bypass_base_width",
                     core_params.execu_bypass_base_width);
        ASSIGN_FP_IF("execu_bypass_base_height",
                     core_params.execu_bypass_base_height);
        ASSIGN_INT_IF("execu_bypass_start_wiring_level",
                      core_params.execu_bypass_start_wiring_level);
        ASSIGN_FP_IF("execu_bypass_route_over_perc",
                     core_params.execu_bypass_route_over_perc);
        ASSIGN_FP_IF("broadcast_numerator", core_params.broadcast_numerator);
        ASSIGN_INT_IF("int_pipeline_depth", core_params.pipeline_stages);
        ASSIGN_INT_IF("fp_pipeline_depth", core_params.fp_pipeline_stages);
        ASSIGN_INT_IF("int_pipelines", core_params.num_pipelines);
        ASSIGN_INT_IF("fp_pipelines", core_params.num_fp_pipelines);
        ASSIGN_INT_IF("globalCheckpoint", core_params.globalCheckpoint);
        ASSIGN_INT_IF("perThreadState", core_params.perThreadState);
        // NOTE(review): "instruction_length" already appears earlier in this
        // chain with the same target; this second entry looks redundant --
        // confirm against the macro definition before removing it.
        ASSIGN_INT_IF("instruction_length", core_params.instruction_length);

        else {
            warnUnrecognizedParam(node_name);
        }
    }

    // Change from MHz to Hz
    core_params.clockRate *= 1e6;
    clockRate = core_params.clockRate;

    // The peak commit width mirrors the peak issue width, and the FP decode
    // width mirrors the FP issue width.
    core_params.peak_commitW = core_params.peak_issueW;
    core_params.fp_decodeW = core_params.fp_issueW;


    // Second pass: the "stat" children feed core_stats.
    num_children = xml_data->nChildNode("stat");
    for (i = 0; i < num_children; i++) {
        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
        XMLCSTR node_name = statNode->getAttribute("name");
        XMLCSTR value = statNode->getAttribute("value");

        // A missing name is reported using the node's "id" attribute.
        if (!node_name)
            warnMissingStatName(statNode->getAttribute("id"));

        ASSIGN_FP_IF("ALU_duty_cycle", core_stats.ALU_duty_cycle);
        ASSIGN_FP_IF("FPU_duty_cycle", core_stats.FPU_duty_cycle);
        ASSIGN_FP_IF("MUL_duty_cycle", core_stats.MUL_duty_cycle);
        ASSIGN_FP_IF("ALU_cdb_duty_cycle", core_stats.ALU_cdb_duty_cycle);
        ASSIGN_FP_IF("FPU_cdb_duty_cycle", core_stats.FPU_cdb_duty_cycle);
        ASSIGN_FP_IF("MUL_cdb_duty_cycle", core_stats.MUL_cdb_duty_cycle);
        ASSIGN_FP_IF("pipeline_duty_cycle", core_stats.pipeline_duty_cycle);
        ASSIGN_FP_IF("total_cycles", core_stats.total_cycles);
        ASSIGN_FP_IF("busy_cycles", core_stats.busy_cycles);
        ASSIGN_FP_IF("idle_cycles", core_stats.idle_cycles);
        ASSIGN_FP_IF("IFU_duty_cycle", core_stats.IFU_duty_cycle);
        ASSIGN_FP_IF("BR_duty_cycle", core_stats.BR_duty_cycle);
        ASSIGN_FP_IF("LSU_duty_cycle", core_stats.LSU_duty_cycle);
        ASSIGN_FP_IF("MemManU_D_duty_cycle", core_stats.MemManU_D_duty_cycle);
        ASSIGN_FP_IF("MemManU_I_duty_cycle", core_stats.MemManU_I_duty_cycle);
        ASSIGN_FP_IF("cdb_fpu_accesses", core_stats.cdb_fpu_accesses);
        ASSIGN_FP_IF("cdb_alu_accesses", core_stats.cdb_alu_accesses);
        ASSIGN_FP_IF("cdb_mul_accesses", core_stats.cdb_mul_accesses);
        ASSIGN_FP_IF("function_calls", core_stats.function_calls);
        ASSIGN_FP_IF("total_instructions", core_stats.total_instructions);
        ASSIGN_FP_IF("int_instructions", core_stats.int_instructions);
        ASSIGN_FP_IF("fp_instructions", core_stats.fp_instructions);
        ASSIGN_FP_IF("branch_instructions", core_stats.branch_instructions);
        ASSIGN_FP_IF("branch_mispredictions",
                     core_stats.branch_mispredictions);
        ASSIGN_FP_IF("load_instructions", core_stats.load_instructions);
        ASSIGN_FP_IF("store_instructions", core_stats.store_instructions);
        ASSIGN_FP_IF("committed_instructions",
                     core_stats.committed_instructions);
        ASSIGN_FP_IF("committed_int_instructions",
                     core_stats.committed_int_instructions);
        ASSIGN_FP_IF("committed_fp_instructions",
                     core_stats.committed_fp_instructions);
        ASSIGN_FP_IF("ROB_reads", core_stats.ROB_reads);
        ASSIGN_FP_IF("ROB_writes", core_stats.ROB_writes);
        ASSIGN_FP_IF("rename_reads", core_stats.rename_reads);
        ASSIGN_FP_IF("rename_writes", core_stats.rename_writes);
        ASSIGN_FP_IF("fp_rename_reads", core_stats.fp_rename_reads);
        ASSIGN_FP_IF("fp_rename_writes", core_stats.fp_rename_writes);
        ASSIGN_FP_IF("inst_window_reads", core_stats.inst_window_reads);
        ASSIGN_FP_IF("inst_window_writes", core_stats.inst_window_writes);
        ASSIGN_FP_IF("inst_window_wakeup_accesses",
                     core_stats.inst_window_wakeup_accesses);
        ASSIGN_FP_IF("fp_inst_window_reads", core_stats.fp_inst_window_reads);
        ASSIGN_FP_IF("fp_inst_window_writes",
                     core_stats.fp_inst_window_writes);
        ASSIGN_FP_IF("fp_inst_window_wakeup_accesses",
                     core_stats.fp_inst_window_wakeup_accesses);
        ASSIGN_FP_IF("int_regfile_reads", core_stats.int_regfile_reads);
        ASSIGN_FP_IF("float_regfile_reads", core_stats.float_regfile_reads);
        ASSIGN_FP_IF("int_regfile_writes", core_stats.int_regfile_writes);
        ASSIGN_FP_IF("float_regfile_writes", core_stats.float_regfile_writes);
        ASSIGN_FP_IF("context_switches", core_stats.context_switches);
        ASSIGN_FP_IF("ialu_accesses", core_stats.ialu_accesses);
        ASSIGN_FP_IF("fpu_accesses", core_stats.fpu_accesses);
        ASSIGN_FP_IF("mul_accesses", core_stats.mul_accesses);

        else {
            warnUnrecognizedStat(node_name);
        }
    }

    // Initialize a few variables
    core_params.multithreaded = core_params.num_hthreads > 1 ? true : false;
    core_params.pc_width = virtual_address_width;
    core_params.v_address_width = virtual_address_width;
    core_params.p_address_width = physical_address_width;
    // Round the data path width up to the next multiple of 32; the FP data
    // width is set to the same value.
    core_params.int_data_width = int(ceil(data_path_width / 32.0)) * 32;
    core_params.fp_data_width = core_params.int_data_width;
    // Architectural register index widths: ceil(log2(register count)).
    core_params.arch_ireg_width =
        int(ceil(log2(core_params.archi_Regs_IRF_size)));
    core_params.arch_freg_width
        = int(ceil(log2(core_params.archi_Regs_FRF_size)));
    // Default the RF entry counts to the architectural sizes; the
    // PhysicalRegFile branch below overrides them for OOO cores.
    core_params.num_IRF_entry = core_params.archi_Regs_IRF_size;
    core_params.num_FRF_entry = core_params.archi_Regs_FRF_size;

    // The following parameters must be strictly positive.
    if (core_params.instruction_length <= 0) {
        errorNonPositiveParam("instruction_length");
    }

    if (core_params.num_hthreads <= 0) {
        errorNonPositiveParam("number_hardware_threads");
    }

    if (core_params.opcode_width <= 0) {
        errorNonPositiveParam("opcode_width");
    }

    if (core_params.instruction_buffer_size <= 0) {
        errorNonPositiveParam("instruction_buffer_size");
    }

    if (core_params.number_instruction_fetch_ports <= 0) {
        errorNonPositiveParam("number_instruction_fetch_ports");
    }

    if (core_params.peak_issueW <= 0) {
        errorNonPositiveParam("peak_issue_width");
    } else {
        // peak_commitW was copied from peak_issueW above.
        assert(core_params.peak_commitW > 0);
    }

    // For out-of-order cores, the physical register index widths and the
    // free-list sizes depend on the scheduler type.
    if (core_params.core_ty == OOO) {
        if (core_params.scheu_ty == PhysicalRegFile) {
            core_params.phy_ireg_width =
                int(ceil(log2(core_params.phy_Regs_IRF_size)));
            core_params.phy_freg_width =
                int(ceil(log2(core_params.phy_Regs_FRF_size)));
            // Free lists and RF entry counts both take the physical RF size.
            core_params.num_ifreelist_entries =
                core_params.num_IRF_entry = core_params.phy_Regs_IRF_size;
            core_params.num_ffreelist_entries =
                core_params.num_FRF_entry = core_params.phy_Regs_FRF_size;
        } else if (core_params.scheu_ty == ReservationStation) {
            // With reservation stations the widths and free-list sizes are
            // taken from the ROB size.
            core_params.phy_ireg_width = int(ceil(log2(core_params.ROB_size)));
            core_params.phy_freg_width = int(ceil(log2(core_params.ROB_size)));
            core_params.num_ifreelist_entries = core_params.ROB_size;
            core_params.num_ffreelist_entries = core_params.ROB_size;
        }
    }

    // Register windowing is enabled only for in-order cores with a positive
    // register window size.
    core_params.regWindowing =
        (core_params.register_window_size > 0 &&
         core_params.core_ty == Inorder) ? true : false;

    if (core_params.regWindowing) {
        if (core_params.register_window_throughput <= 0) {
            errorNonPositiveParam("register_window_throughput");
        } else if (core_params.register_window_latency <= 0) {
            errorNonPositiveParam("register_window_latency");
        }
    }

    set_pppm(core_params.pppm_lkg_multhread, 0, core_params.num_hthreads,
             core_params.num_hthreads, 0);

    // Final sanity checks on the enumerated types.
    // NOTE(review): these failures terminate with exit status 0.
    if (!((core_params.core_ty == OOO) || (core_params.core_ty == Inorder))) {
        cout << "Invalid Core Type" << endl;
        exit(0);
    }

    // NOTE(review): the scheduler and renaming checks below are not
    // restricted to OOO cores even though their messages mention OOO.
    if (!((core_params.scheu_ty == PhysicalRegFile) ||
          (core_params.scheu_ty == ReservationStation))) {
        cout << "Invalid OOO Scheduler Type" << endl;
        exit(0);
    }

    if (!((core_params.rm_ty == RAMbased) ||
          (core_params.rm_ty == CAMbased))) {
        cout << "Invalid OOO Renaming Type" << endl;
        exit(0);
    }

}
4010