/*****************************************************************************
 *                                McPAT
 *                      SOFTWARE LICENSE AGREEMENT
 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
 *                          All Rights Reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ***************************************************************************/
3210152Satgutier@umich.edu
3310234Syasuko.eckert@amd.com#include "common.h"
3410152Satgutier@umich.edu#include "logic.h"
3510152Satgutier@umich.edu
3610152Satgutier@umich.edu//selection_logic
3710234Syasuko.eckert@amd.comselection_logic::selection_logic(XMLNode* _xml_data, bool _is_default,
3810234Syasuko.eckert@amd.com                                 int _win_entries, int issue_width_,
3910234Syasuko.eckert@amd.com                                 const InputParameter *configure_interface,
4010234Syasuko.eckert@amd.com                                 string _name, double _accesses,
4110234Syasuko.eckert@amd.com                                 double clockRate_, enum Device_ty device_ty_,
4210234Syasuko.eckert@amd.com                                 enum Core_type core_ty_)
4310234Syasuko.eckert@amd.com    : McPATComponent(_xml_data), is_default(_is_default),
4410234Syasuko.eckert@amd.com      win_entries(_win_entries),
4510234Syasuko.eckert@amd.com      issue_width(issue_width_),
4610234Syasuko.eckert@amd.com      accesses(_accesses),
4710234Syasuko.eckert@amd.com      device_ty(device_ty_),
4810234Syasuko.eckert@amd.com      core_ty(core_ty_) {
4910234Syasuko.eckert@amd.com    clockRate = clockRate_;
5010234Syasuko.eckert@amd.com    name = _name;
5110234Syasuko.eckert@amd.com    l_ip = *configure_interface;
5210234Syasuko.eckert@amd.com    local_result = init_interface(&l_ip, name);
5310152Satgutier@umich.edu}
5410152Satgutier@umich.edu
5510234Syasuko.eckert@amd.comvoid selection_logic::computeArea() {
5610234Syasuko.eckert@amd.com    output_data.area = local_result.area;
5710234Syasuko.eckert@amd.com}
5810234Syasuko.eckert@amd.com
5910234Syasuko.eckert@amd.comvoid selection_logic::computeEnergy() {
6010234Syasuko.eckert@amd.com    //based on cost effective superscalar processor TR pp27-31
6110234Syasuko.eckert@amd.com    double Ctotal, Cor, Cpencode;
6210234Syasuko.eckert@amd.com    int num_arbiter;
6310234Syasuko.eckert@amd.com    double WSelORn, WSelORprequ, WSelPn, WSelPp, WSelEnn, WSelEnp;
6410234Syasuko.eckert@amd.com
6510234Syasuko.eckert@amd.com    //the 0.8um process data is used.
6610234Syasuko.eckert@amd.com    //this was 10 micron for the 0.8 micron process
6710234Syasuko.eckert@amd.com    WSelORn	= 12.5 * l_ip.F_sz_um;
6810234Syasuko.eckert@amd.com    //this was 40 micron for the 0.8 micron process
6910234Syasuko.eckert@amd.com    WSelORprequ = 50 * l_ip.F_sz_um;
7010234Syasuko.eckert@amd.com    //this was 10mcron for the 0.8 micron process
7110234Syasuko.eckert@amd.com    WSelPn = 12.5 * l_ip.F_sz_um;
7210234Syasuko.eckert@amd.com    //this was 15 micron for the 0.8 micron process
7310234Syasuko.eckert@amd.com    WSelPp = 18.75 * l_ip.F_sz_um;
7410234Syasuko.eckert@amd.com    //this was 5 micron for the 0.8 micron process
7510234Syasuko.eckert@amd.com    WSelEnn	= 6.25 * l_ip.F_sz_um;
7610234Syasuko.eckert@amd.com    //this was 10 micron for the 0.8 micron process
7710234Syasuko.eckert@amd.com    WSelEnp	= 12.5 * l_ip.F_sz_um;
7810234Syasuko.eckert@amd.com
7910234Syasuko.eckert@amd.com    Ctotal = 0;
8010234Syasuko.eckert@amd.com    num_arbiter = 1;
8110234Syasuko.eckert@amd.com    while (win_entries > 4) {
8210234Syasuko.eckert@amd.com        win_entries = (int)ceil((double)win_entries / 4.0);
8310234Syasuko.eckert@amd.com        num_arbiter += win_entries;
8410234Syasuko.eckert@amd.com    }
8510234Syasuko.eckert@amd.com    //the 4-input OR logic to generate anyreq
8610234Syasuko.eckert@amd.com    Cor = 4 * drain_C_(WSelORn, NCH, 1, 1, g_tp.cell_h_def) +
8710234Syasuko.eckert@amd.com        drain_C_(WSelORprequ, PCH, 1, 1, g_tp.cell_h_def);
8810234Syasuko.eckert@amd.com    power.readOp.gate_leakage =
8910234Syasuko.eckert@amd.com        cmos_Ig_leakage(WSelORn, WSelORprequ, 4, nor) * g_tp.peri_global.Vdd;
9010234Syasuko.eckert@amd.com
9110234Syasuko.eckert@amd.com    //The total capacity of the 4-bit priority encoder
9210234Syasuko.eckert@amd.com    Cpencode = drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) +
9310234Syasuko.eckert@amd.com        drain_C_(WSelPp, PCH, 1, 1, g_tp.cell_h_def) +
9410234Syasuko.eckert@amd.com        2 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) +
9510234Syasuko.eckert@amd.com        drain_C_(WSelPp, PCH, 2, 1, g_tp.cell_h_def) +
9610234Syasuko.eckert@amd.com        3 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) +
9710234Syasuko.eckert@amd.com        drain_C_(WSelPp, PCH, 3, 1, g_tp.cell_h_def) +
9810234Syasuko.eckert@amd.com        4 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) +
9910234Syasuko.eckert@amd.com        drain_C_(WSelPp, PCH, 4, 1, g_tp.cell_h_def) +//precompute priority logic
10010234Syasuko.eckert@amd.com        2 * 4 * gate_C(WSelEnn + WSelEnp, 20.0) +
10110234Syasuko.eckert@amd.com        4 * drain_C_(WSelEnn, NCH, 1, 1, g_tp.cell_h_def) +
10210234Syasuko.eckert@amd.com        2 * 4 * drain_C_(WSelEnp, PCH, 1, 1, g_tp.cell_h_def) +//enable logic
10310234Syasuko.eckert@amd.com        (2 * 4 + 2 * 3 + 2 * 2 + 2) *
10410234Syasuko.eckert@amd.com        gate_C(WSelPn + WSelPp, 10.0);//requests signal
10510234Syasuko.eckert@amd.com
10610234Syasuko.eckert@amd.com    Ctotal += issue_width * num_arbiter * (Cor + Cpencode);
10710234Syasuko.eckert@amd.com
10810234Syasuko.eckert@amd.com    //2 means the abitration signal need to travel round trip
10910234Syasuko.eckert@amd.com    power.readOp.dynamic =
11010234Syasuko.eckert@amd.com        Ctotal * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 2;
11110234Syasuko.eckert@amd.com    power.readOp.leakage = issue_width * num_arbiter *
11210234Syasuko.eckert@amd.com        (cmos_Isub_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p
11310234Syasuko.eckert@amd.com         + cmos_Isub_leakage(WSelPn, WSelPp, 3, nor)//grant2p
11410234Syasuko.eckert@amd.com         + cmos_Isub_leakage(WSelPn, WSelPp, 4, nor)//grant3p
11510234Syasuko.eckert@amd.com         + cmos_Isub_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic
11610234Syasuko.eckert@amd.com         + cmos_Isub_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant sIsubnals
11710234Syasuko.eckert@amd.com            ) * g_tp.peri_global.Vdd;
11810234Syasuko.eckert@amd.com    power.readOp.gate_leakage = issue_width * num_arbiter *
11910234Syasuko.eckert@amd.com        (cmos_Ig_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p
12010234Syasuko.eckert@amd.com         + cmos_Ig_leakage(WSelPn, WSelPp, 3, nor)//grant2p
12110234Syasuko.eckert@amd.com         + cmos_Ig_leakage(WSelPn, WSelPp, 4, nor)//grant3p
12210234Syasuko.eckert@amd.com         + cmos_Ig_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic
12310234Syasuko.eckert@amd.com         + cmos_Ig_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant signals
12410234Syasuko.eckert@amd.com            ) * g_tp.peri_global.Vdd;
12510234Syasuko.eckert@amd.com    double sckRation = g_tp.sckt_co_eff;
12610234Syasuko.eckert@amd.com    power.readOp.dynamic *= sckRation;
12710234Syasuko.eckert@amd.com    power.writeOp.dynamic *= sckRation;
12810234Syasuko.eckert@amd.com    power.searchOp.dynamic *= sckRation;
12910234Syasuko.eckert@amd.com
13010234Syasuko.eckert@amd.com    double long_channel_device_reduction =
13110234Syasuko.eckert@amd.com        longer_channel_device_reduction(device_ty, core_ty);
13210234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage =
13310234Syasuko.eckert@amd.com        power.readOp.leakage * long_channel_device_reduction;
13410234Syasuko.eckert@amd.com
13510234Syasuko.eckert@amd.com    output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
13610234Syasuko.eckert@amd.com    output_data.subthreshold_leakage_power = power.readOp.leakage;
13710234Syasuko.eckert@amd.com    output_data.gate_leakage_power = power.readOp.gate_leakage;
13810234Syasuko.eckert@amd.com    output_data.runtime_dynamic_energy = power.readOp.dynamic * accesses;
13910234Syasuko.eckert@amd.com}
14010152Satgutier@umich.edu
14110152Satgutier@umich.edudep_resource_conflict_check::dep_resource_conflict_check(
14210234Syasuko.eckert@amd.com    XMLNode* _xml_data, const string _name,
14310234Syasuko.eckert@amd.com    const InputParameter *configure_interface,
14410234Syasuko.eckert@amd.com    const CoreParameters & dyn_p_, int compare_bits_,
14510234Syasuko.eckert@amd.com    double clockRate_, bool _is_default)
14610234Syasuko.eckert@amd.com    : McPATComponent(_xml_data), l_ip(*configure_interface),
14710234Syasuko.eckert@amd.com      coredynp(dyn_p_), compare_bits(compare_bits_), is_default(_is_default) {
14810152Satgutier@umich.edu
14910234Syasuko.eckert@amd.com    name = _name;
15010234Syasuko.eckert@amd.com    clockRate = clockRate_;
15110234Syasuko.eckert@amd.com    //this was 20.0 micron for the 0.8 micron process
15210234Syasuko.eckert@amd.com    Wcompn = 25 * l_ip.F_sz_um;
15310234Syasuko.eckert@amd.com    //this was 20.0 micron for the 0.8 micron process
15410234Syasuko.eckert@amd.com    Wevalinvp = 25 * l_ip.F_sz_um;
15510234Syasuko.eckert@amd.com    //this was 80.0 mcron for the 0.8 micron process
15610234Syasuko.eckert@amd.com    Wevalinvn = 100 * l_ip.F_sz_um;
15710234Syasuko.eckert@amd.com    //this was 40.0  micron for the 0.8 micron process
15810234Syasuko.eckert@amd.com    Wcomppreequ = 50 * l_ip.F_sz_um;
15910234Syasuko.eckert@amd.com    //this was 5.4 micron for the 0.8 micron process
16010234Syasuko.eckert@amd.com    WNORn =	6.75 * l_ip.F_sz_um;
16110234Syasuko.eckert@amd.com    //this was 30.5 micron for the 0.8 micron process
16210234Syasuko.eckert@amd.com    WNORp =	38.125 * l_ip.F_sz_um;
16310152Satgutier@umich.edu
16410234Syasuko.eckert@amd.com    // To make CACTI happy.
16510234Syasuko.eckert@amd.com    l_ip.cache_sz = MIN_BUFFER_SIZE;
16610234Syasuko.eckert@amd.com    local_result = init_interface(&l_ip, name);
16710152Satgutier@umich.edu
16810234Syasuko.eckert@amd.com    if (coredynp.core_ty == Inorder)
16910234Syasuko.eckert@amd.com        //TODO: opcode bits + log(shared resources) + REG TAG BITS -->
17010234Syasuko.eckert@amd.com        //opcode comparator
17110234Syasuko.eckert@amd.com        compare_bits += 16 + 8 + 8;
17210234Syasuko.eckert@amd.com    else
17310234Syasuko.eckert@amd.com        compare_bits += 16 + 8 + 8;
17410234Syasuko.eckert@amd.com
17510234Syasuko.eckert@amd.com    conflict_check_power();
17610234Syasuko.eckert@amd.com    double sckRation = g_tp.sckt_co_eff;
17710234Syasuko.eckert@amd.com    power.readOp.dynamic *= sckRation;
17810234Syasuko.eckert@amd.com    power.writeOp.dynamic *= sckRation;
17910234Syasuko.eckert@amd.com    power.searchOp.dynamic *= sckRation;
18010152Satgutier@umich.edu
18110152Satgutier@umich.edu}
18210152Satgutier@umich.edu
18310234Syasuko.eckert@amd.comvoid dep_resource_conflict_check::conflict_check_power() {
18410234Syasuko.eckert@amd.com    double Ctotal;
18510234Syasuko.eckert@amd.com    int num_comparators;
18610234Syasuko.eckert@amd.com    //2(N*N-N) is used for source to dest comparison, (N*N-N) is used for
18710234Syasuko.eckert@amd.com    //dest to dest comparision.
18810234Syasuko.eckert@amd.com    num_comparators = 3 * ((coredynp.decodeW) * (coredynp.decodeW) -
18910234Syasuko.eckert@amd.com                           coredynp.decodeW);
19010152Satgutier@umich.edu
19110234Syasuko.eckert@amd.com    Ctotal = num_comparators * compare_cap();
19210152Satgutier@umich.edu
19310234Syasuko.eckert@amd.com    power.readOp.dynamic = Ctotal * /*CLOCKRATE*/ g_tp.peri_global.Vdd *
19410234Syasuko.eckert@amd.com        g_tp.peri_global.Vdd /*AF*/;
19510234Syasuko.eckert@amd.com    power.readOp.leakage = num_comparators * compare_bits * 2 *
19610234Syasuko.eckert@amd.com        simplified_nmos_leakage(Wcompn,  false);
19710152Satgutier@umich.edu
19810234Syasuko.eckert@amd.com    double long_channel_device_reduction =
19910234Syasuko.eckert@amd.com        longer_channel_device_reduction(Core_device, coredynp.core_ty);
20010234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage	=
20110234Syasuko.eckert@amd.com        power.readOp.leakage * long_channel_device_reduction;
20210234Syasuko.eckert@amd.com    power.readOp.gate_leakage = num_comparators * compare_bits * 2 *
20310234Syasuko.eckert@amd.com        cmos_Ig_leakage(Wcompn, 0, 2, nmos);
20410152Satgutier@umich.edu
20510152Satgutier@umich.edu}
20610152Satgutier@umich.edu
20710152Satgutier@umich.edu/* estimate comparator power consumption (this comparator is similar
20810152Satgutier@umich.edu   to the tag-match structure in a CAM */
20910234Syasuko.eckert@amd.comdouble dep_resource_conflict_check::compare_cap() {
21010234Syasuko.eckert@amd.com    double c1, c2;
21110152Satgutier@umich.edu
21210234Syasuko.eckert@amd.com    //resize the big NOR gate at the DCL according to fan in.
21310234Syasuko.eckert@amd.com    WNORp = WNORp * compare_bits / 2.0;
21410234Syasuko.eckert@amd.com    /* bottom part of comparator */
21510234Syasuko.eckert@amd.com    c2 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) +
21610234Syasuko.eckert@amd.com                           drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def)) +
21710234Syasuko.eckert@amd.com        drain_C_(Wevalinvp, PCH, 1, 1, g_tp.cell_h_def) +
21810234Syasuko.eckert@amd.com        drain_C_(Wevalinvn, NCH, 1, 1, g_tp.cell_h_def);
21910152Satgutier@umich.edu
22010234Syasuko.eckert@amd.com    /* top part of comparator */
22110234Syasuko.eckert@amd.com    c1 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) +
22210234Syasuko.eckert@amd.com                           drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def) +
22310234Syasuko.eckert@amd.com                           drain_C_(Wcomppreequ, NCH, 1, 1, g_tp.cell_h_def)) +
22410234Syasuko.eckert@amd.com        gate_C(WNORn + WNORp, 10.0) +
22510234Syasuko.eckert@amd.com        drain_C_(WNORp, NCH, 2, 1, g_tp.cell_h_def) + compare_bits *
22610234Syasuko.eckert@amd.com        drain_C_(WNORn, NCH, 2, 1, g_tp.cell_h_def);
22710234Syasuko.eckert@amd.com    return(c1 + c2);
22810152Satgutier@umich.edu
22910152Satgutier@umich.edu}
23010152Satgutier@umich.edu
23110152Satgutier@umich.eduvoid dep_resource_conflict_check::leakage_feedback(double temperature)
23210152Satgutier@umich.edu{
23310152Satgutier@umich.edu  l_ip.temp = (unsigned int)round(temperature/10.0)*10;
23410234Syasuko.eckert@amd.com  uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy
23510152Satgutier@umich.edu
23610152Satgutier@umich.edu  // This is part of conflict_check_power()
23710234Syasuko.eckert@amd.com  // 2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest
23810234Syasuko.eckert@amd.com  // to dest comparison.
23910234Syasuko.eckert@amd.com  int num_comparators = 3 * ((coredynp.decodeW) * (coredynp.decodeW) -
24010234Syasuko.eckert@amd.com                             coredynp.decodeW);
24110234Syasuko.eckert@amd.com  power.readOp.leakage = num_comparators * compare_bits * 2 *
24210234Syasuko.eckert@amd.com      simplified_nmos_leakage(Wcompn,  false);
24310152Satgutier@umich.edu
24410234Syasuko.eckert@amd.com  double long_channel_device_reduction =
24510234Syasuko.eckert@amd.com      longer_channel_device_reduction(Core_device, coredynp.core_ty);
24610234Syasuko.eckert@amd.com  power.readOp.longer_channel_leakage = power.readOp.leakage *
24710234Syasuko.eckert@amd.com      long_channel_device_reduction;
24810234Syasuko.eckert@amd.com  power.readOp.gate_leakage = num_comparators * compare_bits * 2 *
24910234Syasuko.eckert@amd.com      cmos_Ig_leakage(Wcompn, 0, 2, nmos);
25010152Satgutier@umich.edu}
25110152Satgutier@umich.edu
25210152Satgutier@umich.edu
25310152Satgutier@umich.eduDFFCell::DFFCell(
25410234Syasuko.eckert@amd.com    bool _is_dram,
25510234Syasuko.eckert@amd.com    double _WdecNANDn,
25610234Syasuko.eckert@amd.com    double _WdecNANDp,
25710234Syasuko.eckert@amd.com    double _cell_load,
25810234Syasuko.eckert@amd.com    const InputParameter *configure_interface)
25910234Syasuko.eckert@amd.com        : is_dram(_is_dram),
26010234Syasuko.eckert@amd.com        cell_load(_cell_load),
26110234Syasuko.eckert@amd.com        WdecNANDn(_WdecNANDn),
26210234Syasuko.eckert@amd.com        WdecNANDp(_WdecNANDp) { //this model is based on the NAND2 based DFF.
26310234Syasuko.eckert@amd.com    l_ip = *configure_interface;
26410234Syasuko.eckert@amd.com    area.set_area(5 * compute_gate_area(NAND, 2,WdecNANDn,WdecNANDp,
26510234Syasuko.eckert@amd.com                                        g_tp.cell_h_def)
26610234Syasuko.eckert@amd.com                  + compute_gate_area(NAND, 2,WdecNANDn,WdecNANDn,
26710234Syasuko.eckert@amd.com                                      g_tp.cell_h_def));
26810152Satgutier@umich.edu
26910152Satgutier@umich.edu
27010152Satgutier@umich.edu}
27110152Satgutier@umich.edu
27210152Satgutier@umich.edu
27310234Syasuko.eckert@amd.comdouble DFFCell::fpfp_node_cap(unsigned int fan_in, unsigned int fan_out) {
27410234Syasuko.eckert@amd.com    double Ctotal = 0;
27510152Satgutier@umich.edu
27610234Syasuko.eckert@amd.com    /* part 1: drain cap of NAND gate */
27710234Syasuko.eckert@amd.com    Ctotal += drain_C_(WdecNANDn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + fan_in * drain_C_(WdecNANDp, PCH, 1, 1, g_tp.cell_h_def, is_dram);
27810152Satgutier@umich.edu
27910234Syasuko.eckert@amd.com    /* part 2: gate cap of NAND gates */
28010234Syasuko.eckert@amd.com    Ctotal += fan_out * gate_C(WdecNANDn + WdecNANDp, 0, is_dram);
28110152Satgutier@umich.edu
28210234Syasuko.eckert@amd.com    return Ctotal;
28310152Satgutier@umich.edu}
28410152Satgutier@umich.edu
28510152Satgutier@umich.edu
28610234Syasuko.eckert@amd.comvoid DFFCell::compute_DFF_cell() {
28710234Syasuko.eckert@amd.com    double c1, c2, c3, c4, c5, c6;
28810234Syasuko.eckert@amd.com    /* node 5 and node 6 are identical to node 1 in capacitance */
28910234Syasuko.eckert@amd.com    c1 = c5 = c6 = fpfp_node_cap(2, 1);
29010234Syasuko.eckert@amd.com    c2 = fpfp_node_cap(2, 3);
29110234Syasuko.eckert@amd.com    c3 = fpfp_node_cap(3, 2);
29210234Syasuko.eckert@amd.com    c4 = fpfp_node_cap(2, 2);
29310152Satgutier@umich.edu
29410234Syasuko.eckert@amd.com    //cap-load of the clock signal in each Dff, actually the clock signal only connected to one NAND2
29510234Syasuko.eckert@amd.com    clock_cap = 2 * gate_C(WdecNANDn + WdecNANDp, 0, is_dram);
29610234Syasuko.eckert@amd.com    e_switch.readOp.dynamic += (c4 + c1 + c2 + c3 + c5 + c6 + 2 * cell_load) *
29710234Syasuko.eckert@amd.com        0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;;
29810152Satgutier@umich.edu
29910234Syasuko.eckert@amd.com    /* no 1/2 for e_keep and e_clock because clock signal switches twice in one cycle */
30010234Syasuko.eckert@amd.com    e_keep_1.readOp.dynamic +=
30110234Syasuko.eckert@amd.com        c3 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ;
30210234Syasuko.eckert@amd.com    e_keep_0.readOp.dynamic +=
30310234Syasuko.eckert@amd.com        c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ;
30410234Syasuko.eckert@amd.com    e_clock.readOp.dynamic +=
30510234Syasuko.eckert@amd.com        clock_cap * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;;
30610152Satgutier@umich.edu
30710234Syasuko.eckert@amd.com    /* static power */
30810234Syasuko.eckert@amd.com    e_switch.readOp.leakage +=
30910234Syasuko.eckert@amd.com        (cmos_Isub_leakage(WdecNANDn, WdecNANDp, 2, nand) *
31010234Syasuko.eckert@amd.com         5//5 NAND2 and 1 NAND3 in a DFF
31110234Syasuko.eckert@amd.com         + cmos_Isub_leakage(WdecNANDn, WdecNANDn, 3, nand)) *
31210234Syasuko.eckert@amd.com        g_tp.peri_global.Vdd;
31310234Syasuko.eckert@amd.com    e_switch.readOp.gate_leakage +=
31410234Syasuko.eckert@amd.com        (cmos_Ig_leakage(WdecNANDn, WdecNANDp, 2, nand) *
31510234Syasuko.eckert@amd.com         5//5 NAND2 and 1 NAND3 in a DFF
31610234Syasuko.eckert@amd.com         + cmos_Ig_leakage(WdecNANDn, WdecNANDn, 3, nand)) *
31710234Syasuko.eckert@amd.com        g_tp.peri_global.Vdd;
31810152Satgutier@umich.edu}
31910152Satgutier@umich.edu
32010234Syasuko.eckert@amd.comPipeline::Pipeline(XMLNode* _xml_data,
32110234Syasuko.eckert@amd.com                   const InputParameter *configure_interface,
32210234Syasuko.eckert@amd.com                   const CoreParameters & dyn_p_,
32310234Syasuko.eckert@amd.com                   enum Device_ty device_ty_,
32410234Syasuko.eckert@amd.com                   bool _is_core_pipeline,
32510234Syasuko.eckert@amd.com                   bool _is_default)
32610234Syasuko.eckert@amd.com    : McPATComponent(_xml_data), l_ip(*configure_interface),
32710234Syasuko.eckert@amd.com      coredynp(dyn_p_), device_ty(device_ty_),
32810234Syasuko.eckert@amd.com      is_core_pipeline(_is_core_pipeline), is_default(_is_default),
32910234Syasuko.eckert@amd.com      num_piperegs(0.0) {
33010234Syasuko.eckert@amd.com    name = "Pipeline?";
33110152Satgutier@umich.edu
33210234Syasuko.eckert@amd.com    local_result = init_interface(&l_ip, name);
33310234Syasuko.eckert@amd.com    if (!coredynp.Embedded) {
33410234Syasuko.eckert@amd.com        process_ind = true;
33510234Syasuko.eckert@amd.com    } else {
33610234Syasuko.eckert@amd.com        process_ind = false;
33710234Syasuko.eckert@amd.com    }
33810234Syasuko.eckert@amd.com    //this was  20 micron for the 0.8 micron process
33910234Syasuko.eckert@amd.com    WNANDn = (process_ind) ? 25 * l_ip.F_sz_um : g_tp.min_w_nmos_ ;
34010234Syasuko.eckert@amd.com    //this was  30 micron for the 0.8 micron process
34110234Syasuko.eckert@amd.com    WNANDp = (process_ind) ? 37.5 * l_ip.F_sz_um : g_tp.min_w_nmos_ *
34210234Syasuko.eckert@amd.com        pmos_to_nmos_sz_ratio();
34310234Syasuko.eckert@amd.com    load_per_pipeline_stage = 2 * gate_C(WNANDn + WNANDp, 0, false);
34410234Syasuko.eckert@amd.com    compute();
34510152Satgutier@umich.edu
34610152Satgutier@umich.edu}
34710152Satgutier@umich.edu
34810234Syasuko.eckert@amd.comvoid Pipeline::compute() {
34910234Syasuko.eckert@amd.com    compute_stage_vector();
35010234Syasuko.eckert@amd.com    DFFCell pipe_reg(false, WNANDn, WNANDp, load_per_pipeline_stage, &l_ip);
35110234Syasuko.eckert@amd.com    pipe_reg.compute_DFF_cell();
35210152Satgutier@umich.edu
35310234Syasuko.eckert@amd.com    double clock_power_pipereg = num_piperegs * pipe_reg.e_clock.readOp.dynamic;
35410234Syasuko.eckert@amd.com    //******************pipeline power: currently, we average all the possibilities of the states of DFFs in the pipeline. A better way to do it is to consider
35510234Syasuko.eckert@amd.com    //the harming distance of two consecutive signals, However McPAT does not have plan to do this in near future as it focuses on worst case power.
35610234Syasuko.eckert@amd.com    double pipe_reg_power = num_piperegs *
35710234Syasuko.eckert@amd.com        (pipe_reg.e_switch.readOp.dynamic + pipe_reg.e_keep_0.readOp.dynamic +
35810234Syasuko.eckert@amd.com         pipe_reg.e_keep_1.readOp.dynamic) / 3 + clock_power_pipereg;
35910234Syasuko.eckert@amd.com    double pipe_reg_leakage = num_piperegs * pipe_reg.e_switch.readOp.leakage;
36010234Syasuko.eckert@amd.com    double pipe_reg_gate_leakage = num_piperegs *
36110234Syasuko.eckert@amd.com        pipe_reg.e_switch.readOp.gate_leakage;
36210234Syasuko.eckert@amd.com    power.readOp.dynamic	+= pipe_reg_power;
36310234Syasuko.eckert@amd.com    power.readOp.leakage	+= pipe_reg_leakage;
36410234Syasuko.eckert@amd.com    power.readOp.gate_leakage	+= pipe_reg_gate_leakage;
36510234Syasuko.eckert@amd.com    area.set_area(num_piperegs * pipe_reg.area.get_area());
36610152Satgutier@umich.edu
36710234Syasuko.eckert@amd.com    double long_channel_device_reduction =
36810234Syasuko.eckert@amd.com        longer_channel_device_reduction(device_ty, coredynp.core_ty);
36910234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage	= power.readOp.leakage *
37010234Syasuko.eckert@amd.com        long_channel_device_reduction;
37110152Satgutier@umich.edu
37210152Satgutier@umich.edu
37310234Syasuko.eckert@amd.com    double sckRation = g_tp.sckt_co_eff;
37410234Syasuko.eckert@amd.com    power.readOp.dynamic *= sckRation;
37510234Syasuko.eckert@amd.com    power.writeOp.dynamic *= sckRation;
37610234Syasuko.eckert@amd.com    power.searchOp.dynamic *= sckRation;
37710234Syasuko.eckert@amd.com    double macro_layout_overhead = g_tp.macro_layout_overhead;
37810152Satgutier@umich.edu        if (!coredynp.Embedded)
37910234Syasuko.eckert@amd.com                area.set_area(area.get_area() * macro_layout_overhead);
38010234Syasuko.eckert@amd.com
38110234Syasuko.eckert@amd.com    output_data.area = area.get_area() / 1e6;
38210234Syasuko.eckert@amd.com    output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
38310234Syasuko.eckert@amd.com    output_data.subthreshold_leakage_power = power.readOp.leakage;
38410234Syasuko.eckert@amd.com    output_data.gate_leakage_power = power.readOp.gate_leakage;
38510234Syasuko.eckert@amd.com    output_data.runtime_dynamic_energy = power.readOp.dynamic * total_cycles;
38610152Satgutier@umich.edu}
38710152Satgutier@umich.edu
38810234Syasuko.eckert@amd.comvoid Pipeline::compute_stage_vector() {
38910234Syasuko.eckert@amd.com    double num_stages, tot_stage_vector, per_stage_vector;
39010234Syasuko.eckert@amd.com    int opcode_length = coredynp.x86 ?
39110234Syasuko.eckert@amd.com        coredynp.micro_opcode_length : coredynp.opcode_width;
39210152Satgutier@umich.edu
39310234Syasuko.eckert@amd.com    if (!is_core_pipeline) {
39410234Syasuko.eckert@amd.com        //The number of pipeline stages are calculated based on the achievable
39510234Syasuko.eckert@amd.com        //throughput and required throughput
39610234Syasuko.eckert@amd.com        num_piperegs = l_ip.pipeline_stages * l_ip.per_stage_vector;
39710234Syasuko.eckert@amd.com    } else {
39810234Syasuko.eckert@amd.com        if (coredynp.core_ty == Inorder) {
39910234Syasuko.eckert@amd.com            /* assume 6 pipe stages and try to estimate bits per pipe stage */
40010234Syasuko.eckert@amd.com            /* pipe stage 0/IF */
40110234Syasuko.eckert@amd.com            num_piperegs += coredynp.pc_width * 2 * coredynp.num_hthreads;
40210234Syasuko.eckert@amd.com            /* pipe stage IF/ID */
40310234Syasuko.eckert@amd.com            num_piperegs += coredynp.fetchW *
40410234Syasuko.eckert@amd.com                (coredynp.instruction_length + coredynp.pc_width) *
40510234Syasuko.eckert@amd.com                coredynp.num_hthreads;
40610234Syasuko.eckert@amd.com            /* pipe stage IF/ThreadSEL */
40710234Syasuko.eckert@amd.com            if (coredynp.multithreaded) {
40810234Syasuko.eckert@amd.com                num_piperegs += coredynp.num_hthreads *
40910234Syasuko.eckert@amd.com                    coredynp.perThreadState; //8 bit thread states
41010234Syasuko.eckert@amd.com            }
41110234Syasuko.eckert@amd.com            /* pipe stage ID/EXE */
41210234Syasuko.eckert@amd.com            num_piperegs += coredynp.decodeW *
41310234Syasuko.eckert@amd.com                (coredynp.instruction_length + coredynp.pc_width +
41410234Syasuko.eckert@amd.com                 pow(2.0, opcode_length) + 2 * coredynp.int_data_width) *
41510234Syasuko.eckert@amd.com                coredynp.num_hthreads;
41610234Syasuko.eckert@amd.com            /* pipe stage EXE/MEM */
41710234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
41810234Syasuko.eckert@amd.com                (3 * coredynp.arch_ireg_width + pow(2.0, opcode_length) + 8 *
41910234Syasuko.eckert@amd.com                 2 * coredynp.int_data_width/*+2*powers (2,reg_length)*/);
42010234Syasuko.eckert@amd.com            /* pipe stage MEM/WB the 2^opcode_length means the total decoded signal for the opcode*/
42110234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
42210234Syasuko.eckert@amd.com                (2 * coredynp.int_data_width + pow(2.0, opcode_length) + 8 *
42310234Syasuko.eckert@amd.com                 2 * coredynp.int_data_width/*+2*powers (2,reg_length)*/);
42410234Syasuko.eckert@amd.com            num_stages = 6;
42510234Syasuko.eckert@amd.com        } else {
42610234Syasuko.eckert@amd.com            /* assume 12 stage pipe stages and try to estimate bits per pipe stage */
42710234Syasuko.eckert@amd.com            /*OOO: Fetch, decode, rename, IssueQ, dispatch, regread, EXE, MEM, WB, CM */
42810152Satgutier@umich.edu
42910234Syasuko.eckert@amd.com            /* pipe stage 0/1F*/
43010234Syasuko.eckert@amd.com            num_piperegs +=
43110234Syasuko.eckert@amd.com                coredynp.pc_width * 2 * coredynp.num_hthreads ;//PC and Next PC
43210234Syasuko.eckert@amd.com            /* pipe stage IF/ID */
43310234Syasuko.eckert@amd.com            num_piperegs += coredynp.fetchW *
43410234Syasuko.eckert@amd.com                (coredynp.instruction_length + coredynp.pc_width) *
43510234Syasuko.eckert@amd.com                coredynp.num_hthreads;//PC is used to feed branch predictor in ID
43610234Syasuko.eckert@amd.com            /* pipe stage 1D/Renaming*/
43710234Syasuko.eckert@amd.com            num_piperegs += coredynp.decodeW *
43810234Syasuko.eckert@amd.com                (coredynp.instruction_length + coredynp.pc_width) *
43910234Syasuko.eckert@amd.com                coredynp.num_hthreads;//PC is for branch exe in later stage.
44010234Syasuko.eckert@amd.com            /* pipe stage Renaming/wire_drive */
44110234Syasuko.eckert@amd.com            num_piperegs += coredynp.decodeW *
44210234Syasuko.eckert@amd.com                (coredynp.instruction_length + coredynp.pc_width);
44310234Syasuko.eckert@amd.com            /* pipe stage Renaming/IssueQ */
44410234Syasuko.eckert@amd.com            //3*coredynp.phy_ireg_width means 2 sources and 1 dest
44510234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
44610234Syasuko.eckert@amd.com                (coredynp.instruction_length  + coredynp.pc_width + 3 *
44710234Syasuko.eckert@amd.com                 coredynp.phy_ireg_width) * coredynp.num_hthreads;
44810234Syasuko.eckert@amd.com            /* pipe stage IssueQ/Dispatch */
44910234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
45010234Syasuko.eckert@amd.com                (coredynp.instruction_length + 3 * coredynp.phy_ireg_width);
45110234Syasuko.eckert@amd.com            /* pipe stage Dispatch/EXE */
45210152Satgutier@umich.edu
45310234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
45410234Syasuko.eckert@amd.com                (3 * coredynp.phy_ireg_width + coredynp.pc_width +
45510234Syasuko.eckert@amd.com                 pow(2.0, opcode_length)/*+2*powers (2,reg_length)*/);
45610234Syasuko.eckert@amd.com            /* 2^opcode_length means the total decoded signal for the opcode*/
45710234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
45810234Syasuko.eckert@amd.com                (2 * coredynp.int_data_width + pow(2.0, opcode_length)
45910234Syasuko.eckert@amd.com                 /*+2*powers (2,reg_length)*/);
46010234Syasuko.eckert@amd.com            /*2 source operands in EXE; Assume 2EXE stages* since we do not really distinguish OP*/
46110234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
46210234Syasuko.eckert@amd.com                (2 * coredynp.int_data_width + pow(2.0, opcode_length)
46310234Syasuko.eckert@amd.com                 /*+2*powers (2,reg_length)*/);
46410234Syasuko.eckert@amd.com            /* pipe stage EXE/MEM, data need to be read/write, address*/
46510234Syasuko.eckert@amd.com            //memory Opcode still need to be passed
46610234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
46710234Syasuko.eckert@amd.com                (coredynp.int_data_width + coredynp.v_address_width +
46810234Syasuko.eckert@amd.com                 pow(2.0, opcode_length)/*+2*powers (2,reg_length)*/);
46910234Syasuko.eckert@amd.com            /* pipe stage MEM/WB; result data, writeback regs */
47010234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
47110234Syasuko.eckert@amd.com                (coredynp.int_data_width + coredynp.phy_ireg_width
47210234Syasuko.eckert@amd.com                 /* powers (2,opcode_length) +
47310234Syasuko.eckert@amd.com                    (2,opcode_length)+2*powers (2,reg_length)*/);
47410234Syasuko.eckert@amd.com            /* pipe stage WB/CM ; result data, regs need to be updated, address for resolve memory ops in ROB's top*/
47510234Syasuko.eckert@amd.com            num_piperegs += coredynp.commitW *
47610234Syasuko.eckert@amd.com                (coredynp.int_data_width + coredynp.v_address_width +
47710234Syasuko.eckert@amd.com                 coredynp.phy_ireg_width
47810234Syasuko.eckert@amd.com                 /*+ powers (2,opcode_length)*2*powers (2,reg_length)*/) *
47910234Syasuko.eckert@amd.com                coredynp.num_hthreads;
48010234Syasuko.eckert@amd.com            num_stages = 12;
48110152Satgutier@umich.edu
48210152Satgutier@umich.edu        }
48310152Satgutier@umich.edu
48410152Satgutier@umich.edu        /* assume 50% extra in control registers and interrupt registers (rule of thumb) */
48510152Satgutier@umich.edu        num_piperegs = num_piperegs * 1.5;
48610234Syasuko.eckert@amd.com        tot_stage_vector = num_piperegs;
48710234Syasuko.eckert@amd.com        per_stage_vector = tot_stage_vector / num_stages;
48810152Satgutier@umich.edu
48910234Syasuko.eckert@amd.com        if (coredynp.core_ty == Inorder) {
49010234Syasuko.eckert@amd.com            if (coredynp.pipeline_stages > 6)
49110234Syasuko.eckert@amd.com                num_piperegs = per_stage_vector * coredynp.pipeline_stages;
49210234Syasuko.eckert@amd.com        } else { //OOO
49310234Syasuko.eckert@amd.com            if (coredynp.pipeline_stages > 12)
49410234Syasuko.eckert@amd.com                num_piperegs = per_stage_vector * coredynp.pipeline_stages;
49510152Satgutier@umich.edu        }
49610234Syasuko.eckert@amd.com    }
49710152Satgutier@umich.edu
49810152Satgutier@umich.edu}
49910152Satgutier@umich.edu
50010234Syasuko.eckert@amd.comFunctionalUnit::FunctionalUnit(XMLNode* _xml_data,
50110234Syasuko.eckert@amd.com                               InputParameter* interface_ip_,
50210234Syasuko.eckert@amd.com                               const CoreParameters & _core_params,
50310234Syasuko.eckert@amd.com                               const CoreStatistics & _core_stats,
50410234Syasuko.eckert@amd.com                               enum FU_type fu_type_)
50510234Syasuko.eckert@amd.com    : McPATComponent(_xml_data),
50610234Syasuko.eckert@amd.com      interface_ip(*interface_ip_), core_params(_core_params),
50710234Syasuko.eckert@amd.com      core_stats(_core_stats), fu_type(fu_type_) {
50810234Syasuko.eckert@amd.com    double area_t;
50910234Syasuko.eckert@amd.com    double leakage;
51010234Syasuko.eckert@amd.com    double gate_leakage;
51110152Satgutier@umich.edu    double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
51210234Syasuko.eckert@amd.com    clockRate = core_params.clockRate;
51310152Satgutier@umich.edu
51410234Syasuko.eckert@amd.com    uca_org_t result2;
51510234Syasuko.eckert@amd.com    // Temp name for the following function call
51610234Syasuko.eckert@amd.com    name = "Functional Unit";
51710234Syasuko.eckert@amd.com
51810234Syasuko.eckert@amd.com    result2 = init_interface(&interface_ip, name);
51910234Syasuko.eckert@amd.com
52010234Syasuko.eckert@amd.com        if (core_params.Embedded) {
52110234Syasuko.eckert@amd.com            if (fu_type == FPU) {
52210234Syasuko.eckert@amd.com                num_fu=core_params.num_fpus;
52310152Satgutier@umich.edu                        //area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2
52410152Satgutier@umich.edu                        area_t = 4.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 The base number
52510152Satgutier@umich.edu                        //4.47 contains both VFP and NEON processing unit, VFP is about 40% and NEON is about 60%
52610152Satgutier@umich.edu                        if (g_ip->F_sz_nm>90)
52710152Satgutier@umich.edu                                area_t = 4.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2
52810152Satgutier@umich.edu                        leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
52910152Satgutier@umich.edu                        gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
53010152Satgutier@umich.edu                        //energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction in FPU usually it can have up to 20 cycles.
53110152Satgutier@umich.edu//			base_energy = coredynp.core_ty==Inorder? 0: 89e-3*3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch)
53210152Satgutier@umich.edu//			base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);
53310152Satgutier@umich.edu                        base_energy = 0;
53410152Satgutier@umich.edu                        per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per Hz energy(nJ)
53510152Satgutier@umich.edu                        //FPU power from Sandia's processor sizing tech report
53610152Satgutier@umich.edu                        FU_height=(18667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data
53710234Syasuko.eckert@amd.com            } else if (fu_type == ALU) {
53810234Syasuko.eckert@amd.com                num_fu=core_params.num_alus;
53910152Satgutier@umich.edu                        area_t = 280*260*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl
54010152Satgutier@umich.edu                        leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
54110152Satgutier@umich.edu                        gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
54210152Satgutier@umich.edu//			base_energy = coredynp.core_ty==Inorder? 0:89e-3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch)
54310152Satgutier@umich.edu//			base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);
54410152Satgutier@umich.edu                        base_energy = 0;
54510152Satgutier@umich.edu                        per_access_energy = 1.15/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ)
54610152Satgutier@umich.edu                        FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU
54710152Satgutier@umich.edu
54810234Syasuko.eckert@amd.com            } else if (fu_type == MUL) {
54910234Syasuko.eckert@amd.com                num_fu=core_params.num_muls;
55010152Satgutier@umich.edu                        area_t = 280*260*3*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl
55110152Satgutier@umich.edu                        leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
55210152Satgutier@umich.edu                        gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
55310152Satgutier@umich.edu//			base_energy = coredynp.core_ty==Inorder? 0:89e-3*2; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch)
55410152Satgutier@umich.edu//			base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);
55510152Satgutier@umich.edu                        base_energy = 0;
55610152Satgutier@umich.edu                        per_access_energy = 1.15*2/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch
55710152Satgutier@umich.edu                        FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data
55810234Syasuko.eckert@amd.com            } else {
55910152Satgutier@umich.edu                        cout<<"Unknown Functional Unit Type"<<endl;
56010152Satgutier@umich.edu                        exit(0);
56110152Satgutier@umich.edu                }
56210152Satgutier@umich.edu                per_access_energy *=0.5;//According to ARM data embedded processor has much lower per acc energy
56310234Syasuko.eckert@amd.com        } else {
56410234Syasuko.eckert@amd.com            if (fu_type == FPU) {
56510234Syasuko.eckert@amd.com                name = "Floating Point Unit(s)";
56610234Syasuko.eckert@amd.com                num_fu = core_params.num_fpus;
56710234Syasuko.eckert@amd.com                area_t = 8.47 * 1e6 * (g_ip->F_sz_nm * g_ip->F_sz_nm / 90.0 /
56810234Syasuko.eckert@amd.com                                       90.0);//this is um^2
56910234Syasuko.eckert@amd.com                if (g_ip->F_sz_nm > 90)
57010234Syasuko.eckert@amd.com                    area_t = 8.47 * 1e6 *
57110234Syasuko.eckert@amd.com                        g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2
57210234Syasuko.eckert@amd.com            leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
57310234Syasuko.eckert@amd.com            gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
57410234Syasuko.eckert@amd.com            //W The base energy of ALU average numbers from Intel 4G and
57510234Syasuko.eckert@amd.com            //773Mhz (Wattch)
57610234Syasuko.eckert@amd.com            base_energy = core_params.core_ty == Inorder ? 0 : 89e-3 * 3;
57710234Syasuko.eckert@amd.com            base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 /
57810234Syasuko.eckert@amd.com                            1.2);
57910234Syasuko.eckert@amd.com            per_access_energy = 1.15*3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per op energy(nJ)
58010234Syasuko.eckert@amd.com            FU_height=(38667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data
58110234Syasuko.eckert@amd.com        } else if (fu_type == ALU) {
58210234Syasuko.eckert@amd.com            name = "Integer ALU(s)";
58310234Syasuko.eckert@amd.com            num_fu = core_params.num_alus;
58410234Syasuko.eckert@amd.com            //this is um^2 ALU + MUl
58510234Syasuko.eckert@amd.com            area_t = 280 * 260 * 2 * g_tp.scaling_factor.logic_scaling_co_eff;
58610234Syasuko.eckert@amd.com            leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
58710234Syasuko.eckert@amd.com            gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
58810234Syasuko.eckert@amd.com            //W The base energy of ALU average numbers from Intel 4G and 773Mhz
58910234Syasuko.eckert@amd.com            //(Wattch)
59010234Syasuko.eckert@amd.com            base_energy = core_params.core_ty == Inorder ? 0 : 89e-3;
59110234Syasuko.eckert@amd.com            base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 /
59210234Syasuko.eckert@amd.com                            1.2);
59310234Syasuko.eckert@amd.com            per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ)
59410234Syasuko.eckert@amd.com            FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU
59510234Syasuko.eckert@amd.com        } else if (fu_type == MUL) {
59610234Syasuko.eckert@amd.com            name = "Multiply/Divide Unit(s)";
59710234Syasuko.eckert@amd.com            num_fu = core_params.num_muls;
59810234Syasuko.eckert@amd.com            //this is um^2 ALU + MUl
59910234Syasuko.eckert@amd.com            area_t = 280 * 260 * 2 * 3 *
60010234Syasuko.eckert@amd.com                g_tp.scaling_factor.logic_scaling_co_eff;
60110234Syasuko.eckert@amd.com            leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
60210234Syasuko.eckert@amd.com            gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
60310234Syasuko.eckert@amd.com            //W The base energy of ALU average numbers from Intel 4G and 773Mhz
60410234Syasuko.eckert@amd.com            //(Wattch)
60510234Syasuko.eckert@amd.com            base_energy = core_params.core_ty == Inorder ? 0 : 89e-3 * 2;
60610234Syasuko.eckert@amd.com            base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 /
60710234Syasuko.eckert@amd.com                            1.2);
60810234Syasuko.eckert@amd.com            per_access_energy = 1.15*2/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch
60910234Syasuko.eckert@amd.com            FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data
61010234Syasuko.eckert@amd.com        } else {
61110234Syasuko.eckert@amd.com            cout << "Unknown Functional Unit Type" << endl;
61210234Syasuko.eckert@amd.com            exit(0);
61310152Satgutier@umich.edu        }
61410234Syasuko.eckert@amd.com    }
61510152Satgutier@umich.edu
61610152Satgutier@umich.edu    area.set_area(area_t*num_fu);
61710234Syasuko.eckert@amd.com    power.readOp.leakage = leakage * num_fu;
61810234Syasuko.eckert@amd.com    power.readOp.gate_leakage = gate_leakage * num_fu;
61910234Syasuko.eckert@amd.com
62010234Syasuko.eckert@amd.com    double long_channel_device_reduction =
62110234Syasuko.eckert@amd.com        longer_channel_device_reduction(Core_device, core_params.core_ty);
62210234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage	=
62310234Syasuko.eckert@amd.com        power.readOp.leakage * long_channel_device_reduction;
62410234Syasuko.eckert@amd.com    double macro_layout_overhead = g_tp.macro_layout_overhead;
62510234Syasuko.eckert@amd.com    area.set_area(area.get_area()*macro_layout_overhead);
62610152Satgutier@umich.edu}
62710152Satgutier@umich.edu
62810234Syasuko.eckert@amd.comvoid FunctionalUnit::computeEnergy() {
62910234Syasuko.eckert@amd.com    double pppm_t[4]    = {1, 1, 1, 1};
63010234Syasuko.eckert@amd.com    double FU_duty_cycle;
63110234Syasuko.eckert@amd.com    double sckRation = g_tp.sckt_co_eff;
63210152Satgutier@umich.edu
63310234Syasuko.eckert@amd.com    // TDP power calculation
63410234Syasuko.eckert@amd.com    //2 means two source operands needs to be passed for each int instruction.
63510234Syasuko.eckert@amd.com    set_pppm(pppm_t, 2, 2, 2, 2);
63610234Syasuko.eckert@amd.com    tdp_stats.readAc.access = num_fu;
63710234Syasuko.eckert@amd.com    if (fu_type == FPU) {
63810234Syasuko.eckert@amd.com        FU_duty_cycle = core_stats.FPU_duty_cycle;
63910234Syasuko.eckert@amd.com    } else if (fu_type == ALU) {
64010234Syasuko.eckert@amd.com        FU_duty_cycle = core_stats.ALU_duty_cycle;
64110234Syasuko.eckert@amd.com    } else if (fu_type == MUL) {
64210234Syasuko.eckert@amd.com        FU_duty_cycle = core_stats.MUL_duty_cycle;
64310234Syasuko.eckert@amd.com    }
64410152Satgutier@umich.edu
64510234Syasuko.eckert@amd.com    power.readOp.dynamic =
64610234Syasuko.eckert@amd.com        per_access_energy * tdp_stats.readAc.access + base_energy / clockRate;
64710234Syasuko.eckert@amd.com    power.readOp.dynamic *= sckRation * FU_duty_cycle;
64810152Satgutier@umich.edu
64910234Syasuko.eckert@amd.com    // Runtime power calculation
65010234Syasuko.eckert@amd.com    if (fu_type == FPU) {
65110234Syasuko.eckert@amd.com        rtp_stats.readAc.access = core_stats.fpu_accesses;
65210234Syasuko.eckert@amd.com    } else if (fu_type == ALU) {
65310234Syasuko.eckert@amd.com        rtp_stats.readAc.access = core_stats.ialu_accesses;
65410234Syasuko.eckert@amd.com    } else if (fu_type == MUL) {
65510234Syasuko.eckert@amd.com        rtp_stats.readAc.access = core_stats.mul_accesses;
65610234Syasuko.eckert@amd.com    }
65710152Satgutier@umich.edu
65810234Syasuko.eckert@amd.com    rt_power.readOp.dynamic = per_access_energy * rtp_stats.readAc.access +
65910234Syasuko.eckert@amd.com        base_energy * execution_time;
66010234Syasuko.eckert@amd.com    rt_power.readOp.dynamic *= sckRation;
66110152Satgutier@umich.edu
66210234Syasuko.eckert@amd.com    output_data.area = area.get_area() / 1e6;
66310234Syasuko.eckert@amd.com    output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
66410234Syasuko.eckert@amd.com    output_data.subthreshold_leakage_power =
66510234Syasuko.eckert@amd.com        (longer_channel_device) ? power.readOp.longer_channel_leakage :
66610234Syasuko.eckert@amd.com        power.readOp.leakage;
66710234Syasuko.eckert@amd.com    output_data.gate_leakage_power = power.readOp.gate_leakage;
66810234Syasuko.eckert@amd.com    output_data.runtime_dynamic_energy = rt_power.readOp.dynamic;
66910152Satgutier@umich.edu}
67010152Satgutier@umich.edu
67110152Satgutier@umich.eduvoid FunctionalUnit::leakage_feedback(double temperature)
67210152Satgutier@umich.edu{
67310152Satgutier@umich.edu  // Update the temperature and initialize the global interfaces.
67410152Satgutier@umich.edu  interface_ip.temp = (unsigned int)round(temperature/10.0)*10;
67510152Satgutier@umich.edu
67610234Syasuko.eckert@amd.com  // init_result is dummy
67710234Syasuko.eckert@amd.com  uca_org_t init_result = init_interface(&interface_ip, name);
67810152Satgutier@umich.edu
67910152Satgutier@umich.edu  // This is part of FunctionalUnit()
68010152Satgutier@umich.edu  double area_t, leakage, gate_leakage;
68110152Satgutier@umich.edu  double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
68210152Satgutier@umich.edu
68310152Satgutier@umich.edu  if (fu_type == FPU)
68410152Satgutier@umich.edu  {
68510152Satgutier@umich.edu        area_t = 4.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 The base number
68610152Satgutier@umich.edu        if (g_ip->F_sz_nm>90)
68710152Satgutier@umich.edu                area_t = 4.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2
68810152Satgutier@umich.edu        leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
68910152Satgutier@umich.edu        gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
69010152Satgutier@umich.edu  }
69110152Satgutier@umich.edu  else if (fu_type == ALU)
69210152Satgutier@umich.edu  {
69310152Satgutier@umich.edu    area_t = 280*260*2*num_fu*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl
69410152Satgutier@umich.edu    leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
69510152Satgutier@umich.edu    gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
69610152Satgutier@umich.edu  }
69710152Satgutier@umich.edu  else if (fu_type == MUL)
69810152Satgutier@umich.edu  {
69910152Satgutier@umich.edu    area_t = 280*260*2*3*num_fu*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl
70010152Satgutier@umich.edu    leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
70110152Satgutier@umich.edu    gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
70210152Satgutier@umich.edu  }
70310152Satgutier@umich.edu  else
70410152Satgutier@umich.edu  {
70510152Satgutier@umich.edu    cout<<"Unknown Functional Unit Type"<<endl;
70610152Satgutier@umich.edu    exit(1);
70710152Satgutier@umich.edu  }
70810152Satgutier@umich.edu
70910152Satgutier@umich.edu  power.readOp.leakage = leakage*num_fu;
71010152Satgutier@umich.edu  power.readOp.gate_leakage = gate_leakage*num_fu;
71110234Syasuko.eckert@amd.com  power.readOp.longer_channel_leakage =
71210234Syasuko.eckert@amd.com      longer_channel_device_reduction(Core_device, core_params.core_ty);
71310152Satgutier@umich.edu}
71410152Satgutier@umich.edu
71510234Syasuko.eckert@amd.comUndiffCore::UndiffCore(XMLNode* _xml_data, InputParameter* interface_ip_,
71610234Syasuko.eckert@amd.com                       const CoreParameters & dyn_p_,
71710234Syasuko.eckert@amd.com                       bool exist_)
71810234Syasuko.eckert@amd.com        : McPATComponent(_xml_data),
71910234Syasuko.eckert@amd.com        interface_ip(*interface_ip_), coredynp(dyn_p_),
72010234Syasuko.eckert@amd.com        core_ty(coredynp.core_ty), embedded(coredynp.Embedded),
72110234Syasuko.eckert@amd.com        pipeline_stage(coredynp.pipeline_stages),
72210234Syasuko.eckert@amd.com        num_hthreads(coredynp.num_hthreads), issue_width(coredynp.issueW),
72310234Syasuko.eckert@amd.com        exist(exist_) {
72410234Syasuko.eckert@amd.com    if (!exist) return;
72510234Syasuko.eckert@amd.com
72610234Syasuko.eckert@amd.com    name = "Undifferentiated Core";
72710234Syasuko.eckert@amd.com    clockRate = coredynp.clockRate;
72810234Syasuko.eckert@amd.com
72910234Syasuko.eckert@amd.com    double undifferentiated_core = 0;
73010234Syasuko.eckert@amd.com    double core_tx_density = 0;
73110234Syasuko.eckert@amd.com    double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
73210152Satgutier@umich.edu        double undifferentiated_core_coe;
73310234Syasuko.eckert@amd.com    uca_org_t result2;
73410234Syasuko.eckert@amd.com    result2 = init_interface(&interface_ip, name);
73510152Satgutier@umich.edu
73610234Syasuko.eckert@amd.com    //Compute undifferentiated core area at 90nm.
73710234Syasuko.eckert@amd.com    if (embedded == false) {
73810234Syasuko.eckert@amd.com        //Based on the results of polynomial/log curve fitting based on undifferentiated core of Niagara, Niagara2, Merom, Penyrn, Prescott, Opteron die measurements
73910234Syasuko.eckert@amd.com        if (core_ty == OOO) {
74010234Syasuko.eckert@amd.com            undifferentiated_core = (3.57 * log(pipeline_stage) - 1.2643) > 0 ?
74110234Syasuko.eckert@amd.com                (3.57 * log(pipeline_stage) - 1.2643) : 0;
74210234Syasuko.eckert@amd.com        } else if (core_ty == Inorder) {
74310234Syasuko.eckert@amd.com            undifferentiated_core = (-2.19 * log(pipeline_stage) + 6.55) > 0 ?
74410234Syasuko.eckert@amd.com                (-2.19 * log(pipeline_stage) + 6.55) : 0;
74510234Syasuko.eckert@amd.com        } else {
74610234Syasuko.eckert@amd.com            cout << "invalid core type" << endl;
74710234Syasuko.eckert@amd.com            exit(0);
74810152Satgutier@umich.edu        }
74910234Syasuko.eckert@amd.com        undifferentiated_core *= (1 + logtwo(num_hthreads) * 0.0716);
75010234Syasuko.eckert@amd.com    } else {
75110234Syasuko.eckert@amd.com        //Based on the results in paper "parametrized processor models" Sandia Labs
75210234Syasuko.eckert@amd.com                if (opt_for_clk)
75310152Satgutier@umich.edu                        undifferentiated_core_coe = 0.05;
75410152Satgutier@umich.edu                else
75510152Satgutier@umich.edu                        undifferentiated_core_coe = 0;
75610234Syasuko.eckert@amd.com                undifferentiated_core = (0.4109 * pipeline_stage - 0.776) *
75710234Syasuko.eckert@amd.com                    undifferentiated_core_coe;
75810234Syasuko.eckert@amd.com                undifferentiated_core *= (1 + logtwo(num_hthreads) * 0.0426);
75910234Syasuko.eckert@amd.com    }
76010152Satgutier@umich.edu
76110234Syasuko.eckert@amd.com    undifferentiated_core *= g_tp.scaling_factor.logic_scaling_co_eff *
76210234Syasuko.eckert@amd.com        1e6;//change from mm^2 to um^2
76310234Syasuko.eckert@amd.com    core_tx_density                 = g_tp.scaling_factor.core_tx_density;
76410234Syasuko.eckert@amd.com    power.readOp.leakage = undifferentiated_core*(core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
76510234Syasuko.eckert@amd.com    power.readOp.gate_leakage = undifferentiated_core*(core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;
76610152Satgutier@umich.edu
76710234Syasuko.eckert@amd.com    double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty);
76810234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage	=
76910234Syasuko.eckert@amd.com        power.readOp.leakage * long_channel_device_reduction;
77010234Syasuko.eckert@amd.com    area.set_area(undifferentiated_core);
77110152Satgutier@umich.edu
77210234Syasuko.eckert@amd.com    scktRatio = g_tp.sckt_co_eff;
77310234Syasuko.eckert@amd.com    power.readOp.dynamic *= scktRatio;
77410234Syasuko.eckert@amd.com    power.writeOp.dynamic *= scktRatio;
77510234Syasuko.eckert@amd.com    power.searchOp.dynamic *= scktRatio;
77610234Syasuko.eckert@amd.com    macro_PR_overhead = g_tp.macro_layout_overhead;
77710234Syasuko.eckert@amd.com    area.set_area(area.get_area()*macro_PR_overhead);
77810152Satgutier@umich.edu
77910234Syasuko.eckert@amd.com    output_data.area = area.get_area() / 1e6;
78010234Syasuko.eckert@amd.com    output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
78110234Syasuko.eckert@amd.com    output_data.subthreshold_leakage_power =
78210234Syasuko.eckert@amd.com        longer_channel_device ? power.readOp.longer_channel_leakage :
78310234Syasuko.eckert@amd.com        power.readOp.leakage;
78410234Syasuko.eckert@amd.com    output_data.gate_leakage_power = power.readOp.gate_leakage;
78510152Satgutier@umich.edu}
78610152Satgutier@umich.edu
78710234Syasuko.eckert@amd.comInstructionDecoder::InstructionDecoder(XMLNode* _xml_data, const string _name,
78810234Syasuko.eckert@amd.com                                       bool _is_default,
78910234Syasuko.eckert@amd.com                                       const InputParameter *configure_interface,
79010234Syasuko.eckert@amd.com                                       int opcode_length_, int num_decoders_,
79110234Syasuko.eckert@amd.com                                       bool x86_,
79210234Syasuko.eckert@amd.com                                       double clockRate_,
79310234Syasuko.eckert@amd.com                                       enum Device_ty device_ty_,
79410234Syasuko.eckert@amd.com                                       enum Core_type core_ty_)
79510234Syasuko.eckert@amd.com    : McPATComponent(_xml_data), is_default(_is_default),
79610234Syasuko.eckert@amd.com      opcode_length(opcode_length_), num_decoders(num_decoders_), x86(x86_),
79710234Syasuko.eckert@amd.com      device_ty(device_ty_), core_ty(core_ty_) {
79810234Syasuko.eckert@amd.com    /*
79910234Syasuko.eckert@amd.com     * Instruction decoder is different from n to 2^n decoders
80010234Syasuko.eckert@amd.com     * that are commonly used in row decoders in memory arrays.
80110234Syasuko.eckert@amd.com     * The RISC instruction decoder is typically a very simple device.
80210234Syasuko.eckert@amd.com     * We can decode an instruction by simply
80310234Syasuko.eckert@amd.com     * separating the machine word into small parts using wire slices
80410234Syasuko.eckert@amd.com     * The RISC instruction decoder can be approximate by the n to 2^n decoders,
80510234Syasuko.eckert@amd.com     * although this approximation usually underestimate power since each decoded
80610234Syasuko.eckert@amd.com     * instruction normally has more than 1 active signal.
80710234Syasuko.eckert@amd.com     *
80810234Syasuko.eckert@amd.com     * However, decoding a CISC instruction word is much more difficult
80910234Syasuko.eckert@amd.com     * than the RISC case. A CISC decoder is typically set up as a state machine.
81010234Syasuko.eckert@amd.com     * The machine reads the opcode field to determine
81110234Syasuko.eckert@amd.com     * what type of instruction it is,
81210234Syasuko.eckert@amd.com     * and where the other data values are.
81310234Syasuko.eckert@amd.com     * The instruction word is read in piece by piece,
81410234Syasuko.eckert@amd.com     * and decisions are made at each stage as to
81510234Syasuko.eckert@amd.com     * how the remainder of the instruction word will be read.
81610234Syasuko.eckert@amd.com     * (sequencer and ROM are usually needed)
81710234Syasuko.eckert@amd.com     * An x86 decoder can be even more complex since
81810234Syasuko.eckert@amd.com     * it involve  both decoding instructions into u-ops and
81910234Syasuko.eckert@amd.com     * merge u-ops when doing micro-ops fusion.
82010234Syasuko.eckert@amd.com     */
82110234Syasuko.eckert@amd.com    name = _name;
82210234Syasuko.eckert@amd.com    clockRate = clockRate_;
82310234Syasuko.eckert@amd.com    bool is_dram = false;
82410234Syasuko.eckert@amd.com    double pmos_to_nmos_sizing_r;
82510234Syasuko.eckert@amd.com    double load_nmos_width, load_pmos_width;
82610234Syasuko.eckert@amd.com    double C_driver_load, R_wire_load;
82710234Syasuko.eckert@amd.com    Area cell;
82810152Satgutier@umich.edu
82910234Syasuko.eckert@amd.com    l_ip = *configure_interface;
83010234Syasuko.eckert@amd.com    local_result = init_interface(&l_ip, name);
83110234Syasuko.eckert@amd.com    cell.h = g_tp.cell_h_def;
83210234Syasuko.eckert@amd.com    cell.w = g_tp.cell_h_def;
83310152Satgutier@umich.edu
83410234Syasuko.eckert@amd.com    num_decoder_segments = (int)ceil(opcode_length / 18.0);
83510234Syasuko.eckert@amd.com    if (opcode_length > 18)	opcode_length = 18;
83610234Syasuko.eckert@amd.com    num_decoded_signals = (int)pow(2.0, opcode_length);
83710234Syasuko.eckert@amd.com    pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
83810234Syasuko.eckert@amd.com    load_nmos_width = g_tp.max_w_nmos_ / 2;
83910234Syasuko.eckert@amd.com    load_pmos_width = g_tp.max_w_nmos_ * pmos_to_nmos_sizing_r;
84010234Syasuko.eckert@amd.com    C_driver_load = 1024 * gate_C(load_nmos_width + load_pmos_width, 0, is_dram);
84110234Syasuko.eckert@amd.com    R_wire_load   = 3000 * l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um;
84210152Satgutier@umich.edu
84310234Syasuko.eckert@amd.com    final_dec = new Decoder(
84410234Syasuko.eckert@amd.com        num_decoded_signals,
84510234Syasuko.eckert@amd.com        false,
84610234Syasuko.eckert@amd.com        C_driver_load,
84710234Syasuko.eckert@amd.com        R_wire_load,
84810234Syasuko.eckert@amd.com        false/*is_fa*/,
84910234Syasuko.eckert@amd.com        false/*is_dram*/,
85010234Syasuko.eckert@amd.com        false/*wl_tr*/, //to use peri device
85110234Syasuko.eckert@amd.com        cell);
85210234Syasuko.eckert@amd.com
85310234Syasuko.eckert@amd.com    PredecBlk * predec_blk1 = new PredecBlk(
85410234Syasuko.eckert@amd.com        num_decoded_signals,
85510234Syasuko.eckert@amd.com        final_dec,
85610234Syasuko.eckert@amd.com        0,//Assuming predec and dec are back to back
85710234Syasuko.eckert@amd.com        0,
85810234Syasuko.eckert@amd.com        1,//Each Predec only drives one final dec
85910234Syasuko.eckert@amd.com        false/*is_dram*/,
86010234Syasuko.eckert@amd.com        true);
86110234Syasuko.eckert@amd.com    PredecBlk * predec_blk2 = new PredecBlk(
86210234Syasuko.eckert@amd.com        num_decoded_signals,
86310234Syasuko.eckert@amd.com        final_dec,
86410234Syasuko.eckert@amd.com        0,//Assuming predec and dec are back to back
86510234Syasuko.eckert@amd.com        0,
86610234Syasuko.eckert@amd.com        1,//Each Predec only drives one final dec
86710234Syasuko.eckert@amd.com        false/*is_dram*/,
86810234Syasuko.eckert@amd.com        false);
86910234Syasuko.eckert@amd.com
87010234Syasuko.eckert@amd.com    PredecBlkDrv * predec_blk_drv1 = new PredecBlkDrv(0, predec_blk1, false);
87110234Syasuko.eckert@amd.com    PredecBlkDrv * predec_blk_drv2 = new PredecBlkDrv(0, predec_blk2, false);
87210234Syasuko.eckert@amd.com
87310234Syasuko.eckert@amd.com    pre_dec            = new Predec(predec_blk_drv1, predec_blk_drv2);
87410234Syasuko.eckert@amd.com
87510234Syasuko.eckert@amd.com    double area_decoder = final_dec->area.get_area() * num_decoded_signals *
87610234Syasuko.eckert@amd.com        num_decoder_segments * num_decoders;
87710234Syasuko.eckert@amd.com    //double w_decoder    = area_decoder / area.get_h();
87810234Syasuko.eckert@amd.com    double area_pre_dec = (predec_blk_drv1->area.get_area() +
87910234Syasuko.eckert@amd.com                           predec_blk_drv2->area.get_area() +
88010234Syasuko.eckert@amd.com                           predec_blk1->area.get_area() +
88110234Syasuko.eckert@amd.com                           predec_blk2->area.get_area()) *
88210234Syasuko.eckert@amd.com                          num_decoder_segments * num_decoders;
88310234Syasuko.eckert@amd.com    area.set_area(area.get_area() + area_decoder + area_pre_dec);
88410234Syasuko.eckert@amd.com    double macro_layout_overhead   = g_tp.macro_layout_overhead;
88510234Syasuko.eckert@amd.com    double chip_PR_overhead        = g_tp.chip_layout_overhead;
88610234Syasuko.eckert@amd.com    area.set_area(area.get_area()*macro_layout_overhead*chip_PR_overhead);
88710234Syasuko.eckert@amd.com
88810234Syasuko.eckert@amd.com    inst_decoder_delay_power();
88910234Syasuko.eckert@amd.com
89010234Syasuko.eckert@amd.com    double sckRation = g_tp.sckt_co_eff;
89110234Syasuko.eckert@amd.com    power.readOp.dynamic *= sckRation;
89210234Syasuko.eckert@amd.com    power.writeOp.dynamic *= sckRation;
89310234Syasuko.eckert@amd.com    power.searchOp.dynamic *= sckRation;
89410234Syasuko.eckert@amd.com
89510234Syasuko.eckert@amd.com    double long_channel_device_reduction =
89610234Syasuko.eckert@amd.com        longer_channel_device_reduction(device_ty, core_ty);
89710234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage	= power.readOp.leakage *
89810234Syasuko.eckert@amd.com        long_channel_device_reduction;
89910234Syasuko.eckert@amd.com
90010234Syasuko.eckert@amd.com    output_data.area = area.get_area() / 1e6;
90110234Syasuko.eckert@amd.com    output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
90210234Syasuko.eckert@amd.com    output_data.subthreshold_leakage_power = power.readOp.leakage;
90310234Syasuko.eckert@amd.com    output_data.gate_leakage_power = power.readOp.gate_leakage;
90410152Satgutier@umich.edu}
90510152Satgutier@umich.edu
90610234Syasuko.eckert@amd.comvoid InstructionDecoder::inst_decoder_delay_power() {
90710152Satgutier@umich.edu
90810234Syasuko.eckert@amd.com    double dec_outrisetime;
90910234Syasuko.eckert@amd.com    double inrisetime = 0, outrisetime;
91010234Syasuko.eckert@amd.com    double pppm_t[4]    = {1, 1, 1, 1};
91110234Syasuko.eckert@amd.com    double squencer_passes = x86 ? 2 : 1;
91210152Satgutier@umich.edu
91310234Syasuko.eckert@amd.com    outrisetime = pre_dec->compute_delays(inrisetime);
91410234Syasuko.eckert@amd.com    dec_outrisetime = final_dec->compute_delays(outrisetime);
91510234Syasuko.eckert@amd.com    set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments);
91610234Syasuko.eckert@amd.com    power = power + pre_dec->power * pppm_t;
91710234Syasuko.eckert@amd.com    set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments*num_decoded_signals,
91810234Syasuko.eckert@amd.com             num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments);
91910234Syasuko.eckert@amd.com    power = power + final_dec->power * pppm_t;
92010152Satgutier@umich.edu}
92110152Satgutier@umich.edu
92210234Syasuko.eckert@amd.comvoid InstructionDecoder::leakage_feedback(double temperature) {
92310152Satgutier@umich.edu  l_ip.temp = (unsigned int)round(temperature/10.0)*10;
92410234Syasuko.eckert@amd.com  uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy
92510152Satgutier@umich.edu
92610152Satgutier@umich.edu  final_dec->leakage_feedback(temperature);
92710152Satgutier@umich.edu  pre_dec->leakage_feedback(temperature);
92810152Satgutier@umich.edu
92910152Satgutier@umich.edu  double pppm_t[4]    = {1,1,1,1};
93010152Satgutier@umich.edu  double squencer_passes = x86?2:1;
93110152Satgutier@umich.edu
93210152Satgutier@umich.edu  set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments);
93310152Satgutier@umich.edu  power = pre_dec->power*pppm_t;
93410152Satgutier@umich.edu
93510152Satgutier@umich.edu  set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments*num_decoded_signals,num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments);
93610152Satgutier@umich.edu  power = power + final_dec->power*pppm_t;
93710152Satgutier@umich.edu
93810152Satgutier@umich.edu  double sckRation = g_tp.sckt_co_eff;
93910152Satgutier@umich.edu
94010152Satgutier@umich.edu  power.readOp.dynamic *= sckRation;
94110152Satgutier@umich.edu  power.writeOp.dynamic *= sckRation;
94210152Satgutier@umich.edu  power.searchOp.dynamic *= sckRation;
94310152Satgutier@umich.edu
94410152Satgutier@umich.edu  double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
94510152Satgutier@umich.edu  power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
94610152Satgutier@umich.edu}
94710152Satgutier@umich.edu
94810234Syasuko.eckert@amd.comInstructionDecoder::~InstructionDecoder() {
94910234Syasuko.eckert@amd.com    local_result.cleanup();
95010152Satgutier@umich.edu
95110234Syasuko.eckert@amd.com    delete final_dec;
95210152Satgutier@umich.edu
95310234Syasuko.eckert@amd.com    delete pre_dec->blk1;
95410234Syasuko.eckert@amd.com    delete pre_dec->blk2;
95510234Syasuko.eckert@amd.com    delete pre_dec->drv1;
95610234Syasuko.eckert@amd.com    delete pre_dec->drv2;
95710234Syasuko.eckert@amd.com    delete pre_dec;
95810152Satgutier@umich.edu}
959