ext/mcpat/logic.cc

10152Satgutier@umich.edu/*****************************************************************************
10152Satgutier@umich.edu *                                McPAT
10152Satgutier@umich.edu *                      SOFTWARE LICENSE AGREEMENT
10152Satgutier@umich.edu *            Copyright 2012 Hewlett-Packard Development Company, L.P.
10234Syasuko.eckert@amd.com *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
10152Satgutier@umich.edu *                          All Rights Reserved
10152Satgutier@umich.edu *
10152Satgutier@umich.edu * Redistribution and use in source and binary forms, with or without
10152Satgutier@umich.edu * modification, are permitted provided that the following conditions are
10152Satgutier@umich.edu * met: redistributions of source code must retain the above copyright
10152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer;
10152Satgutier@umich.edu * redistributions in binary form must reproduce the above copyright
10152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer in the
10152Satgutier@umich.edu * documentation and/or other materials provided with the distribution;
10152Satgutier@umich.edu * neither the name of the copyright holders nor the names of its
10152Satgutier@umich.edu * contributors may be used to endorse or promote products derived from
10152Satgutier@umich.edu * this software without specific prior written permission.
10152Satgutier@umich.edu
10152Satgutier@umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
10152Satgutier@umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
10152Satgutier@umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
10152Satgutier@umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
10152Satgutier@umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
10152Satgutier@umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
10152Satgutier@umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
10152Satgutier@umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
10152Satgutier@umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
10152Satgutier@umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
10234Syasuko.eckert@amd.com * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
10152Satgutier@umich.edu *
10152Satgutier@umich.edu ***************************************************************************/
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com#include "common.h"
10152Satgutier@umich.edu#include "logic.h"
10152Satgutier@umich.edu
10152Satgutier@umich.edu//selection_logic
10234Syasuko.eckert@amd.comselection_logic::selection_logic(XMLNode* _xml_data, bool _is_default,
10234Syasuko.eckert@amd.com                                 int _win_entries, int issue_width_,
10234Syasuko.eckert@amd.com                                 const InputParameter *configure_interface,
10234Syasuko.eckert@amd.com                                 string _name, double _accesses,
10234Syasuko.eckert@amd.com                                 double clockRate_, enum Device_ty device_ty_,
10234Syasuko.eckert@amd.com                                 enum Core_type core_ty_)
10234Syasuko.eckert@amd.com    : McPATComponent(_xml_data), is_default(_is_default),
10234Syasuko.eckert@amd.com      win_entries(_win_entries),
10234Syasuko.eckert@amd.com      issue_width(issue_width_),
10234Syasuko.eckert@amd.com      accesses(_accesses),
10234Syasuko.eckert@amd.com      device_ty(device_ty_),
10234Syasuko.eckert@amd.com      core_ty(core_ty_) {
10234Syasuko.eckert@amd.com    clockRate = clockRate_;
10234Syasuko.eckert@amd.com    name = _name;
10234Syasuko.eckert@amd.com    l_ip = *configure_interface;
10234Syasuko.eckert@amd.com    local_result = init_interface(&l_ip, name);
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comvoid selection_logic::computeArea() {
10234Syasuko.eckert@amd.com    output_data.area = local_result.area;
10234Syasuko.eckert@amd.com}
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.comvoid selection_logic::computeEnergy() {
10234Syasuko.eckert@amd.com    //based on cost effective superscalar processor TR pp27-31
10234Syasuko.eckert@amd.com    double Ctotal, Cor, Cpencode;
10234Syasuko.eckert@amd.com    int num_arbiter;
10234Syasuko.eckert@amd.com    double WSelORn, WSelORprequ, WSelPn, WSelPp, WSelEnn, WSelEnp;
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    //the 0.8um process data is used.
10234Syasuko.eckert@amd.com    //this was 10 micron for the 0.8 micron process
10234Syasuko.eckert@amd.com    WSelORn	= 12.5 * l_ip.F_sz_um;
10234Syasuko.eckert@amd.com    //this was 40 micron for the 0.8 micron process
10234Syasuko.eckert@amd.com    WSelORprequ = 50 * l_ip.F_sz_um;
10234Syasuko.eckert@amd.com    //this was 10mcron for the 0.8 micron process
10234Syasuko.eckert@amd.com    WSelPn = 12.5 * l_ip.F_sz_um;
10234Syasuko.eckert@amd.com    //this was 15 micron for the 0.8 micron process
10234Syasuko.eckert@amd.com    WSelPp = 18.75 * l_ip.F_sz_um;
10234Syasuko.eckert@amd.com    //this was 5 micron for the 0.8 micron process
10234Syasuko.eckert@amd.com    WSelEnn	= 6.25 * l_ip.F_sz_um;
10234Syasuko.eckert@amd.com    //this was 10 micron for the 0.8 micron process
10234Syasuko.eckert@amd.com    WSelEnp	= 12.5 * l_ip.F_sz_um;
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    Ctotal = 0;
10234Syasuko.eckert@amd.com    num_arbiter = 1;
10234Syasuko.eckert@amd.com    while (win_entries > 4) {
10234Syasuko.eckert@amd.com        win_entries = (int)ceil((double)win_entries / 4.0);
10234Syasuko.eckert@amd.com        num_arbiter += win_entries;
10234Syasuko.eckert@amd.com    }
10234Syasuko.eckert@amd.com    //the 4-input OR logic to generate anyreq
10234Syasuko.eckert@amd.com    Cor = 4 * drain_C_(WSelORn, NCH, 1, 1, g_tp.cell_h_def) +
10234Syasuko.eckert@amd.com        drain_C_(WSelORprequ, PCH, 1, 1, g_tp.cell_h_def);
10234Syasuko.eckert@amd.com    power.readOp.gate_leakage =
10234Syasuko.eckert@amd.com        cmos_Ig_leakage(WSelORn, WSelORprequ, 4, nor) * g_tp.peri_global.Vdd;
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    //The total capacity of the 4-bit priority encoder
10234Syasuko.eckert@amd.com    Cpencode = drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) +
10234Syasuko.eckert@amd.com        drain_C_(WSelPp, PCH, 1, 1, g_tp.cell_h_def) +
10234Syasuko.eckert@amd.com        2 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) +
10234Syasuko.eckert@amd.com        drain_C_(WSelPp, PCH, 2, 1, g_tp.cell_h_def) +
10234Syasuko.eckert@amd.com        3 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) +
10234Syasuko.eckert@amd.com        drain_C_(WSelPp, PCH, 3, 1, g_tp.cell_h_def) +
10234Syasuko.eckert@amd.com        4 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) +
10234Syasuko.eckert@amd.com        drain_C_(WSelPp, PCH, 4, 1, g_tp.cell_h_def) +//precompute priority logic
10234Syasuko.eckert@amd.com        2 * 4 * gate_C(WSelEnn + WSelEnp, 20.0) +
10234Syasuko.eckert@amd.com        4 * drain_C_(WSelEnn, NCH, 1, 1, g_tp.cell_h_def) +
10234Syasuko.eckert@amd.com        2 * 4 * drain_C_(WSelEnp, PCH, 1, 1, g_tp.cell_h_def) +//enable logic
10234Syasuko.eckert@amd.com        (2 * 4 + 2 * 3 + 2 * 2 + 2) *
10234Syasuko.eckert@amd.com        gate_C(WSelPn + WSelPp, 10.0);//requests signal
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    Ctotal += issue_width * num_arbiter * (Cor + Cpencode);
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    //2 means the abitration signal need to travel round trip
10234Syasuko.eckert@amd.com    power.readOp.dynamic =
10234Syasuko.eckert@amd.com        Ctotal * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 2;
10234Syasuko.eckert@amd.com    power.readOp.leakage = issue_width * num_arbiter *
10234Syasuko.eckert@amd.com        (cmos_Isub_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p
10234Syasuko.eckert@amd.com         + cmos_Isub_leakage(WSelPn, WSelPp, 3, nor)//grant2p
10234Syasuko.eckert@amd.com         + cmos_Isub_leakage(WSelPn, WSelPp, 4, nor)//grant3p
10234Syasuko.eckert@amd.com         + cmos_Isub_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic
10234Syasuko.eckert@amd.com         + cmos_Isub_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant sIsubnals
10234Syasuko.eckert@amd.com            ) * g_tp.peri_global.Vdd;
10234Syasuko.eckert@amd.com    power.readOp.gate_leakage = issue_width * num_arbiter *
10234Syasuko.eckert@amd.com        (cmos_Ig_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p
10234Syasuko.eckert@amd.com         + cmos_Ig_leakage(WSelPn, WSelPp, 3, nor)//grant2p
10234Syasuko.eckert@amd.com         + cmos_Ig_leakage(WSelPn, WSelPp, 4, nor)//grant3p
10234Syasuko.eckert@amd.com         + cmos_Ig_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic
10234Syasuko.eckert@amd.com         + cmos_Ig_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant signals
10234Syasuko.eckert@amd.com            ) * g_tp.peri_global.Vdd;
10234Syasuko.eckert@amd.com    double sckRation = g_tp.sckt_co_eff;
10234Syasuko.eckert@amd.com    power.readOp.dynamic *= sckRation;
10234Syasuko.eckert@amd.com    power.writeOp.dynamic *= sckRation;
10234Syasuko.eckert@amd.com    power.searchOp.dynamic *= sckRation;
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    double long_channel_device_reduction =
10234Syasuko.eckert@amd.com        longer_channel_device_reduction(device_ty, core_ty);
10234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage =
10234Syasuko.eckert@amd.com        power.readOp.leakage * long_channel_device_reduction;
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
10234Syasuko.eckert@amd.com    output_data.subthreshold_leakage_power = power.readOp.leakage;
10234Syasuko.eckert@amd.com    output_data.gate_leakage_power = power.readOp.gate_leakage;
10234Syasuko.eckert@amd.com    output_data.runtime_dynamic_energy = power.readOp.dynamic * accesses;
10234Syasuko.eckert@amd.com}
10152Satgutier@umich.edu
10152Satgutier@umich.edudep_resource_conflict_check::dep_resource_conflict_check(
10234Syasuko.eckert@amd.com    XMLNode* _xml_data, const string _name,
10234Syasuko.eckert@amd.com    const InputParameter *configure_interface,
10234Syasuko.eckert@amd.com    const CoreParameters & dyn_p_, int compare_bits_,
10234Syasuko.eckert@amd.com    double clockRate_, bool _is_default)
10234Syasuko.eckert@amd.com    : McPATComponent(_xml_data), l_ip(*configure_interface),
10234Syasuko.eckert@amd.com      coredynp(dyn_p_), compare_bits(compare_bits_), is_default(_is_default) {
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    name = _name;
10234Syasuko.eckert@amd.com    clockRate = clockRate_;
10234Syasuko.eckert@amd.com    //this was 20.0 micron for the 0.8 micron process
10234Syasuko.eckert@amd.com    Wcompn = 25 * l_ip.F_sz_um;
10234Syasuko.eckert@amd.com    //this was 20.0 micron for the 0.8 micron process
10234Syasuko.eckert@amd.com    Wevalinvp = 25 * l_ip.F_sz_um;
10234Syasuko.eckert@amd.com    //this was 80.0 mcron for the 0.8 micron process
10234Syasuko.eckert@amd.com    Wevalinvn = 100 * l_ip.F_sz_um;
10234Syasuko.eckert@amd.com    //this was 40.0  micron for the 0.8 micron process
10234Syasuko.eckert@amd.com    Wcomppreequ = 50 * l_ip.F_sz_um;
10234Syasuko.eckert@amd.com    //this was 5.4 micron for the 0.8 micron process
10234Syasuko.eckert@amd.com    WNORn =	6.75 * l_ip.F_sz_um;
10234Syasuko.eckert@amd.com    //this was 30.5 micron for the 0.8 micron process
10234Syasuko.eckert@amd.com    WNORp =	38.125 * l_ip.F_sz_um;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    // To make CACTI happy.
10234Syasuko.eckert@amd.com    l_ip.cache_sz = MIN_BUFFER_SIZE;
10234Syasuko.eckert@amd.com    local_result = init_interface(&l_ip, name);
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    if (coredynp.core_ty == Inorder)
10234Syasuko.eckert@amd.com        //TODO: opcode bits + log(shared resources) + REG TAG BITS -->
10234Syasuko.eckert@amd.com        //opcode comparator
10234Syasuko.eckert@amd.com        compare_bits += 16 + 8 + 8;
10234Syasuko.eckert@amd.com    else
10234Syasuko.eckert@amd.com        compare_bits += 16 + 8 + 8;
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    conflict_check_power();
10234Syasuko.eckert@amd.com    double sckRation = g_tp.sckt_co_eff;
10234Syasuko.eckert@amd.com    power.readOp.dynamic *= sckRation;
10234Syasuko.eckert@amd.com    power.writeOp.dynamic *= sckRation;
10234Syasuko.eckert@amd.com    power.searchOp.dynamic *= sckRation;
10152Satgutier@umich.edu
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comvoid dep_resource_conflict_check::conflict_check_power() {
10234Syasuko.eckert@amd.com    double Ctotal;
10234Syasuko.eckert@amd.com    int num_comparators;
10234Syasuko.eckert@amd.com    //2(N*N-N) is used for source to dest comparison, (N*N-N) is used for
10234Syasuko.eckert@amd.com    //dest to dest comparision.
10234Syasuko.eckert@amd.com    num_comparators = 3 * ((coredynp.decodeW) * (coredynp.decodeW) -
10234Syasuko.eckert@amd.com                           coredynp.decodeW);
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    Ctotal = num_comparators * compare_cap();
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    power.readOp.dynamic = Ctotal * /*CLOCKRATE*/ g_tp.peri_global.Vdd *
10234Syasuko.eckert@amd.com        g_tp.peri_global.Vdd /*AF*/;
10234Syasuko.eckert@amd.com    power.readOp.leakage = num_comparators * compare_bits * 2 *
10234Syasuko.eckert@amd.com        simplified_nmos_leakage(Wcompn,  false);
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    double long_channel_device_reduction =
10234Syasuko.eckert@amd.com        longer_channel_device_reduction(Core_device, coredynp.core_ty);
10234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage	=
10234Syasuko.eckert@amd.com        power.readOp.leakage * long_channel_device_reduction;
10234Syasuko.eckert@amd.com    power.readOp.gate_leakage = num_comparators * compare_bits * 2 *
10234Syasuko.eckert@amd.com        cmos_Ig_leakage(Wcompn, 0, 2, nmos);
10152Satgutier@umich.edu
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10152Satgutier@umich.edu/* estimate comparator power consumption (this comparator is similar
10152Satgutier@umich.edu   to the tag-match structure in a CAM */
10234Syasuko.eckert@amd.comdouble dep_resource_conflict_check::compare_cap() {
10234Syasuko.eckert@amd.com    double c1, c2;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    //resize the big NOR gate at the DCL according to fan in.
10234Syasuko.eckert@amd.com    WNORp = WNORp * compare_bits / 2.0;
10234Syasuko.eckert@amd.com    /* bottom part of comparator */
10234Syasuko.eckert@amd.com    c2 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) +
10234Syasuko.eckert@amd.com                           drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def)) +
10234Syasuko.eckert@amd.com        drain_C_(Wevalinvp, PCH, 1, 1, g_tp.cell_h_def) +
10234Syasuko.eckert@amd.com        drain_C_(Wevalinvn, NCH, 1, 1, g_tp.cell_h_def);
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    /* top part of comparator */
10234Syasuko.eckert@amd.com    c1 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) +
10234Syasuko.eckert@amd.com                           drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def) +
10234Syasuko.eckert@amd.com                           drain_C_(Wcomppreequ, NCH, 1, 1, g_tp.cell_h_def)) +
10234Syasuko.eckert@amd.com        gate_C(WNORn + WNORp, 10.0) +
10234Syasuko.eckert@amd.com        drain_C_(WNORp, NCH, 2, 1, g_tp.cell_h_def) + compare_bits *
10234Syasuko.eckert@amd.com        drain_C_(WNORn, NCH, 2, 1, g_tp.cell_h_def);
10234Syasuko.eckert@amd.com    return(c1 + c2);
10152Satgutier@umich.edu
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10152Satgutier@umich.eduvoid dep_resource_conflict_check::leakage_feedback(double temperature)
10152Satgutier@umich.edu{
10152Satgutier@umich.edu  l_ip.temp = (unsigned int)round(temperature/10.0)*10;
10234Syasuko.eckert@amd.com  uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy
10152Satgutier@umich.edu
10152Satgutier@umich.edu  // This is part of conflict_check_power()
10234Syasuko.eckert@amd.com  // 2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest
10234Syasuko.eckert@amd.com  // to dest comparison.
10234Syasuko.eckert@amd.com  int num_comparators = 3 * ((coredynp.decodeW) * (coredynp.decodeW) -
10234Syasuko.eckert@amd.com                             coredynp.decodeW);
10234Syasuko.eckert@amd.com  power.readOp.leakage = num_comparators * compare_bits * 2 *
10234Syasuko.eckert@amd.com      simplified_nmos_leakage(Wcompn,  false);
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com  double long_channel_device_reduction =
10234Syasuko.eckert@amd.com      longer_channel_device_reduction(Core_device, coredynp.core_ty);
10234Syasuko.eckert@amd.com  power.readOp.longer_channel_leakage = power.readOp.leakage *
10234Syasuko.eckert@amd.com      long_channel_device_reduction;
10234Syasuko.eckert@amd.com  power.readOp.gate_leakage = num_comparators * compare_bits * 2 *
10234Syasuko.eckert@amd.com      cmos_Ig_leakage(Wcompn, 0, 2, nmos);
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10152Satgutier@umich.edu
10152Satgutier@umich.eduDFFCell::DFFCell(
10234Syasuko.eckert@amd.com    bool _is_dram,
10234Syasuko.eckert@amd.com    double _WdecNANDn,
10234Syasuko.eckert@amd.com    double _WdecNANDp,
10234Syasuko.eckert@amd.com    double _cell_load,
10234Syasuko.eckert@amd.com    const InputParameter *configure_interface)
10234Syasuko.eckert@amd.com        : is_dram(_is_dram),
10234Syasuko.eckert@amd.com        cell_load(_cell_load),
10234Syasuko.eckert@amd.com        WdecNANDn(_WdecNANDn),
10234Syasuko.eckert@amd.com        WdecNANDp(_WdecNANDp) { //this model is based on the NAND2 based DFF.
10234Syasuko.eckert@amd.com    l_ip = *configure_interface;
10234Syasuko.eckert@amd.com    area.set_area(5 * compute_gate_area(NAND, 2,WdecNANDn,WdecNANDp,
10234Syasuko.eckert@amd.com                                        g_tp.cell_h_def)
10234Syasuko.eckert@amd.com                  + compute_gate_area(NAND, 2,WdecNANDn,WdecNANDn,
10234Syasuko.eckert@amd.com                                      g_tp.cell_h_def));
10152Satgutier@umich.edu
10152Satgutier@umich.edu
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comdouble DFFCell::fpfp_node_cap(unsigned int fan_in, unsigned int fan_out) {
10234Syasuko.eckert@amd.com    double Ctotal = 0;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    /* part 1: drain cap of NAND gate */
10234Syasuko.eckert@amd.com    Ctotal += drain_C_(WdecNANDn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + fan_in * drain_C_(WdecNANDp, PCH, 1, 1, g_tp.cell_h_def, is_dram);
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    /* part 2: gate cap of NAND gates */
10234Syasuko.eckert@amd.com    Ctotal += fan_out * gate_C(WdecNANDn + WdecNANDp, 0, is_dram);
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    return Ctotal;
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comvoid DFFCell::compute_DFF_cell() {
10234Syasuko.eckert@amd.com    double c1, c2, c3, c4, c5, c6;
10234Syasuko.eckert@amd.com    /* node 5 and node 6 are identical to node 1 in capacitance */
10234Syasuko.eckert@amd.com    c1 = c5 = c6 = fpfp_node_cap(2, 1);
10234Syasuko.eckert@amd.com    c2 = fpfp_node_cap(2, 3);
10234Syasuko.eckert@amd.com    c3 = fpfp_node_cap(3, 2);
10234Syasuko.eckert@amd.com    c4 = fpfp_node_cap(2, 2);
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    //cap-load of the clock signal in each Dff, actually the clock signal only connected to one NAND2
10234Syasuko.eckert@amd.com    clock_cap = 2 * gate_C(WdecNANDn + WdecNANDp, 0, is_dram);
10234Syasuko.eckert@amd.com    e_switch.readOp.dynamic += (c4 + c1 + c2 + c3 + c5 + c6 + 2 * cell_load) *
10234Syasuko.eckert@amd.com        0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    /* no 1/2 for e_keep and e_clock because clock signal switches twice in one cycle */
10234Syasuko.eckert@amd.com    e_keep_1.readOp.dynamic +=
10234Syasuko.eckert@amd.com        c3 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ;
10234Syasuko.eckert@amd.com    e_keep_0.readOp.dynamic +=
10234Syasuko.eckert@amd.com        c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ;
10234Syasuko.eckert@amd.com    e_clock.readOp.dynamic +=
10234Syasuko.eckert@amd.com        clock_cap * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    /* static power */
10234Syasuko.eckert@amd.com    e_switch.readOp.leakage +=
10234Syasuko.eckert@amd.com        (cmos_Isub_leakage(WdecNANDn, WdecNANDp, 2, nand) *
10234Syasuko.eckert@amd.com         5//5 NAND2 and 1 NAND3 in a DFF
10234Syasuko.eckert@amd.com         + cmos_Isub_leakage(WdecNANDn, WdecNANDn, 3, nand)) *
10234Syasuko.eckert@amd.com        g_tp.peri_global.Vdd;
10234Syasuko.eckert@amd.com    e_switch.readOp.gate_leakage +=
10234Syasuko.eckert@amd.com        (cmos_Ig_leakage(WdecNANDn, WdecNANDp, 2, nand) *
10234Syasuko.eckert@amd.com         5//5 NAND2 and 1 NAND3 in a DFF
10234Syasuko.eckert@amd.com         + cmos_Ig_leakage(WdecNANDn, WdecNANDn, 3, nand)) *
10234Syasuko.eckert@amd.com        g_tp.peri_global.Vdd;
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comPipeline::Pipeline(XMLNode* _xml_data,
10234Syasuko.eckert@amd.com                   const InputParameter *configure_interface,
10234Syasuko.eckert@amd.com                   const CoreParameters & dyn_p_,
10234Syasuko.eckert@amd.com                   enum Device_ty device_ty_,
10234Syasuko.eckert@amd.com                   bool _is_core_pipeline,
10234Syasuko.eckert@amd.com                   bool _is_default)
10234Syasuko.eckert@amd.com    : McPATComponent(_xml_data), l_ip(*configure_interface),
10234Syasuko.eckert@amd.com      coredynp(dyn_p_), device_ty(device_ty_),
10234Syasuko.eckert@amd.com      is_core_pipeline(_is_core_pipeline), is_default(_is_default),
10234Syasuko.eckert@amd.com      num_piperegs(0.0) {
10234Syasuko.eckert@amd.com    name = "Pipeline?";
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    local_result = init_interface(&l_ip, name);
10234Syasuko.eckert@amd.com    if (!coredynp.Embedded) {
10234Syasuko.eckert@amd.com        process_ind = true;
10234Syasuko.eckert@amd.com    } else {
10234Syasuko.eckert@amd.com        process_ind = false;
10234Syasuko.eckert@amd.com    }
10234Syasuko.eckert@amd.com    //this was  20 micron for the 0.8 micron process
10234Syasuko.eckert@amd.com    WNANDn = (process_ind) ? 25 * l_ip.F_sz_um : g_tp.min_w_nmos_ ;
10234Syasuko.eckert@amd.com    //this was  30 micron for the 0.8 micron process
10234Syasuko.eckert@amd.com    WNANDp = (process_ind) ? 37.5 * l_ip.F_sz_um : g_tp.min_w_nmos_ *
10234Syasuko.eckert@amd.com        pmos_to_nmos_sz_ratio();
10234Syasuko.eckert@amd.com    load_per_pipeline_stage = 2 * gate_C(WNANDn + WNANDp, 0, false);
10234Syasuko.eckert@amd.com    compute();
10152Satgutier@umich.edu
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comvoid Pipeline::compute() {
10234Syasuko.eckert@amd.com    compute_stage_vector();
10234Syasuko.eckert@amd.com    DFFCell pipe_reg(false, WNANDn, WNANDp, load_per_pipeline_stage, &l_ip);
10234Syasuko.eckert@amd.com    pipe_reg.compute_DFF_cell();
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    double clock_power_pipereg = num_piperegs * pipe_reg.e_clock.readOp.dynamic;
10234Syasuko.eckert@amd.com    //******************pipeline power: currently, we average all the possibilities of the states of DFFs in the pipeline. A better way to do it is to consider
10234Syasuko.eckert@amd.com    //the harming distance of two consecutive signals, However McPAT does not have plan to do this in near future as it focuses on worst case power.
10234Syasuko.eckert@amd.com    double pipe_reg_power = num_piperegs *
10234Syasuko.eckert@amd.com        (pipe_reg.e_switch.readOp.dynamic + pipe_reg.e_keep_0.readOp.dynamic +
10234Syasuko.eckert@amd.com         pipe_reg.e_keep_1.readOp.dynamic) / 3 + clock_power_pipereg;
10234Syasuko.eckert@amd.com    double pipe_reg_leakage = num_piperegs * pipe_reg.e_switch.readOp.leakage;
10234Syasuko.eckert@amd.com    double pipe_reg_gate_leakage = num_piperegs *
10234Syasuko.eckert@amd.com        pipe_reg.e_switch.readOp.gate_leakage;
10234Syasuko.eckert@amd.com    power.readOp.dynamic	+= pipe_reg_power;
10234Syasuko.eckert@amd.com    power.readOp.leakage	+= pipe_reg_leakage;
10234Syasuko.eckert@amd.com    power.readOp.gate_leakage	+= pipe_reg_gate_leakage;
10234Syasuko.eckert@amd.com    area.set_area(num_piperegs * pipe_reg.area.get_area());
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    double long_channel_device_reduction =
10234Syasuko.eckert@amd.com        longer_channel_device_reduction(device_ty, coredynp.core_ty);
10234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage	= power.readOp.leakage *
10234Syasuko.eckert@amd.com        long_channel_device_reduction;
10152Satgutier@umich.edu
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    double sckRation = g_tp.sckt_co_eff;
10234Syasuko.eckert@amd.com    power.readOp.dynamic *= sckRation;
10234Syasuko.eckert@amd.com    power.writeOp.dynamic *= sckRation;
10234Syasuko.eckert@amd.com    power.searchOp.dynamic *= sckRation;
10234Syasuko.eckert@amd.com    double macro_layout_overhead = g_tp.macro_layout_overhead;
10152Satgutier@umich.edu        if (!coredynp.Embedded)
10234Syasuko.eckert@amd.com                area.set_area(area.get_area() * macro_layout_overhead);
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    output_data.area = area.get_area() / 1e6;
10234Syasuko.eckert@amd.com    output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
10234Syasuko.eckert@amd.com    output_data.subthreshold_leakage_power = power.readOp.leakage;
10234Syasuko.eckert@amd.com    output_data.gate_leakage_power = power.readOp.gate_leakage;
10234Syasuko.eckert@amd.com    output_data.runtime_dynamic_energy = power.readOp.dynamic * total_cycles;
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comvoid Pipeline::compute_stage_vector() {
10234Syasuko.eckert@amd.com    double num_stages, tot_stage_vector, per_stage_vector;
10234Syasuko.eckert@amd.com    int opcode_length = coredynp.x86 ?
10234Syasuko.eckert@amd.com        coredynp.micro_opcode_length : coredynp.opcode_width;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    if (!is_core_pipeline) {
10234Syasuko.eckert@amd.com        //The number of pipeline stages are calculated based on the achievable
10234Syasuko.eckert@amd.com        //throughput and required throughput
10234Syasuko.eckert@amd.com        num_piperegs = l_ip.pipeline_stages * l_ip.per_stage_vector;
10234Syasuko.eckert@amd.com    } else {
10234Syasuko.eckert@amd.com        if (coredynp.core_ty == Inorder) {
10234Syasuko.eckert@amd.com            /* assume 6 pipe stages and try to estimate bits per pipe stage */
10234Syasuko.eckert@amd.com            /* pipe stage 0/IF */
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.pc_width * 2 * coredynp.num_hthreads;
10234Syasuko.eckert@amd.com            /* pipe stage IF/ID */
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.fetchW *
10234Syasuko.eckert@amd.com                (coredynp.instruction_length + coredynp.pc_width) *
10234Syasuko.eckert@amd.com                coredynp.num_hthreads;
10234Syasuko.eckert@amd.com            /* pipe stage IF/ThreadSEL */
10234Syasuko.eckert@amd.com            if (coredynp.multithreaded) {
10234Syasuko.eckert@amd.com                num_piperegs += coredynp.num_hthreads *
10234Syasuko.eckert@amd.com                    coredynp.perThreadState; //8 bit thread states
10234Syasuko.eckert@amd.com            }
10234Syasuko.eckert@amd.com            /* pipe stage ID/EXE */
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.decodeW *
10234Syasuko.eckert@amd.com                (coredynp.instruction_length + coredynp.pc_width +
10234Syasuko.eckert@amd.com                 pow(2.0, opcode_length) + 2 * coredynp.int_data_width) *
10234Syasuko.eckert@amd.com                coredynp.num_hthreads;
10234Syasuko.eckert@amd.com            /* pipe stage EXE/MEM */
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
10234Syasuko.eckert@amd.com                (3 * coredynp.arch_ireg_width + pow(2.0, opcode_length) + 8 *
10234Syasuko.eckert@amd.com                 2 * coredynp.int_data_width/*+2*powers (2,reg_length)*/);
10234Syasuko.eckert@amd.com            /* pipe stage MEM/WB the 2^opcode_length means the total decoded signal for the opcode*/
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
10234Syasuko.eckert@amd.com                (2 * coredynp.int_data_width + pow(2.0, opcode_length) + 8 *
10234Syasuko.eckert@amd.com                 2 * coredynp.int_data_width/*+2*powers (2,reg_length)*/);
10234Syasuko.eckert@amd.com            num_stages = 6;
10234Syasuko.eckert@amd.com        } else {
10234Syasuko.eckert@amd.com            /* assume 12 stage pipe stages and try to estimate bits per pipe stage */
10234Syasuko.eckert@amd.com            /*OOO: Fetch, decode, rename, IssueQ, dispatch, regread, EXE, MEM, WB, CM */
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com            /* pipe stage 0/1F*/
10234Syasuko.eckert@amd.com            num_piperegs +=
10234Syasuko.eckert@amd.com                coredynp.pc_width * 2 * coredynp.num_hthreads ;//PC and Next PC
10234Syasuko.eckert@amd.com            /* pipe stage IF/ID */
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.fetchW *
10234Syasuko.eckert@amd.com                (coredynp.instruction_length + coredynp.pc_width) *
10234Syasuko.eckert@amd.com                coredynp.num_hthreads;//PC is used to feed branch predictor in ID
10234Syasuko.eckert@amd.com            /* pipe stage 1D/Renaming*/
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.decodeW *
10234Syasuko.eckert@amd.com                (coredynp.instruction_length + coredynp.pc_width) *
10234Syasuko.eckert@amd.com                coredynp.num_hthreads;//PC is for branch exe in later stage.
10234Syasuko.eckert@amd.com            /* pipe stage Renaming/wire_drive */
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.decodeW *
10234Syasuko.eckert@amd.com                (coredynp.instruction_length + coredynp.pc_width);
10234Syasuko.eckert@amd.com            /* pipe stage Renaming/IssueQ */
10234Syasuko.eckert@amd.com            //3*coredynp.phy_ireg_width means 2 sources and 1 dest
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
10234Syasuko.eckert@amd.com                (coredynp.instruction_length  + coredynp.pc_width + 3 *
10234Syasuko.eckert@amd.com                 coredynp.phy_ireg_width) * coredynp.num_hthreads;
10234Syasuko.eckert@amd.com            /* pipe stage IssueQ/Dispatch */
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
10234Syasuko.eckert@amd.com                (coredynp.instruction_length + 3 * coredynp.phy_ireg_width);
10234Syasuko.eckert@amd.com            /* pipe stage Dispatch/EXE */
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
10234Syasuko.eckert@amd.com                (3 * coredynp.phy_ireg_width + coredynp.pc_width +
10234Syasuko.eckert@amd.com                 pow(2.0, opcode_length)/*+2*powers (2,reg_length)*/);
10234Syasuko.eckert@amd.com            /* 2^opcode_length means the total decoded signal for the opcode*/
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
10234Syasuko.eckert@amd.com                (2 * coredynp.int_data_width + pow(2.0, opcode_length)
10234Syasuko.eckert@amd.com                 /*+2*powers (2,reg_length)*/);
10234Syasuko.eckert@amd.com            /*2 source operands in EXE; Assume 2EXE stages* since we do not really distinguish OP*/
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
10234Syasuko.eckert@amd.com                (2 * coredynp.int_data_width + pow(2.0, opcode_length)
10234Syasuko.eckert@amd.com                 /*+2*powers (2,reg_length)*/);
10234Syasuko.eckert@amd.com            /* pipe stage EXE/MEM, data need to be read/write, address*/
10234Syasuko.eckert@amd.com            //memory Opcode still need to be passed
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
10234Syasuko.eckert@amd.com                (coredynp.int_data_width + coredynp.v_address_width +
10234Syasuko.eckert@amd.com                 pow(2.0, opcode_length)/*+2*powers (2,reg_length)*/);
10234Syasuko.eckert@amd.com            /* pipe stage MEM/WB; result data, writeback regs */
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.issueW *
10234Syasuko.eckert@amd.com                (coredynp.int_data_width + coredynp.phy_ireg_width
10234Syasuko.eckert@amd.com                 /* powers (2,opcode_length) +
10234Syasuko.eckert@amd.com                    (2,opcode_length)+2*powers (2,reg_length)*/);
10234Syasuko.eckert@amd.com            /* pipe stage WB/CM ; result data, regs need to be updated, address for resolve memory ops in ROB's top*/
10234Syasuko.eckert@amd.com            num_piperegs += coredynp.commitW *
10234Syasuko.eckert@amd.com                (coredynp.int_data_width + coredynp.v_address_width +
10234Syasuko.eckert@amd.com                 coredynp.phy_ireg_width
10234Syasuko.eckert@amd.com                 /*+ powers (2,opcode_length)*2*powers (2,reg_length)*/) *
10234Syasuko.eckert@amd.com                coredynp.num_hthreads;
10234Syasuko.eckert@amd.com            num_stages = 12;
10152Satgutier@umich.edu
10152Satgutier@umich.edu        }
10152Satgutier@umich.edu
10152Satgutier@umich.edu        /* assume 50% extra in control registers and interrupt registers (rule of thumb) */
10152Satgutier@umich.edu        num_piperegs = num_piperegs * 1.5;
10234Syasuko.eckert@amd.com        tot_stage_vector = num_piperegs;
10234Syasuko.eckert@amd.com        per_stage_vector = tot_stage_vector / num_stages;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com        if (coredynp.core_ty == Inorder) {
10234Syasuko.eckert@amd.com            if (coredynp.pipeline_stages > 6)
10234Syasuko.eckert@amd.com                num_piperegs = per_stage_vector * coredynp.pipeline_stages;
10234Syasuko.eckert@amd.com        } else { //OOO
10234Syasuko.eckert@amd.com            if (coredynp.pipeline_stages > 12)
10234Syasuko.eckert@amd.com                num_piperegs = per_stage_vector * coredynp.pipeline_stages;
10152Satgutier@umich.edu        }
10234Syasuko.eckert@amd.com    }
10152Satgutier@umich.edu
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comFunctionalUnit::FunctionalUnit(XMLNode* _xml_data,
10234Syasuko.eckert@amd.com                               InputParameter* interface_ip_,
10234Syasuko.eckert@amd.com                               const CoreParameters & _core_params,
10234Syasuko.eckert@amd.com                               const CoreStatistics & _core_stats,
10234Syasuko.eckert@amd.com                               enum FU_type fu_type_)
10234Syasuko.eckert@amd.com    : McPATComponent(_xml_data),
10234Syasuko.eckert@amd.com      interface_ip(*interface_ip_), core_params(_core_params),
10234Syasuko.eckert@amd.com      core_stats(_core_stats), fu_type(fu_type_) {
10234Syasuko.eckert@amd.com    double area_t;
10234Syasuko.eckert@amd.com    double leakage;
10234Syasuko.eckert@amd.com    double gate_leakage;
10152Satgutier@umich.edu    double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
10234Syasuko.eckert@amd.com    clockRate = core_params.clockRate;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    uca_org_t result2;
10234Syasuko.eckert@amd.com    // Temp name for the following function call
10234Syasuko.eckert@amd.com    name = "Functional Unit";
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    result2 = init_interface(&interface_ip, name);
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com        if (core_params.Embedded) {
10234Syasuko.eckert@amd.com            if (fu_type == FPU) {
10234Syasuko.eckert@amd.com                num_fu=core_params.num_fpus;
10152Satgutier@umich.edu                        //area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2
10152Satgutier@umich.edu                        area_t = 4.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 The base number
10152Satgutier@umich.edu                        //4.47 contains both VFP and NEON processing unit, VFP is about 40% and NEON is about 60%
10152Satgutier@umich.edu                        if (g_ip->F_sz_nm>90)
10152Satgutier@umich.edu                                area_t = 4.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2
10152Satgutier@umich.edu                        leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
10152Satgutier@umich.edu                        gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
10152Satgutier@umich.edu                        //energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction in FPU usually it can have up to 20 cycles.
10152Satgutier@umich.edu//			base_energy = coredynp.core_ty==Inorder? 0: 89e-3*3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch)
10152Satgutier@umich.edu//			base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);
10152Satgutier@umich.edu                        base_energy = 0;
10152Satgutier@umich.edu                        per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per Hz energy(nJ)
10152Satgutier@umich.edu                        //FPU power from Sandia's processor sizing tech report
10152Satgutier@umich.edu                        FU_height=(18667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data
10234Syasuko.eckert@amd.com            } else if (fu_type == ALU) {
10234Syasuko.eckert@amd.com                num_fu=core_params.num_alus;
10152Satgutier@umich.edu                        area_t = 280*260*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl
10152Satgutier@umich.edu                        leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
10152Satgutier@umich.edu                        gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
10152Satgutier@umich.edu//			base_energy = coredynp.core_ty==Inorder? 0:89e-3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch)
10152Satgutier@umich.edu//			base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);
10152Satgutier@umich.edu                        base_energy = 0;
10152Satgutier@umich.edu                        per_access_energy = 1.15/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ)
10152Satgutier@umich.edu                        FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com            } else if (fu_type == MUL) {
10234Syasuko.eckert@amd.com                num_fu=core_params.num_muls;
10152Satgutier@umich.edu                        area_t = 280*260*3*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl
10152Satgutier@umich.edu                        leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
10152Satgutier@umich.edu                        gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
10152Satgutier@umich.edu//			base_energy = coredynp.core_ty==Inorder? 0:89e-3*2; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch)
10152Satgutier@umich.edu//			base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);
10152Satgutier@umich.edu                        base_energy = 0;
10152Satgutier@umich.edu                        per_access_energy = 1.15*2/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch
10152Satgutier@umich.edu                        FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data
10234Syasuko.eckert@amd.com            } else {
10152Satgutier@umich.edu                        cout<<"Unknown Functional Unit Type"<<endl;
10152Satgutier@umich.edu                        exit(0);
10152Satgutier@umich.edu                }
10152Satgutier@umich.edu                per_access_energy *=0.5;//According to ARM data embedded processor has much lower per acc energy
10234Syasuko.eckert@amd.com        } else {
10234Syasuko.eckert@amd.com            if (fu_type == FPU) {
10234Syasuko.eckert@amd.com                name = "Floating Point Unit(s)";
10234Syasuko.eckert@amd.com                num_fu = core_params.num_fpus;
10234Syasuko.eckert@amd.com                area_t = 8.47 * 1e6 * (g_ip->F_sz_nm * g_ip->F_sz_nm / 90.0 /
10234Syasuko.eckert@amd.com                                       90.0);//this is um^2
10234Syasuko.eckert@amd.com                if (g_ip->F_sz_nm > 90)
10234Syasuko.eckert@amd.com                    area_t = 8.47 * 1e6 *
10234Syasuko.eckert@amd.com                        g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2
10234Syasuko.eckert@amd.com            leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
10234Syasuko.eckert@amd.com            gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
10234Syasuko.eckert@amd.com            //W The base energy of ALU average numbers from Intel 4G and
10234Syasuko.eckert@amd.com            //773Mhz (Wattch)
10234Syasuko.eckert@amd.com            base_energy = core_params.core_ty == Inorder ? 0 : 89e-3 * 3;
10234Syasuko.eckert@amd.com            base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 /
10234Syasuko.eckert@amd.com                            1.2);
10234Syasuko.eckert@amd.com            per_access_energy = 1.15*3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per op energy(nJ)
10234Syasuko.eckert@amd.com            FU_height=(38667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data
10234Syasuko.eckert@amd.com        } else if (fu_type == ALU) {
10234Syasuko.eckert@amd.com            name = "Integer ALU(s)";
10234Syasuko.eckert@amd.com            num_fu = core_params.num_alus;
10234Syasuko.eckert@amd.com            //this is um^2 ALU + MUl
10234Syasuko.eckert@amd.com            area_t = 280 * 260 * 2 * g_tp.scaling_factor.logic_scaling_co_eff;
10234Syasuko.eckert@amd.com            leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
10234Syasuko.eckert@amd.com            gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
10234Syasuko.eckert@amd.com            //W The base energy of ALU average numbers from Intel 4G and 773Mhz
10234Syasuko.eckert@amd.com            //(Wattch)
10234Syasuko.eckert@amd.com            base_energy = core_params.core_ty == Inorder ? 0 : 89e-3;
10234Syasuko.eckert@amd.com            base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 /
10234Syasuko.eckert@amd.com                            1.2);
10234Syasuko.eckert@amd.com            per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ)
10234Syasuko.eckert@amd.com            FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU
10234Syasuko.eckert@amd.com        } else if (fu_type == MUL) {
10234Syasuko.eckert@amd.com            name = "Multiply/Divide Unit(s)";
10234Syasuko.eckert@amd.com            num_fu = core_params.num_muls;
10234Syasuko.eckert@amd.com            //this is um^2 ALU + MUl
10234Syasuko.eckert@amd.com            area_t = 280 * 260 * 2 * 3 *
10234Syasuko.eckert@amd.com                g_tp.scaling_factor.logic_scaling_co_eff;
10234Syasuko.eckert@amd.com            leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
10234Syasuko.eckert@amd.com            gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
10234Syasuko.eckert@amd.com            //W The base energy of ALU average numbers from Intel 4G and 773Mhz
10234Syasuko.eckert@amd.com            //(Wattch)
10234Syasuko.eckert@amd.com            base_energy = core_params.core_ty == Inorder ? 0 : 89e-3 * 2;
10234Syasuko.eckert@amd.com            base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 /
10234Syasuko.eckert@amd.com                            1.2);
10234Syasuko.eckert@amd.com            per_access_energy = 1.15*2/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch
10234Syasuko.eckert@amd.com            FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data
10234Syasuko.eckert@amd.com        } else {
10234Syasuko.eckert@amd.com            cout << "Unknown Functional Unit Type" << endl;
10234Syasuko.eckert@amd.com            exit(0);
10152Satgutier@umich.edu        }
10234Syasuko.eckert@amd.com    }
10152Satgutier@umich.edu
10152Satgutier@umich.edu    area.set_area(area_t*num_fu);
10234Syasuko.eckert@amd.com    power.readOp.leakage = leakage * num_fu;
10234Syasuko.eckert@amd.com    power.readOp.gate_leakage = gate_leakage * num_fu;
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    double long_channel_device_reduction =
10234Syasuko.eckert@amd.com        longer_channel_device_reduction(Core_device, core_params.core_ty);
10234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage	=
10234Syasuko.eckert@amd.com        power.readOp.leakage * long_channel_device_reduction;
10234Syasuko.eckert@amd.com    double macro_layout_overhead = g_tp.macro_layout_overhead;
10234Syasuko.eckert@amd.com    area.set_area(area.get_area()*macro_layout_overhead);
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comvoid FunctionalUnit::computeEnergy() {
10234Syasuko.eckert@amd.com    double pppm_t[4]    = {1, 1, 1, 1};
10234Syasuko.eckert@amd.com    double FU_duty_cycle;
10234Syasuko.eckert@amd.com    double sckRation = g_tp.sckt_co_eff;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    // TDP power calculation
10234Syasuko.eckert@amd.com    //2 means two source operands needs to be passed for each int instruction.
10234Syasuko.eckert@amd.com    set_pppm(pppm_t, 2, 2, 2, 2);
10234Syasuko.eckert@amd.com    tdp_stats.readAc.access = num_fu;
10234Syasuko.eckert@amd.com    if (fu_type == FPU) {
10234Syasuko.eckert@amd.com        FU_duty_cycle = core_stats.FPU_duty_cycle;
10234Syasuko.eckert@amd.com    } else if (fu_type == ALU) {
10234Syasuko.eckert@amd.com        FU_duty_cycle = core_stats.ALU_duty_cycle;
10234Syasuko.eckert@amd.com    } else if (fu_type == MUL) {
10234Syasuko.eckert@amd.com        FU_duty_cycle = core_stats.MUL_duty_cycle;
10234Syasuko.eckert@amd.com    }
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    power.readOp.dynamic =
10234Syasuko.eckert@amd.com        per_access_energy * tdp_stats.readAc.access + base_energy / clockRate;
10234Syasuko.eckert@amd.com    power.readOp.dynamic *= sckRation * FU_duty_cycle;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    // Runtime power calculation
10234Syasuko.eckert@amd.com    if (fu_type == FPU) {
10234Syasuko.eckert@amd.com        rtp_stats.readAc.access = core_stats.fpu_accesses;
10234Syasuko.eckert@amd.com    } else if (fu_type == ALU) {
10234Syasuko.eckert@amd.com        rtp_stats.readAc.access = core_stats.ialu_accesses;
10234Syasuko.eckert@amd.com    } else if (fu_type == MUL) {
10234Syasuko.eckert@amd.com        rtp_stats.readAc.access = core_stats.mul_accesses;
10234Syasuko.eckert@amd.com    }
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    rt_power.readOp.dynamic = per_access_energy * rtp_stats.readAc.access +
10234Syasuko.eckert@amd.com        base_energy * execution_time;
10234Syasuko.eckert@amd.com    rt_power.readOp.dynamic *= sckRation;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    output_data.area = area.get_area() / 1e6;
10234Syasuko.eckert@amd.com    output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
10234Syasuko.eckert@amd.com    output_data.subthreshold_leakage_power =
10234Syasuko.eckert@amd.com        (longer_channel_device) ? power.readOp.longer_channel_leakage :
10234Syasuko.eckert@amd.com        power.readOp.leakage;
10234Syasuko.eckert@amd.com    output_data.gate_leakage_power = power.readOp.gate_leakage;
10234Syasuko.eckert@amd.com    output_data.runtime_dynamic_energy = rt_power.readOp.dynamic;
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10152Satgutier@umich.eduvoid FunctionalUnit::leakage_feedback(double temperature)
10152Satgutier@umich.edu{
10152Satgutier@umich.edu  // Update the temperature and initialize the global interfaces.
10152Satgutier@umich.edu  interface_ip.temp = (unsigned int)round(temperature/10.0)*10;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com  // init_result is dummy
10234Syasuko.eckert@amd.com  uca_org_t init_result = init_interface(&interface_ip, name);
10152Satgutier@umich.edu
10152Satgutier@umich.edu  // This is part of FunctionalUnit()
10152Satgutier@umich.edu  double area_t, leakage, gate_leakage;
10152Satgutier@umich.edu  double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
10152Satgutier@umich.edu
10152Satgutier@umich.edu  if (fu_type == FPU)
10152Satgutier@umich.edu  {
10152Satgutier@umich.edu        area_t = 4.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 The base number
10152Satgutier@umich.edu        if (g_ip->F_sz_nm>90)
10152Satgutier@umich.edu                area_t = 4.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2
10152Satgutier@umich.edu        leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
10152Satgutier@umich.edu        gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
10152Satgutier@umich.edu  }
10152Satgutier@umich.edu  else if (fu_type == ALU)
10152Satgutier@umich.edu  {
10152Satgutier@umich.edu    area_t = 280*260*2*num_fu*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl
10152Satgutier@umich.edu    leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
10152Satgutier@umich.edu    gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
10152Satgutier@umich.edu  }
10152Satgutier@umich.edu  else if (fu_type == MUL)
10152Satgutier@umich.edu  {
10152Satgutier@umich.edu    area_t = 280*260*2*3*num_fu*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl
10152Satgutier@umich.edu    leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W
10152Satgutier@umich.edu    gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;
10152Satgutier@umich.edu  }
10152Satgutier@umich.edu  else
10152Satgutier@umich.edu  {
10152Satgutier@umich.edu    cout<<"Unknown Functional Unit Type"<<endl;
10152Satgutier@umich.edu    exit(1);
10152Satgutier@umich.edu  }
10152Satgutier@umich.edu
10152Satgutier@umich.edu  power.readOp.leakage = leakage*num_fu;
10152Satgutier@umich.edu  power.readOp.gate_leakage = gate_leakage*num_fu;
10234Syasuko.eckert@amd.com  power.readOp.longer_channel_leakage =
10234Syasuko.eckert@amd.com      longer_channel_device_reduction(Core_device, core_params.core_ty);
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comUndiffCore::UndiffCore(XMLNode* _xml_data, InputParameter* interface_ip_,
10234Syasuko.eckert@amd.com                       const CoreParameters & dyn_p_,
10234Syasuko.eckert@amd.com                       bool exist_)
10234Syasuko.eckert@amd.com        : McPATComponent(_xml_data),
10234Syasuko.eckert@amd.com        interface_ip(*interface_ip_), coredynp(dyn_p_),
10234Syasuko.eckert@amd.com        core_ty(coredynp.core_ty), embedded(coredynp.Embedded),
10234Syasuko.eckert@amd.com        pipeline_stage(coredynp.pipeline_stages),
10234Syasuko.eckert@amd.com        num_hthreads(coredynp.num_hthreads), issue_width(coredynp.issueW),
10234Syasuko.eckert@amd.com        exist(exist_) {
10234Syasuko.eckert@amd.com    if (!exist) return;
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    name = "Undifferentiated Core";
10234Syasuko.eckert@amd.com    clockRate = coredynp.clockRate;
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    double undifferentiated_core = 0;
10234Syasuko.eckert@amd.com    double core_tx_density = 0;
10234Syasuko.eckert@amd.com    double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
10152Satgutier@umich.edu        double undifferentiated_core_coe;
10234Syasuko.eckert@amd.com    uca_org_t result2;
10234Syasuko.eckert@amd.com    result2 = init_interface(&interface_ip, name);
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    //Compute undifferentiated core area at 90nm.
10234Syasuko.eckert@amd.com    if (embedded == false) {
10234Syasuko.eckert@amd.com        //Based on the results of polynomial/log curve fitting based on undifferentiated core of Niagara, Niagara2, Merom, Penyrn, Prescott, Opteron die measurements
10234Syasuko.eckert@amd.com        if (core_ty == OOO) {
10234Syasuko.eckert@amd.com            undifferentiated_core = (3.57 * log(pipeline_stage) - 1.2643) > 0 ?
10234Syasuko.eckert@amd.com                (3.57 * log(pipeline_stage) - 1.2643) : 0;
10234Syasuko.eckert@amd.com        } else if (core_ty == Inorder) {
10234Syasuko.eckert@amd.com            undifferentiated_core = (-2.19 * log(pipeline_stage) + 6.55) > 0 ?
10234Syasuko.eckert@amd.com                (-2.19 * log(pipeline_stage) + 6.55) : 0;
10234Syasuko.eckert@amd.com        } else {
10234Syasuko.eckert@amd.com            cout << "invalid core type" << endl;
10234Syasuko.eckert@amd.com            exit(0);
10152Satgutier@umich.edu        }
10234Syasuko.eckert@amd.com        undifferentiated_core *= (1 + logtwo(num_hthreads) * 0.0716);
10234Syasuko.eckert@amd.com    } else {
10234Syasuko.eckert@amd.com        //Based on the results in paper "parametrized processor models" Sandia Labs
10234Syasuko.eckert@amd.com                if (opt_for_clk)
10152Satgutier@umich.edu                        undifferentiated_core_coe = 0.05;
10152Satgutier@umich.edu                else
10152Satgutier@umich.edu                        undifferentiated_core_coe = 0;
10234Syasuko.eckert@amd.com                undifferentiated_core = (0.4109 * pipeline_stage - 0.776) *
10234Syasuko.eckert@amd.com                    undifferentiated_core_coe;
10234Syasuko.eckert@amd.com                undifferentiated_core *= (1 + logtwo(num_hthreads) * 0.0426);
10234Syasuko.eckert@amd.com    }
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    undifferentiated_core *= g_tp.scaling_factor.logic_scaling_co_eff *
10234Syasuko.eckert@amd.com        1e6;//change from mm^2 to um^2
10234Syasuko.eckert@amd.com    core_tx_density                 = g_tp.scaling_factor.core_tx_density;
10234Syasuko.eckert@amd.com    power.readOp.leakage = undifferentiated_core*(core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W
10234Syasuko.eckert@amd.com    power.readOp.gate_leakage = undifferentiated_core*(core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty);
10234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage	=
10234Syasuko.eckert@amd.com        power.readOp.leakage * long_channel_device_reduction;
10234Syasuko.eckert@amd.com    area.set_area(undifferentiated_core);
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    scktRatio = g_tp.sckt_co_eff;
10234Syasuko.eckert@amd.com    power.readOp.dynamic *= scktRatio;
10234Syasuko.eckert@amd.com    power.writeOp.dynamic *= scktRatio;
10234Syasuko.eckert@amd.com    power.searchOp.dynamic *= scktRatio;
10234Syasuko.eckert@amd.com    macro_PR_overhead = g_tp.macro_layout_overhead;
10234Syasuko.eckert@amd.com    area.set_area(area.get_area()*macro_PR_overhead);
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    output_data.area = area.get_area() / 1e6;
10234Syasuko.eckert@amd.com    output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
10234Syasuko.eckert@amd.com    output_data.subthreshold_leakage_power =
10234Syasuko.eckert@amd.com        longer_channel_device ? power.readOp.longer_channel_leakage :
10234Syasuko.eckert@amd.com        power.readOp.leakage;
10234Syasuko.eckert@amd.com    output_data.gate_leakage_power = power.readOp.gate_leakage;
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comInstructionDecoder::InstructionDecoder(XMLNode* _xml_data, const string _name,
10234Syasuko.eckert@amd.com                                       bool _is_default,
10234Syasuko.eckert@amd.com                                       const InputParameter *configure_interface,
10234Syasuko.eckert@amd.com                                       int opcode_length_, int num_decoders_,
10234Syasuko.eckert@amd.com                                       bool x86_,
10234Syasuko.eckert@amd.com                                       double clockRate_,
10234Syasuko.eckert@amd.com                                       enum Device_ty device_ty_,
10234Syasuko.eckert@amd.com                                       enum Core_type core_ty_)
10234Syasuko.eckert@amd.com    : McPATComponent(_xml_data), is_default(_is_default),
10234Syasuko.eckert@amd.com      opcode_length(opcode_length_), num_decoders(num_decoders_), x86(x86_),
10234Syasuko.eckert@amd.com      device_ty(device_ty_), core_ty(core_ty_) {
10234Syasuko.eckert@amd.com    /*
10234Syasuko.eckert@amd.com     * Instruction decoder is different from n to 2^n decoders
10234Syasuko.eckert@amd.com     * that are commonly used in row decoders in memory arrays.
10234Syasuko.eckert@amd.com     * The RISC instruction decoder is typically a very simple device.
10234Syasuko.eckert@amd.com     * We can decode an instruction by simply
10234Syasuko.eckert@amd.com     * separating the machine word into small parts using wire slices
10234Syasuko.eckert@amd.com     * The RISC instruction decoder can be approximate by the n to 2^n decoders,
10234Syasuko.eckert@amd.com     * although this approximation usually underestimate power since each decoded
10234Syasuko.eckert@amd.com     * instruction normally has more than 1 active signal.
10234Syasuko.eckert@amd.com     *
10234Syasuko.eckert@amd.com     * However, decoding a CISC instruction word is much more difficult
10234Syasuko.eckert@amd.com     * than the RISC case. A CISC decoder is typically set up as a state machine.
10234Syasuko.eckert@amd.com     * The machine reads the opcode field to determine
10234Syasuko.eckert@amd.com     * what type of instruction it is,
10234Syasuko.eckert@amd.com     * and where the other data values are.
10234Syasuko.eckert@amd.com     * The instruction word is read in piece by piece,
10234Syasuko.eckert@amd.com     * and decisions are made at each stage as to
10234Syasuko.eckert@amd.com     * how the remainder of the instruction word will be read.
10234Syasuko.eckert@amd.com     * (sequencer and ROM are usually needed)
10234Syasuko.eckert@amd.com     * An x86 decoder can be even more complex since
10234Syasuko.eckert@amd.com     * it involve  both decoding instructions into u-ops and
10234Syasuko.eckert@amd.com     * merge u-ops when doing micro-ops fusion.
10234Syasuko.eckert@amd.com     */
10234Syasuko.eckert@amd.com    name = _name;
10234Syasuko.eckert@amd.com    clockRate = clockRate_;
10234Syasuko.eckert@amd.com    bool is_dram = false;
10234Syasuko.eckert@amd.com    double pmos_to_nmos_sizing_r;
10234Syasuko.eckert@amd.com    double load_nmos_width, load_pmos_width;
10234Syasuko.eckert@amd.com    double C_driver_load, R_wire_load;
10234Syasuko.eckert@amd.com    Area cell;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    l_ip = *configure_interface;
10234Syasuko.eckert@amd.com    local_result = init_interface(&l_ip, name);
10234Syasuko.eckert@amd.com    cell.h = g_tp.cell_h_def;
10234Syasuko.eckert@amd.com    cell.w = g_tp.cell_h_def;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    num_decoder_segments = (int)ceil(opcode_length / 18.0);
10234Syasuko.eckert@amd.com    if (opcode_length > 18)	opcode_length = 18;
10234Syasuko.eckert@amd.com    num_decoded_signals = (int)pow(2.0, opcode_length);
10234Syasuko.eckert@amd.com    pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
10234Syasuko.eckert@amd.com    load_nmos_width = g_tp.max_w_nmos_ / 2;
10234Syasuko.eckert@amd.com    load_pmos_width = g_tp.max_w_nmos_ * pmos_to_nmos_sizing_r;
10234Syasuko.eckert@amd.com    C_driver_load = 1024 * gate_C(load_nmos_width + load_pmos_width, 0, is_dram);
10234Syasuko.eckert@amd.com    R_wire_load   = 3000 * l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    final_dec = new Decoder(
10234Syasuko.eckert@amd.com        num_decoded_signals,
10234Syasuko.eckert@amd.com        false,
10234Syasuko.eckert@amd.com        C_driver_load,
10234Syasuko.eckert@amd.com        R_wire_load,
10234Syasuko.eckert@amd.com        false/*is_fa*/,
10234Syasuko.eckert@amd.com        false/*is_dram*/,
10234Syasuko.eckert@amd.com        false/*wl_tr*/, //to use peri device
10234Syasuko.eckert@amd.com        cell);
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    PredecBlk * predec_blk1 = new PredecBlk(
10234Syasuko.eckert@amd.com        num_decoded_signals,
10234Syasuko.eckert@amd.com        final_dec,
10234Syasuko.eckert@amd.com        0,//Assuming predec and dec are back to back
10234Syasuko.eckert@amd.com        0,
10234Syasuko.eckert@amd.com        1,//Each Predec only drives one final dec
10234Syasuko.eckert@amd.com        false/*is_dram*/,
10234Syasuko.eckert@amd.com        true);
10234Syasuko.eckert@amd.com    PredecBlk * predec_blk2 = new PredecBlk(
10234Syasuko.eckert@amd.com        num_decoded_signals,
10234Syasuko.eckert@amd.com        final_dec,
10234Syasuko.eckert@amd.com        0,//Assuming predec and dec are back to back
10234Syasuko.eckert@amd.com        0,
10234Syasuko.eckert@amd.com        1,//Each Predec only drives one final dec
10234Syasuko.eckert@amd.com        false/*is_dram*/,
10234Syasuko.eckert@amd.com        false);
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    PredecBlkDrv * predec_blk_drv1 = new PredecBlkDrv(0, predec_blk1, false);
10234Syasuko.eckert@amd.com    PredecBlkDrv * predec_blk_drv2 = new PredecBlkDrv(0, predec_blk2, false);
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    pre_dec            = new Predec(predec_blk_drv1, predec_blk_drv2);
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    double area_decoder = final_dec->area.get_area() * num_decoded_signals *
10234Syasuko.eckert@amd.com        num_decoder_segments * num_decoders;
10234Syasuko.eckert@amd.com    //double w_decoder    = area_decoder / area.get_h();
10234Syasuko.eckert@amd.com    double area_pre_dec = (predec_blk_drv1->area.get_area() +
10234Syasuko.eckert@amd.com                           predec_blk_drv2->area.get_area() +
10234Syasuko.eckert@amd.com                           predec_blk1->area.get_area() +
10234Syasuko.eckert@amd.com                           predec_blk2->area.get_area()) *
10234Syasuko.eckert@amd.com                          num_decoder_segments * num_decoders;
10234Syasuko.eckert@amd.com    area.set_area(area.get_area() + area_decoder + area_pre_dec);
10234Syasuko.eckert@amd.com    double macro_layout_overhead   = g_tp.macro_layout_overhead;
10234Syasuko.eckert@amd.com    double chip_PR_overhead        = g_tp.chip_layout_overhead;
10234Syasuko.eckert@amd.com    area.set_area(area.get_area()*macro_layout_overhead*chip_PR_overhead);
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    inst_decoder_delay_power();
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    double sckRation = g_tp.sckt_co_eff;
10234Syasuko.eckert@amd.com    power.readOp.dynamic *= sckRation;
10234Syasuko.eckert@amd.com    power.writeOp.dynamic *= sckRation;
10234Syasuko.eckert@amd.com    power.searchOp.dynamic *= sckRation;
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    double long_channel_device_reduction =
10234Syasuko.eckert@amd.com        longer_channel_device_reduction(device_ty, core_ty);
10234Syasuko.eckert@amd.com    power.readOp.longer_channel_leakage	= power.readOp.leakage *
10234Syasuko.eckert@amd.com        long_channel_device_reduction;
10234Syasuko.eckert@amd.com
10234Syasuko.eckert@amd.com    output_data.area = area.get_area() / 1e6;
10234Syasuko.eckert@amd.com    output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
10234Syasuko.eckert@amd.com    output_data.subthreshold_leakage_power = power.readOp.leakage;
10234Syasuko.eckert@amd.com    output_data.gate_leakage_power = power.readOp.gate_leakage;
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comvoid InstructionDecoder::inst_decoder_delay_power() {
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    double dec_outrisetime;
10234Syasuko.eckert@amd.com    double inrisetime = 0, outrisetime;
10234Syasuko.eckert@amd.com    double pppm_t[4]    = {1, 1, 1, 1};
10234Syasuko.eckert@amd.com    double squencer_passes = x86 ? 2 : 1;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    outrisetime = pre_dec->compute_delays(inrisetime);
10234Syasuko.eckert@amd.com    dec_outrisetime = final_dec->compute_delays(outrisetime);
10234Syasuko.eckert@amd.com    set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments);
10234Syasuko.eckert@amd.com    power = power + pre_dec->power * pppm_t;
10234Syasuko.eckert@amd.com    set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments*num_decoded_signals,
10234Syasuko.eckert@amd.com             num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments);
10234Syasuko.eckert@amd.com    power = power + final_dec->power * pppm_t;
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comvoid InstructionDecoder::leakage_feedback(double temperature) {
10152Satgutier@umich.edu  l_ip.temp = (unsigned int)round(temperature/10.0)*10;
10234Syasuko.eckert@amd.com  uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy
10152Satgutier@umich.edu
10152Satgutier@umich.edu  final_dec->leakage_feedback(temperature);
10152Satgutier@umich.edu  pre_dec->leakage_feedback(temperature);
10152Satgutier@umich.edu
10152Satgutier@umich.edu  double pppm_t[4]    = {1,1,1,1};
10152Satgutier@umich.edu  double squencer_passes = x86?2:1;
10152Satgutier@umich.edu
10152Satgutier@umich.edu  set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments);
10152Satgutier@umich.edu  power = pre_dec->power*pppm_t;
10152Satgutier@umich.edu
10152Satgutier@umich.edu  set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments*num_decoded_signals,num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments);
10152Satgutier@umich.edu  power = power + final_dec->power*pppm_t;
10152Satgutier@umich.edu
10152Satgutier@umich.edu  double sckRation = g_tp.sckt_co_eff;
10152Satgutier@umich.edu
10152Satgutier@umich.edu  power.readOp.dynamic *= sckRation;
10152Satgutier@umich.edu  power.writeOp.dynamic *= sckRation;
10152Satgutier@umich.edu  power.searchOp.dynamic *= sckRation;
10152Satgutier@umich.edu
10152Satgutier@umich.edu  double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
10152Satgutier@umich.edu  power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
10152Satgutier@umich.edu}
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.comInstructionDecoder::~InstructionDecoder() {
10234Syasuko.eckert@amd.com    local_result.cleanup();
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    delete final_dec;
10152Satgutier@umich.edu
10234Syasuko.eckert@amd.com    delete pre_dec->blk1;
10234Syasuko.eckert@amd.com    delete pre_dec->blk2;
10234Syasuko.eckert@amd.com    delete pre_dec->drv1;
10234Syasuko.eckert@amd.com    delete pre_dec->drv2;
10234Syasuko.eckert@amd.com    delete pre_dec;
10152Satgutier@umich.edu}