110152Satgutier@umich.edu/***************************************************************************** 210152Satgutier@umich.edu * McPAT 310152Satgutier@umich.edu * SOFTWARE LICENSE AGREEMENT 410152Satgutier@umich.edu * Copyright 2012 Hewlett-Packard Development Company, L.P. 510234Syasuko.eckert@amd.com * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 610152Satgutier@umich.edu * All Rights Reserved 710152Satgutier@umich.edu * 810152Satgutier@umich.edu * Redistribution and use in source and binary forms, with or without 910152Satgutier@umich.edu * modification, are permitted provided that the following conditions are 1010152Satgutier@umich.edu * met: redistributions of source code must retain the above copyright 1110152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer; 1210152Satgutier@umich.edu * redistributions in binary form must reproduce the above copyright 1310152Satgutier@umich.edu * notice, this list of conditions and the following disclaimer in the 1410152Satgutier@umich.edu * documentation and/or other materials provided with the distribution; 1510152Satgutier@umich.edu * neither the name of the copyright holders nor the names of its 1610152Satgutier@umich.edu * contributors may be used to endorse or promote products derived from 1710152Satgutier@umich.edu * this software without specific prior written permission. 1810152Satgutier@umich.edu 1910152Satgutier@umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2010152Satgutier@umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2110152Satgutier@umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 2210152Satgutier@umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 2310152Satgutier@umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 2410152Satgutier@umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 2510152Satgutier@umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2610152Satgutier@umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2710152Satgutier@umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2810152Satgutier@umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 2910234Syasuko.eckert@amd.com * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3010152Satgutier@umich.edu * 3110152Satgutier@umich.edu ***************************************************************************/ 3210152Satgutier@umich.edu 3310234Syasuko.eckert@amd.com#include "common.h" 3410152Satgutier@umich.edu#include "logic.h" 3510152Satgutier@umich.edu 3610152Satgutier@umich.edu//selection_logic 3710234Syasuko.eckert@amd.comselection_logic::selection_logic(XMLNode* _xml_data, bool _is_default, 3810234Syasuko.eckert@amd.com int _win_entries, int issue_width_, 3910234Syasuko.eckert@amd.com const InputParameter *configure_interface, 4010234Syasuko.eckert@amd.com string _name, double _accesses, 4110234Syasuko.eckert@amd.com double clockRate_, enum Device_ty device_ty_, 4210234Syasuko.eckert@amd.com enum Core_type core_ty_) 4310234Syasuko.eckert@amd.com : McPATComponent(_xml_data), is_default(_is_default), 4410234Syasuko.eckert@amd.com win_entries(_win_entries), 4510234Syasuko.eckert@amd.com issue_width(issue_width_), 4610234Syasuko.eckert@amd.com accesses(_accesses), 4710234Syasuko.eckert@amd.com device_ty(device_ty_), 4810234Syasuko.eckert@amd.com core_ty(core_ty_) { 4910234Syasuko.eckert@amd.com clockRate = clockRate_; 5010234Syasuko.eckert@amd.com name = _name; 5110234Syasuko.eckert@amd.com l_ip = *configure_interface; 5210234Syasuko.eckert@amd.com local_result = init_interface(&l_ip, name); 5310152Satgutier@umich.edu} 5410152Satgutier@umich.edu 5510234Syasuko.eckert@amd.comvoid selection_logic::computeArea() { 5610234Syasuko.eckert@amd.com output_data.area = local_result.area; 5710234Syasuko.eckert@amd.com} 5810234Syasuko.eckert@amd.com 5910234Syasuko.eckert@amd.comvoid selection_logic::computeEnergy() { 6010234Syasuko.eckert@amd.com //based on cost effective superscalar processor TR pp27-31 6110234Syasuko.eckert@amd.com double Ctotal, Cor, Cpencode; 6210234Syasuko.eckert@amd.com int num_arbiter; 6310234Syasuko.eckert@amd.com double WSelORn, WSelORprequ, WSelPn, WSelPp, WSelEnn, WSelEnp; 6410234Syasuko.eckert@amd.com 6510234Syasuko.eckert@amd.com //the 0.8um process data is used. 6610234Syasuko.eckert@amd.com //this was 10 micron for the 0.8 micron process 6710234Syasuko.eckert@amd.com WSelORn = 12.5 * l_ip.F_sz_um; 6810234Syasuko.eckert@amd.com //this was 40 micron for the 0.8 micron process 6910234Syasuko.eckert@amd.com WSelORprequ = 50 * l_ip.F_sz_um; 7010234Syasuko.eckert@amd.com //this was 10mcron for the 0.8 micron process 7110234Syasuko.eckert@amd.com WSelPn = 12.5 * l_ip.F_sz_um; 7210234Syasuko.eckert@amd.com //this was 15 micron for the 0.8 micron process 7310234Syasuko.eckert@amd.com WSelPp = 18.75 * l_ip.F_sz_um; 7410234Syasuko.eckert@amd.com //this was 5 micron for the 0.8 micron process 7510234Syasuko.eckert@amd.com WSelEnn = 6.25 * l_ip.F_sz_um; 7610234Syasuko.eckert@amd.com //this was 10 micron for the 0.8 micron process 7710234Syasuko.eckert@amd.com WSelEnp = 12.5 * l_ip.F_sz_um; 7810234Syasuko.eckert@amd.com 7910234Syasuko.eckert@amd.com Ctotal = 0; 8010234Syasuko.eckert@amd.com num_arbiter = 1; 8110234Syasuko.eckert@amd.com while (win_entries > 4) { 8210234Syasuko.eckert@amd.com win_entries = (int)ceil((double)win_entries / 4.0); 8310234Syasuko.eckert@amd.com num_arbiter += win_entries; 8410234Syasuko.eckert@amd.com } 8510234Syasuko.eckert@amd.com //the 4-input OR logic to generate anyreq 8610234Syasuko.eckert@amd.com Cor = 4 * drain_C_(WSelORn, NCH, 1, 1, g_tp.cell_h_def) + 8710234Syasuko.eckert@amd.com drain_C_(WSelORprequ, PCH, 1, 1, g_tp.cell_h_def); 8810234Syasuko.eckert@amd.com power.readOp.gate_leakage = 8910234Syasuko.eckert@amd.com cmos_Ig_leakage(WSelORn, WSelORprequ, 4, nor) * g_tp.peri_global.Vdd; 9010234Syasuko.eckert@amd.com 9110234Syasuko.eckert@amd.com //The total capacity of the 4-bit priority encoder 9210234Syasuko.eckert@amd.com Cpencode = drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + 9310234Syasuko.eckert@amd.com drain_C_(WSelPp, PCH, 1, 1, g_tp.cell_h_def) + 9410234Syasuko.eckert@amd.com 2 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + 9510234Syasuko.eckert@amd.com drain_C_(WSelPp, PCH, 2, 1, g_tp.cell_h_def) + 9610234Syasuko.eckert@amd.com 3 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + 9710234Syasuko.eckert@amd.com drain_C_(WSelPp, PCH, 3, 1, g_tp.cell_h_def) + 9810234Syasuko.eckert@amd.com 4 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + 9910234Syasuko.eckert@amd.com drain_C_(WSelPp, PCH, 4, 1, g_tp.cell_h_def) +//precompute priority logic 10010234Syasuko.eckert@amd.com 2 * 4 * gate_C(WSelEnn + WSelEnp, 20.0) + 10110234Syasuko.eckert@amd.com 4 * drain_C_(WSelEnn, NCH, 1, 1, g_tp.cell_h_def) + 10210234Syasuko.eckert@amd.com 2 * 4 * drain_C_(WSelEnp, PCH, 1, 1, g_tp.cell_h_def) +//enable logic 10310234Syasuko.eckert@amd.com (2 * 4 + 2 * 3 + 2 * 2 + 2) * 10410234Syasuko.eckert@amd.com gate_C(WSelPn + WSelPp, 10.0);//requests signal 10510234Syasuko.eckert@amd.com 10610234Syasuko.eckert@amd.com Ctotal += issue_width * num_arbiter * (Cor + Cpencode); 10710234Syasuko.eckert@amd.com 10810234Syasuko.eckert@amd.com //2 means the abitration signal need to travel round trip 10910234Syasuko.eckert@amd.com power.readOp.dynamic = 11010234Syasuko.eckert@amd.com Ctotal * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 2; 11110234Syasuko.eckert@amd.com power.readOp.leakage = issue_width * num_arbiter * 11210234Syasuko.eckert@amd.com (cmos_Isub_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p 11310234Syasuko.eckert@amd.com + cmos_Isub_leakage(WSelPn, WSelPp, 3, nor)//grant2p 11410234Syasuko.eckert@amd.com + cmos_Isub_leakage(WSelPn, WSelPp, 4, nor)//grant3p 11510234Syasuko.eckert@amd.com + cmos_Isub_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic 11610234Syasuko.eckert@amd.com + cmos_Isub_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant sIsubnals 11710234Syasuko.eckert@amd.com ) * g_tp.peri_global.Vdd; 11810234Syasuko.eckert@amd.com power.readOp.gate_leakage = issue_width * num_arbiter * 11910234Syasuko.eckert@amd.com (cmos_Ig_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p 12010234Syasuko.eckert@amd.com + cmos_Ig_leakage(WSelPn, WSelPp, 3, nor)//grant2p 12110234Syasuko.eckert@amd.com + cmos_Ig_leakage(WSelPn, WSelPp, 4, nor)//grant3p 12210234Syasuko.eckert@amd.com + cmos_Ig_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic 12310234Syasuko.eckert@amd.com + cmos_Ig_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant signals 12410234Syasuko.eckert@amd.com ) * g_tp.peri_global.Vdd; 12510234Syasuko.eckert@amd.com double sckRation = g_tp.sckt_co_eff; 12610234Syasuko.eckert@amd.com power.readOp.dynamic *= sckRation; 12710234Syasuko.eckert@amd.com power.writeOp.dynamic *= sckRation; 12810234Syasuko.eckert@amd.com power.searchOp.dynamic *= sckRation; 12910234Syasuko.eckert@amd.com 13010234Syasuko.eckert@amd.com double long_channel_device_reduction = 13110234Syasuko.eckert@amd.com longer_channel_device_reduction(device_ty, core_ty); 13210234Syasuko.eckert@amd.com power.readOp.longer_channel_leakage = 13310234Syasuko.eckert@amd.com power.readOp.leakage * long_channel_device_reduction; 13410234Syasuko.eckert@amd.com 13510234Syasuko.eckert@amd.com output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; 13610234Syasuko.eckert@amd.com output_data.subthreshold_leakage_power = power.readOp.leakage; 13710234Syasuko.eckert@amd.com output_data.gate_leakage_power = power.readOp.gate_leakage; 13810234Syasuko.eckert@amd.com output_data.runtime_dynamic_energy = power.readOp.dynamic * accesses; 13910234Syasuko.eckert@amd.com} 14010152Satgutier@umich.edu 14110152Satgutier@umich.edudep_resource_conflict_check::dep_resource_conflict_check( 14210234Syasuko.eckert@amd.com XMLNode* _xml_data, const string _name, 14310234Syasuko.eckert@amd.com const InputParameter *configure_interface, 14410234Syasuko.eckert@amd.com const CoreParameters & dyn_p_, int compare_bits_, 14510234Syasuko.eckert@amd.com double clockRate_, bool _is_default) 14610234Syasuko.eckert@amd.com : McPATComponent(_xml_data), l_ip(*configure_interface), 14710234Syasuko.eckert@amd.com coredynp(dyn_p_), compare_bits(compare_bits_), is_default(_is_default) { 14810152Satgutier@umich.edu 14910234Syasuko.eckert@amd.com name = _name; 15010234Syasuko.eckert@amd.com clockRate = clockRate_; 15110234Syasuko.eckert@amd.com //this was 20.0 micron for the 0.8 micron process 15210234Syasuko.eckert@amd.com Wcompn = 25 * l_ip.F_sz_um; 15310234Syasuko.eckert@amd.com //this was 20.0 micron for the 0.8 micron process 15410234Syasuko.eckert@amd.com Wevalinvp = 25 * l_ip.F_sz_um; 15510234Syasuko.eckert@amd.com //this was 80.0 mcron for the 0.8 micron process 15610234Syasuko.eckert@amd.com Wevalinvn = 100 * l_ip.F_sz_um; 15710234Syasuko.eckert@amd.com //this was 40.0 micron for the 0.8 micron process 15810234Syasuko.eckert@amd.com Wcomppreequ = 50 * l_ip.F_sz_um; 15910234Syasuko.eckert@amd.com //this was 5.4 micron for the 0.8 micron process 16010234Syasuko.eckert@amd.com WNORn = 6.75 * l_ip.F_sz_um; 16110234Syasuko.eckert@amd.com //this was 30.5 micron for the 0.8 micron process 16210234Syasuko.eckert@amd.com WNORp = 38.125 * l_ip.F_sz_um; 16310152Satgutier@umich.edu 16410234Syasuko.eckert@amd.com // To make CACTI happy. 16510234Syasuko.eckert@amd.com l_ip.cache_sz = MIN_BUFFER_SIZE; 16610234Syasuko.eckert@amd.com local_result = init_interface(&l_ip, name); 16710152Satgutier@umich.edu 16810234Syasuko.eckert@amd.com if (coredynp.core_ty == Inorder) 16910234Syasuko.eckert@amd.com //TODO: opcode bits + log(shared resources) + REG TAG BITS --> 17010234Syasuko.eckert@amd.com //opcode comparator 17110234Syasuko.eckert@amd.com compare_bits += 16 + 8 + 8; 17210234Syasuko.eckert@amd.com else 17310234Syasuko.eckert@amd.com compare_bits += 16 + 8 + 8; 17410234Syasuko.eckert@amd.com 17510234Syasuko.eckert@amd.com conflict_check_power(); 17610234Syasuko.eckert@amd.com double sckRation = g_tp.sckt_co_eff; 17710234Syasuko.eckert@amd.com power.readOp.dynamic *= sckRation; 17810234Syasuko.eckert@amd.com power.writeOp.dynamic *= sckRation; 17910234Syasuko.eckert@amd.com power.searchOp.dynamic *= sckRation; 18010152Satgutier@umich.edu 18110152Satgutier@umich.edu} 18210152Satgutier@umich.edu 18310234Syasuko.eckert@amd.comvoid dep_resource_conflict_check::conflict_check_power() { 18410234Syasuko.eckert@amd.com double Ctotal; 18510234Syasuko.eckert@amd.com int num_comparators; 18610234Syasuko.eckert@amd.com //2(N*N-N) is used for source to dest comparison, (N*N-N) is used for 18710234Syasuko.eckert@amd.com //dest to dest comparision. 18810234Syasuko.eckert@amd.com num_comparators = 3 * ((coredynp.decodeW) * (coredynp.decodeW) - 18910234Syasuko.eckert@amd.com coredynp.decodeW); 19010152Satgutier@umich.edu 19110234Syasuko.eckert@amd.com Ctotal = num_comparators * compare_cap(); 19210152Satgutier@umich.edu 19310234Syasuko.eckert@amd.com power.readOp.dynamic = Ctotal * /*CLOCKRATE*/ g_tp.peri_global.Vdd * 19410234Syasuko.eckert@amd.com g_tp.peri_global.Vdd /*AF*/; 19510234Syasuko.eckert@amd.com power.readOp.leakage = num_comparators * compare_bits * 2 * 19610234Syasuko.eckert@amd.com simplified_nmos_leakage(Wcompn, false); 19710152Satgutier@umich.edu 19810234Syasuko.eckert@amd.com double long_channel_device_reduction = 19910234Syasuko.eckert@amd.com longer_channel_device_reduction(Core_device, coredynp.core_ty); 20010234Syasuko.eckert@amd.com power.readOp.longer_channel_leakage = 20110234Syasuko.eckert@amd.com power.readOp.leakage * long_channel_device_reduction; 20210234Syasuko.eckert@amd.com power.readOp.gate_leakage = num_comparators * compare_bits * 2 * 20310234Syasuko.eckert@amd.com cmos_Ig_leakage(Wcompn, 0, 2, nmos); 20410152Satgutier@umich.edu 20510152Satgutier@umich.edu} 20610152Satgutier@umich.edu 20710152Satgutier@umich.edu/* estimate comparator power consumption (this comparator is similar 20810152Satgutier@umich.edu to the tag-match structure in a CAM */ 20910234Syasuko.eckert@amd.comdouble dep_resource_conflict_check::compare_cap() { 21010234Syasuko.eckert@amd.com double c1, c2; 21110152Satgutier@umich.edu 21210234Syasuko.eckert@amd.com //resize the big NOR gate at the DCL according to fan in. 21310234Syasuko.eckert@amd.com WNORp = WNORp * compare_bits / 2.0; 21410234Syasuko.eckert@amd.com /* bottom part of comparator */ 21510234Syasuko.eckert@amd.com c2 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) + 21610234Syasuko.eckert@amd.com drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def)) + 21710234Syasuko.eckert@amd.com drain_C_(Wevalinvp, PCH, 1, 1, g_tp.cell_h_def) + 21810234Syasuko.eckert@amd.com drain_C_(Wevalinvn, NCH, 1, 1, g_tp.cell_h_def); 21910152Satgutier@umich.edu 22010234Syasuko.eckert@amd.com /* top part of comparator */ 22110234Syasuko.eckert@amd.com c1 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) + 22210234Syasuko.eckert@amd.com drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def) + 22310234Syasuko.eckert@amd.com drain_C_(Wcomppreequ, NCH, 1, 1, g_tp.cell_h_def)) + 22410234Syasuko.eckert@amd.com gate_C(WNORn + WNORp, 10.0) + 22510234Syasuko.eckert@amd.com drain_C_(WNORp, NCH, 2, 1, g_tp.cell_h_def) + compare_bits * 22610234Syasuko.eckert@amd.com drain_C_(WNORn, NCH, 2, 1, g_tp.cell_h_def); 22710234Syasuko.eckert@amd.com return(c1 + c2); 22810152Satgutier@umich.edu 22910152Satgutier@umich.edu} 23010152Satgutier@umich.edu 23110152Satgutier@umich.eduvoid dep_resource_conflict_check::leakage_feedback(double temperature) 23210152Satgutier@umich.edu{ 23310152Satgutier@umich.edu l_ip.temp = (unsigned int)round(temperature/10.0)*10; 23410234Syasuko.eckert@amd.com uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy 23510152Satgutier@umich.edu 23610152Satgutier@umich.edu // This is part of conflict_check_power() 23710234Syasuko.eckert@amd.com // 2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest 23810234Syasuko.eckert@amd.com // to dest comparison. 23910234Syasuko.eckert@amd.com int num_comparators = 3 * ((coredynp.decodeW) * (coredynp.decodeW) - 24010234Syasuko.eckert@amd.com coredynp.decodeW); 24110234Syasuko.eckert@amd.com power.readOp.leakage = num_comparators * compare_bits * 2 * 24210234Syasuko.eckert@amd.com simplified_nmos_leakage(Wcompn, false); 24310152Satgutier@umich.edu 24410234Syasuko.eckert@amd.com double long_channel_device_reduction = 24510234Syasuko.eckert@amd.com longer_channel_device_reduction(Core_device, coredynp.core_ty); 24610234Syasuko.eckert@amd.com power.readOp.longer_channel_leakage = power.readOp.leakage * 24710234Syasuko.eckert@amd.com long_channel_device_reduction; 24810234Syasuko.eckert@amd.com power.readOp.gate_leakage = num_comparators * compare_bits * 2 * 24910234Syasuko.eckert@amd.com cmos_Ig_leakage(Wcompn, 0, 2, nmos); 25010152Satgutier@umich.edu} 25110152Satgutier@umich.edu 25210152Satgutier@umich.edu 25310152Satgutier@umich.eduDFFCell::DFFCell( 25410234Syasuko.eckert@amd.com bool _is_dram, 25510234Syasuko.eckert@amd.com double _WdecNANDn, 25610234Syasuko.eckert@amd.com double _WdecNANDp, 25710234Syasuko.eckert@amd.com double _cell_load, 25810234Syasuko.eckert@amd.com const InputParameter *configure_interface) 25910234Syasuko.eckert@amd.com : is_dram(_is_dram), 26010234Syasuko.eckert@amd.com cell_load(_cell_load), 26110234Syasuko.eckert@amd.com WdecNANDn(_WdecNANDn), 26210234Syasuko.eckert@amd.com WdecNANDp(_WdecNANDp) { //this model is based on the NAND2 based DFF. 26310234Syasuko.eckert@amd.com l_ip = *configure_interface; 26410234Syasuko.eckert@amd.com area.set_area(5 * compute_gate_area(NAND, 2,WdecNANDn,WdecNANDp, 26510234Syasuko.eckert@amd.com g_tp.cell_h_def) 26610234Syasuko.eckert@amd.com + compute_gate_area(NAND, 2,WdecNANDn,WdecNANDn, 26710234Syasuko.eckert@amd.com g_tp.cell_h_def)); 26810152Satgutier@umich.edu 26910152Satgutier@umich.edu 27010152Satgutier@umich.edu} 27110152Satgutier@umich.edu 27210152Satgutier@umich.edu 27310234Syasuko.eckert@amd.comdouble DFFCell::fpfp_node_cap(unsigned int fan_in, unsigned int fan_out) { 27410234Syasuko.eckert@amd.com double Ctotal = 0; 27510152Satgutier@umich.edu 27610234Syasuko.eckert@amd.com /* part 1: drain cap of NAND gate */ 27710234Syasuko.eckert@amd.com Ctotal += drain_C_(WdecNANDn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + fan_in * drain_C_(WdecNANDp, PCH, 1, 1, g_tp.cell_h_def, is_dram); 27810152Satgutier@umich.edu 27910234Syasuko.eckert@amd.com /* part 2: gate cap of NAND gates */ 28010234Syasuko.eckert@amd.com Ctotal += fan_out * gate_C(WdecNANDn + WdecNANDp, 0, is_dram); 28110152Satgutier@umich.edu 28210234Syasuko.eckert@amd.com return Ctotal; 28310152Satgutier@umich.edu} 28410152Satgutier@umich.edu 28510152Satgutier@umich.edu 28610234Syasuko.eckert@amd.comvoid DFFCell::compute_DFF_cell() { 28710234Syasuko.eckert@amd.com double c1, c2, c3, c4, c5, c6; 28810234Syasuko.eckert@amd.com /* node 5 and node 6 are identical to node 1 in capacitance */ 28910234Syasuko.eckert@amd.com c1 = c5 = c6 = fpfp_node_cap(2, 1); 29010234Syasuko.eckert@amd.com c2 = fpfp_node_cap(2, 3); 29110234Syasuko.eckert@amd.com c3 = fpfp_node_cap(3, 2); 29210234Syasuko.eckert@amd.com c4 = fpfp_node_cap(2, 2); 29310152Satgutier@umich.edu 29410234Syasuko.eckert@amd.com //cap-load of the clock signal in each Dff, actually the clock signal only connected to one NAND2 29510234Syasuko.eckert@amd.com clock_cap = 2 * gate_C(WdecNANDn + WdecNANDp, 0, is_dram); 29610234Syasuko.eckert@amd.com e_switch.readOp.dynamic += (c4 + c1 + c2 + c3 + c5 + c6 + 2 * cell_load) * 29710234Syasuko.eckert@amd.com 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;; 29810152Satgutier@umich.edu 29910234Syasuko.eckert@amd.com /* no 1/2 for e_keep and e_clock because clock signal switches twice in one cycle */ 30010234Syasuko.eckert@amd.com e_keep_1.readOp.dynamic += 30110234Syasuko.eckert@amd.com c3 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ; 30210234Syasuko.eckert@amd.com e_keep_0.readOp.dynamic += 30310234Syasuko.eckert@amd.com c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ; 30410234Syasuko.eckert@amd.com e_clock.readOp.dynamic += 30510234Syasuko.eckert@amd.com clock_cap * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;; 30610152Satgutier@umich.edu 30710234Syasuko.eckert@amd.com /* static power */ 30810234Syasuko.eckert@amd.com e_switch.readOp.leakage += 30910234Syasuko.eckert@amd.com (cmos_Isub_leakage(WdecNANDn, WdecNANDp, 2, nand) * 31010234Syasuko.eckert@amd.com 5//5 NAND2 and 1 NAND3 in a DFF 31110234Syasuko.eckert@amd.com + cmos_Isub_leakage(WdecNANDn, WdecNANDn, 3, nand)) * 31210234Syasuko.eckert@amd.com g_tp.peri_global.Vdd; 31310234Syasuko.eckert@amd.com e_switch.readOp.gate_leakage += 31410234Syasuko.eckert@amd.com (cmos_Ig_leakage(WdecNANDn, WdecNANDp, 2, nand) * 31510234Syasuko.eckert@amd.com 5//5 NAND2 and 1 NAND3 in a DFF 31610234Syasuko.eckert@amd.com + cmos_Ig_leakage(WdecNANDn, WdecNANDn, 3, nand)) * 31710234Syasuko.eckert@amd.com g_tp.peri_global.Vdd; 31810152Satgutier@umich.edu} 31910152Satgutier@umich.edu 32010234Syasuko.eckert@amd.comPipeline::Pipeline(XMLNode* _xml_data, 32110234Syasuko.eckert@amd.com const InputParameter *configure_interface, 32210234Syasuko.eckert@amd.com const CoreParameters & dyn_p_, 32310234Syasuko.eckert@amd.com enum Device_ty device_ty_, 32410234Syasuko.eckert@amd.com bool _is_core_pipeline, 32510234Syasuko.eckert@amd.com bool _is_default) 32610234Syasuko.eckert@amd.com : McPATComponent(_xml_data), l_ip(*configure_interface), 32710234Syasuko.eckert@amd.com coredynp(dyn_p_), device_ty(device_ty_), 32810234Syasuko.eckert@amd.com is_core_pipeline(_is_core_pipeline), is_default(_is_default), 32910234Syasuko.eckert@amd.com num_piperegs(0.0) { 33010234Syasuko.eckert@amd.com name = "Pipeline?"; 33110152Satgutier@umich.edu 33210234Syasuko.eckert@amd.com local_result = init_interface(&l_ip, name); 33310234Syasuko.eckert@amd.com if (!coredynp.Embedded) { 33410234Syasuko.eckert@amd.com process_ind = true; 33510234Syasuko.eckert@amd.com } else { 33610234Syasuko.eckert@amd.com process_ind = false; 33710234Syasuko.eckert@amd.com } 33810234Syasuko.eckert@amd.com //this was 20 micron for the 0.8 micron process 33910234Syasuko.eckert@amd.com WNANDn = (process_ind) ? 25 * l_ip.F_sz_um : g_tp.min_w_nmos_ ; 34010234Syasuko.eckert@amd.com //this was 30 micron for the 0.8 micron process 34110234Syasuko.eckert@amd.com WNANDp = (process_ind) ? 37.5 * l_ip.F_sz_um : g_tp.min_w_nmos_ * 34210234Syasuko.eckert@amd.com pmos_to_nmos_sz_ratio(); 34310234Syasuko.eckert@amd.com load_per_pipeline_stage = 2 * gate_C(WNANDn + WNANDp, 0, false); 34410234Syasuko.eckert@amd.com compute(); 34510152Satgutier@umich.edu 34610152Satgutier@umich.edu} 34710152Satgutier@umich.edu 34810234Syasuko.eckert@amd.comvoid Pipeline::compute() { 34910234Syasuko.eckert@amd.com compute_stage_vector(); 35010234Syasuko.eckert@amd.com DFFCell pipe_reg(false, WNANDn, WNANDp, load_per_pipeline_stage, &l_ip); 35110234Syasuko.eckert@amd.com pipe_reg.compute_DFF_cell(); 35210152Satgutier@umich.edu 35310234Syasuko.eckert@amd.com double clock_power_pipereg = num_piperegs * pipe_reg.e_clock.readOp.dynamic; 35410234Syasuko.eckert@amd.com //******************pipeline power: currently, we average all the possibilities of the states of DFFs in the pipeline. A better way to do it is to consider 35510234Syasuko.eckert@amd.com //the harming distance of two consecutive signals, However McPAT does not have plan to do this in near future as it focuses on worst case power. 35610234Syasuko.eckert@amd.com double pipe_reg_power = num_piperegs * 35710234Syasuko.eckert@amd.com (pipe_reg.e_switch.readOp.dynamic + pipe_reg.e_keep_0.readOp.dynamic + 35810234Syasuko.eckert@amd.com pipe_reg.e_keep_1.readOp.dynamic) / 3 + clock_power_pipereg; 35910234Syasuko.eckert@amd.com double pipe_reg_leakage = num_piperegs * pipe_reg.e_switch.readOp.leakage; 36010234Syasuko.eckert@amd.com double pipe_reg_gate_leakage = num_piperegs * 36110234Syasuko.eckert@amd.com pipe_reg.e_switch.readOp.gate_leakage; 36210234Syasuko.eckert@amd.com power.readOp.dynamic += pipe_reg_power; 36310234Syasuko.eckert@amd.com power.readOp.leakage += pipe_reg_leakage; 36410234Syasuko.eckert@amd.com power.readOp.gate_leakage += pipe_reg_gate_leakage; 36510234Syasuko.eckert@amd.com area.set_area(num_piperegs * pipe_reg.area.get_area()); 36610152Satgutier@umich.edu 36710234Syasuko.eckert@amd.com double long_channel_device_reduction = 36810234Syasuko.eckert@amd.com longer_channel_device_reduction(device_ty, coredynp.core_ty); 36910234Syasuko.eckert@amd.com power.readOp.longer_channel_leakage = power.readOp.leakage * 37010234Syasuko.eckert@amd.com long_channel_device_reduction; 37110152Satgutier@umich.edu 37210152Satgutier@umich.edu 37310234Syasuko.eckert@amd.com double sckRation = g_tp.sckt_co_eff; 37410234Syasuko.eckert@amd.com power.readOp.dynamic *= sckRation; 37510234Syasuko.eckert@amd.com power.writeOp.dynamic *= sckRation; 37610234Syasuko.eckert@amd.com power.searchOp.dynamic *= sckRation; 37710234Syasuko.eckert@amd.com double macro_layout_overhead = g_tp.macro_layout_overhead; 37810152Satgutier@umich.edu if (!coredynp.Embedded) 37910234Syasuko.eckert@amd.com area.set_area(area.get_area() * macro_layout_overhead); 38010234Syasuko.eckert@amd.com 38110234Syasuko.eckert@amd.com output_data.area = area.get_area() / 1e6; 38210234Syasuko.eckert@amd.com output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; 38310234Syasuko.eckert@amd.com output_data.subthreshold_leakage_power = power.readOp.leakage; 38410234Syasuko.eckert@amd.com output_data.gate_leakage_power = power.readOp.gate_leakage; 38510234Syasuko.eckert@amd.com output_data.runtime_dynamic_energy = power.readOp.dynamic * total_cycles; 38610152Satgutier@umich.edu} 38710152Satgutier@umich.edu 38810234Syasuko.eckert@amd.comvoid Pipeline::compute_stage_vector() { 38910234Syasuko.eckert@amd.com double num_stages, tot_stage_vector, per_stage_vector; 39010234Syasuko.eckert@amd.com int opcode_length = coredynp.x86 ? 39110234Syasuko.eckert@amd.com coredynp.micro_opcode_length : coredynp.opcode_width; 39210152Satgutier@umich.edu 39310234Syasuko.eckert@amd.com if (!is_core_pipeline) { 39410234Syasuko.eckert@amd.com //The number of pipeline stages are calculated based on the achievable 39510234Syasuko.eckert@amd.com //throughput and required throughput 39610234Syasuko.eckert@amd.com num_piperegs = l_ip.pipeline_stages * l_ip.per_stage_vector; 39710234Syasuko.eckert@amd.com } else { 39810234Syasuko.eckert@amd.com if (coredynp.core_ty == Inorder) { 39910234Syasuko.eckert@amd.com /* assume 6 pipe stages and try to estimate bits per pipe stage */ 40010234Syasuko.eckert@amd.com /* pipe stage 0/IF */ 40110234Syasuko.eckert@amd.com num_piperegs += coredynp.pc_width * 2 * coredynp.num_hthreads; 40210234Syasuko.eckert@amd.com /* pipe stage IF/ID */ 40310234Syasuko.eckert@amd.com num_piperegs += coredynp.fetchW * 40410234Syasuko.eckert@amd.com (coredynp.instruction_length + coredynp.pc_width) * 40510234Syasuko.eckert@amd.com coredynp.num_hthreads; 40610234Syasuko.eckert@amd.com /* pipe stage IF/ThreadSEL */ 40710234Syasuko.eckert@amd.com if (coredynp.multithreaded) { 40810234Syasuko.eckert@amd.com num_piperegs += coredynp.num_hthreads * 40910234Syasuko.eckert@amd.com coredynp.perThreadState; //8 bit thread states 41010234Syasuko.eckert@amd.com } 41110234Syasuko.eckert@amd.com /* pipe stage ID/EXE */ 41210234Syasuko.eckert@amd.com num_piperegs += coredynp.decodeW * 41310234Syasuko.eckert@amd.com (coredynp.instruction_length + coredynp.pc_width + 41410234Syasuko.eckert@amd.com pow(2.0, opcode_length) + 2 * coredynp.int_data_width) * 41510234Syasuko.eckert@amd.com coredynp.num_hthreads; 41610234Syasuko.eckert@amd.com /* pipe stage EXE/MEM */ 41710234Syasuko.eckert@amd.com num_piperegs += coredynp.issueW * 41810234Syasuko.eckert@amd.com (3 * coredynp.arch_ireg_width + pow(2.0, opcode_length) + 8 * 41910234Syasuko.eckert@amd.com 2 * coredynp.int_data_width/*+2*powers (2,reg_length)*/); 42010234Syasuko.eckert@amd.com /* pipe stage MEM/WB the 2^opcode_length means the total decoded signal for the opcode*/ 42110234Syasuko.eckert@amd.com num_piperegs += coredynp.issueW * 42210234Syasuko.eckert@amd.com (2 * coredynp.int_data_width + pow(2.0, opcode_length) + 8 * 42310234Syasuko.eckert@amd.com 2 * coredynp.int_data_width/*+2*powers (2,reg_length)*/); 42410234Syasuko.eckert@amd.com num_stages = 6; 42510234Syasuko.eckert@amd.com } else { 42610234Syasuko.eckert@amd.com /* assume 12 stage pipe stages and try to estimate bits per pipe stage */ 42710234Syasuko.eckert@amd.com /*OOO: Fetch, decode, rename, IssueQ, dispatch, regread, EXE, MEM, WB, CM */ 42810152Satgutier@umich.edu 42910234Syasuko.eckert@amd.com /* pipe stage 0/1F*/ 43010234Syasuko.eckert@amd.com num_piperegs += 43110234Syasuko.eckert@amd.com coredynp.pc_width * 2 * coredynp.num_hthreads ;//PC and Next PC 43210234Syasuko.eckert@amd.com /* pipe stage IF/ID */ 43310234Syasuko.eckert@amd.com num_piperegs += coredynp.fetchW * 43410234Syasuko.eckert@amd.com (coredynp.instruction_length + coredynp.pc_width) * 43510234Syasuko.eckert@amd.com coredynp.num_hthreads;//PC is used to feed branch predictor in ID 43610234Syasuko.eckert@amd.com /* pipe stage 1D/Renaming*/ 43710234Syasuko.eckert@amd.com num_piperegs += coredynp.decodeW * 43810234Syasuko.eckert@amd.com (coredynp.instruction_length + coredynp.pc_width) * 43910234Syasuko.eckert@amd.com coredynp.num_hthreads;//PC is for branch exe in later stage. 44010234Syasuko.eckert@amd.com /* pipe stage Renaming/wire_drive */ 44110234Syasuko.eckert@amd.com num_piperegs += coredynp.decodeW * 44210234Syasuko.eckert@amd.com (coredynp.instruction_length + coredynp.pc_width); 44310234Syasuko.eckert@amd.com /* pipe stage Renaming/IssueQ */ 44410234Syasuko.eckert@amd.com //3*coredynp.phy_ireg_width means 2 sources and 1 dest 44510234Syasuko.eckert@amd.com num_piperegs += coredynp.issueW * 44610234Syasuko.eckert@amd.com (coredynp.instruction_length + coredynp.pc_width + 3 * 44710234Syasuko.eckert@amd.com coredynp.phy_ireg_width) * coredynp.num_hthreads; 44810234Syasuko.eckert@amd.com /* pipe stage IssueQ/Dispatch */ 44910234Syasuko.eckert@amd.com num_piperegs += coredynp.issueW * 45010234Syasuko.eckert@amd.com (coredynp.instruction_length + 3 * coredynp.phy_ireg_width); 45110234Syasuko.eckert@amd.com /* pipe stage Dispatch/EXE */ 45210152Satgutier@umich.edu 45310234Syasuko.eckert@amd.com num_piperegs += coredynp.issueW * 45410234Syasuko.eckert@amd.com (3 * coredynp.phy_ireg_width + coredynp.pc_width + 45510234Syasuko.eckert@amd.com pow(2.0, opcode_length)/*+2*powers (2,reg_length)*/); 45610234Syasuko.eckert@amd.com /* 2^opcode_length means the total decoded signal for the opcode*/ 45710234Syasuko.eckert@amd.com num_piperegs += coredynp.issueW * 45810234Syasuko.eckert@amd.com (2 * coredynp.int_data_width + pow(2.0, opcode_length) 45910234Syasuko.eckert@amd.com /*+2*powers (2,reg_length)*/); 46010234Syasuko.eckert@amd.com /*2 source operands in EXE; Assume 2EXE stages* since we do not really distinguish OP*/ 46110234Syasuko.eckert@amd.com num_piperegs += coredynp.issueW * 46210234Syasuko.eckert@amd.com (2 * coredynp.int_data_width + pow(2.0, opcode_length) 46310234Syasuko.eckert@amd.com /*+2*powers (2,reg_length)*/); 46410234Syasuko.eckert@amd.com /* pipe stage EXE/MEM, data need to be read/write, address*/ 46510234Syasuko.eckert@amd.com //memory Opcode still need to be passed 46610234Syasuko.eckert@amd.com num_piperegs += coredynp.issueW * 46710234Syasuko.eckert@amd.com (coredynp.int_data_width + coredynp.v_address_width + 46810234Syasuko.eckert@amd.com pow(2.0, opcode_length)/*+2*powers (2,reg_length)*/); 46910234Syasuko.eckert@amd.com /* pipe stage MEM/WB; result data, writeback regs */ 47010234Syasuko.eckert@amd.com num_piperegs += coredynp.issueW * 47110234Syasuko.eckert@amd.com (coredynp.int_data_width + coredynp.phy_ireg_width 47210234Syasuko.eckert@amd.com /* powers (2,opcode_length) + 47310234Syasuko.eckert@amd.com (2,opcode_length)+2*powers (2,reg_length)*/); 47410234Syasuko.eckert@amd.com /* pipe stage WB/CM ; result data, regs need to be updated, address for resolve memory ops in ROB's top*/ 47510234Syasuko.eckert@amd.com num_piperegs += coredynp.commitW * 47610234Syasuko.eckert@amd.com (coredynp.int_data_width + coredynp.v_address_width + 47710234Syasuko.eckert@amd.com coredynp.phy_ireg_width 47810234Syasuko.eckert@amd.com /*+ powers (2,opcode_length)*2*powers (2,reg_length)*/) * 47910234Syasuko.eckert@amd.com coredynp.num_hthreads; 48010234Syasuko.eckert@amd.com num_stages = 12; 48110152Satgutier@umich.edu 48210152Satgutier@umich.edu } 48310152Satgutier@umich.edu 48410152Satgutier@umich.edu /* assume 50% extra in control registers and interrupt registers (rule of thumb) */ 48510152Satgutier@umich.edu num_piperegs = num_piperegs * 1.5; 48610234Syasuko.eckert@amd.com tot_stage_vector = num_piperegs; 48710234Syasuko.eckert@amd.com per_stage_vector = tot_stage_vector / num_stages; 48810152Satgutier@umich.edu 48910234Syasuko.eckert@amd.com if (coredynp.core_ty == Inorder) { 49010234Syasuko.eckert@amd.com if (coredynp.pipeline_stages > 6) 49110234Syasuko.eckert@amd.com num_piperegs = per_stage_vector * coredynp.pipeline_stages; 49210234Syasuko.eckert@amd.com } else { //OOO 49310234Syasuko.eckert@amd.com if (coredynp.pipeline_stages > 12) 49410234Syasuko.eckert@amd.com num_piperegs = per_stage_vector * coredynp.pipeline_stages; 49510152Satgutier@umich.edu } 49610234Syasuko.eckert@amd.com } 49710152Satgutier@umich.edu 49810152Satgutier@umich.edu} 49910152Satgutier@umich.edu 50010234Syasuko.eckert@amd.comFunctionalUnit::FunctionalUnit(XMLNode* _xml_data, 50110234Syasuko.eckert@amd.com InputParameter* interface_ip_, 50210234Syasuko.eckert@amd.com const CoreParameters & _core_params, 50310234Syasuko.eckert@amd.com const CoreStatistics & _core_stats, 50410234Syasuko.eckert@amd.com enum FU_type fu_type_) 50510234Syasuko.eckert@amd.com : McPATComponent(_xml_data), 50610234Syasuko.eckert@amd.com interface_ip(*interface_ip_), core_params(_core_params), 50710234Syasuko.eckert@amd.com core_stats(_core_stats), fu_type(fu_type_) { 50810234Syasuko.eckert@amd.com double area_t; 50910234Syasuko.eckert@amd.com double leakage; 51010234Syasuko.eckert@amd.com double gate_leakage; 51110152Satgutier@umich.edu double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); 51210234Syasuko.eckert@amd.com clockRate = core_params.clockRate; 51310152Satgutier@umich.edu 51410234Syasuko.eckert@amd.com uca_org_t result2; 51510234Syasuko.eckert@amd.com // Temp name for the following function call 51610234Syasuko.eckert@amd.com name = "Functional Unit"; 51710234Syasuko.eckert@amd.com 51810234Syasuko.eckert@amd.com result2 = init_interface(&interface_ip, name); 51910234Syasuko.eckert@amd.com 52010234Syasuko.eckert@amd.com if (core_params.Embedded) { 52110234Syasuko.eckert@amd.com if (fu_type == FPU) { 52210234Syasuko.eckert@amd.com num_fu=core_params.num_fpus; 52310152Satgutier@umich.edu //area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 52410152Satgutier@umich.edu area_t = 4.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 The base number 52510152Satgutier@umich.edu //4.47 contains both VFP and NEON processing unit, VFP is about 40% and NEON is about 60% 52610152Satgutier@umich.edu if (g_ip->F_sz_nm>90) 52710152Satgutier@umich.edu area_t = 4.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 52810152Satgutier@umich.edu leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W 52910152Satgutier@umich.edu gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W 53010152Satgutier@umich.edu //energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction in FPU usually it can have up to 20 cycles. 53110152Satgutier@umich.edu// base_energy = coredynp.core_ty==Inorder? 0: 89e-3*3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) 53210152Satgutier@umich.edu// base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); 53310152Satgutier@umich.edu base_energy = 0; 53410152Satgutier@umich.edu per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per Hz energy(nJ) 53510152Satgutier@umich.edu //FPU power from Sandia's processor sizing tech report 53610152Satgutier@umich.edu FU_height=(18667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data 53710234Syasuko.eckert@amd.com } else if (fu_type == ALU) { 53810234Syasuko.eckert@amd.com num_fu=core_params.num_alus; 53910152Satgutier@umich.edu area_t = 280*260*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl 54010152Satgutier@umich.edu leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W 54110152Satgutier@umich.edu gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; 54210152Satgutier@umich.edu// base_energy = coredynp.core_ty==Inorder? 0:89e-3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) 54310152Satgutier@umich.edu// base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); 54410152Satgutier@umich.edu base_energy = 0; 54510152Satgutier@umich.edu per_access_energy = 1.15/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ) 54610152Satgutier@umich.edu FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU 54710152Satgutier@umich.edu 54810234Syasuko.eckert@amd.com } else if (fu_type == MUL) { 54910234Syasuko.eckert@amd.com num_fu=core_params.num_muls; 55010152Satgutier@umich.edu area_t = 280*260*3*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl 55110152Satgutier@umich.edu leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W 55210152Satgutier@umich.edu gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; 55310152Satgutier@umich.edu// base_energy = coredynp.core_ty==Inorder? 0:89e-3*2; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) 55410152Satgutier@umich.edu// base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); 55510152Satgutier@umich.edu base_energy = 0; 55610152Satgutier@umich.edu per_access_energy = 1.15*2/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch 55710152Satgutier@umich.edu FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data 55810234Syasuko.eckert@amd.com } else { 55910152Satgutier@umich.edu cout<<"Unknown Functional Unit Type"<<endl; 56010152Satgutier@umich.edu exit(0); 56110152Satgutier@umich.edu } 56210152Satgutier@umich.edu per_access_energy *=0.5;//According to ARM data embedded processor has much lower per acc energy 56310234Syasuko.eckert@amd.com } else { 56410234Syasuko.eckert@amd.com if (fu_type == FPU) { 56510234Syasuko.eckert@amd.com name = "Floating Point Unit(s)"; 56610234Syasuko.eckert@amd.com num_fu = core_params.num_fpus; 56710234Syasuko.eckert@amd.com area_t = 8.47 * 1e6 * (g_ip->F_sz_nm * g_ip->F_sz_nm / 90.0 / 56810234Syasuko.eckert@amd.com 90.0);//this is um^2 56910234Syasuko.eckert@amd.com if (g_ip->F_sz_nm > 90) 57010234Syasuko.eckert@amd.com area_t = 8.47 * 1e6 * 57110234Syasuko.eckert@amd.com g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 57210234Syasuko.eckert@amd.com leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W 57310234Syasuko.eckert@amd.com gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W 57410234Syasuko.eckert@amd.com //W The base energy of ALU average numbers from Intel 4G and 57510234Syasuko.eckert@amd.com //773Mhz (Wattch) 57610234Syasuko.eckert@amd.com base_energy = core_params.core_ty == Inorder ? 0 : 89e-3 * 3; 57710234Syasuko.eckert@amd.com base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / 57810234Syasuko.eckert@amd.com 1.2); 57910234Syasuko.eckert@amd.com per_access_energy = 1.15*3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per op energy(nJ) 58010234Syasuko.eckert@amd.com FU_height=(38667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data 58110234Syasuko.eckert@amd.com } else if (fu_type == ALU) { 58210234Syasuko.eckert@amd.com name = "Integer ALU(s)"; 58310234Syasuko.eckert@amd.com num_fu = core_params.num_alus; 58410234Syasuko.eckert@amd.com //this is um^2 ALU + MUl 58510234Syasuko.eckert@amd.com area_t = 280 * 260 * 2 * g_tp.scaling_factor.logic_scaling_co_eff; 58610234Syasuko.eckert@amd.com leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W 58710234Syasuko.eckert@amd.com gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; 58810234Syasuko.eckert@amd.com //W The base energy of ALU average numbers from Intel 4G and 773Mhz 58910234Syasuko.eckert@amd.com //(Wattch) 59010234Syasuko.eckert@amd.com base_energy = core_params.core_ty == Inorder ? 0 : 89e-3; 59110234Syasuko.eckert@amd.com base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / 59210234Syasuko.eckert@amd.com 1.2); 59310234Syasuko.eckert@amd.com per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ) 59410234Syasuko.eckert@amd.com FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU 59510234Syasuko.eckert@amd.com } else if (fu_type == MUL) { 59610234Syasuko.eckert@amd.com name = "Multiply/Divide Unit(s)"; 59710234Syasuko.eckert@amd.com num_fu = core_params.num_muls; 59810234Syasuko.eckert@amd.com //this is um^2 ALU + MUl 59910234Syasuko.eckert@amd.com area_t = 280 * 260 * 2 * 3 * 60010234Syasuko.eckert@amd.com g_tp.scaling_factor.logic_scaling_co_eff; 60110234Syasuko.eckert@amd.com leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W 60210234Syasuko.eckert@amd.com gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; 60310234Syasuko.eckert@amd.com //W The base energy of ALU average numbers from Intel 4G and 773Mhz 60410234Syasuko.eckert@amd.com //(Wattch) 60510234Syasuko.eckert@amd.com base_energy = core_params.core_ty == Inorder ? 0 : 89e-3 * 2; 60610234Syasuko.eckert@amd.com base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / 60710234Syasuko.eckert@amd.com 1.2); 60810234Syasuko.eckert@amd.com per_access_energy = 1.15*2/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch 60910234Syasuko.eckert@amd.com FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data 61010234Syasuko.eckert@amd.com } else { 61110234Syasuko.eckert@amd.com cout << "Unknown Functional Unit Type" << endl; 61210234Syasuko.eckert@amd.com exit(0); 61310152Satgutier@umich.edu } 61410234Syasuko.eckert@amd.com } 61510152Satgutier@umich.edu 61610152Satgutier@umich.edu area.set_area(area_t*num_fu); 61710234Syasuko.eckert@amd.com power.readOp.leakage = leakage * num_fu; 61810234Syasuko.eckert@amd.com power.readOp.gate_leakage = gate_leakage * num_fu; 61910234Syasuko.eckert@amd.com 62010234Syasuko.eckert@amd.com double long_channel_device_reduction = 62110234Syasuko.eckert@amd.com longer_channel_device_reduction(Core_device, core_params.core_ty); 62210234Syasuko.eckert@amd.com power.readOp.longer_channel_leakage = 62310234Syasuko.eckert@amd.com power.readOp.leakage * long_channel_device_reduction; 62410234Syasuko.eckert@amd.com double macro_layout_overhead = g_tp.macro_layout_overhead; 62510234Syasuko.eckert@amd.com area.set_area(area.get_area()*macro_layout_overhead); 62610152Satgutier@umich.edu} 62710152Satgutier@umich.edu 62810234Syasuko.eckert@amd.comvoid FunctionalUnit::computeEnergy() { 62910234Syasuko.eckert@amd.com double pppm_t[4] = {1, 1, 1, 1}; 63010234Syasuko.eckert@amd.com double FU_duty_cycle; 63110234Syasuko.eckert@amd.com double sckRation = g_tp.sckt_co_eff; 63210152Satgutier@umich.edu 63310234Syasuko.eckert@amd.com // TDP power calculation 63410234Syasuko.eckert@amd.com //2 means two source operands needs to be passed for each int instruction. 63510234Syasuko.eckert@amd.com set_pppm(pppm_t, 2, 2, 2, 2); 63610234Syasuko.eckert@amd.com tdp_stats.readAc.access = num_fu; 63710234Syasuko.eckert@amd.com if (fu_type == FPU) { 63810234Syasuko.eckert@amd.com FU_duty_cycle = core_stats.FPU_duty_cycle; 63910234Syasuko.eckert@amd.com } else if (fu_type == ALU) { 64010234Syasuko.eckert@amd.com FU_duty_cycle = core_stats.ALU_duty_cycle; 64110234Syasuko.eckert@amd.com } else if (fu_type == MUL) { 64210234Syasuko.eckert@amd.com FU_duty_cycle = core_stats.MUL_duty_cycle; 64310234Syasuko.eckert@amd.com } 64410152Satgutier@umich.edu 64510234Syasuko.eckert@amd.com power.readOp.dynamic = 64610234Syasuko.eckert@amd.com per_access_energy * tdp_stats.readAc.access + base_energy / clockRate; 64710234Syasuko.eckert@amd.com power.readOp.dynamic *= sckRation * FU_duty_cycle; 64810152Satgutier@umich.edu 64910234Syasuko.eckert@amd.com // Runtime power calculation 65010234Syasuko.eckert@amd.com if (fu_type == FPU) { 65110234Syasuko.eckert@amd.com rtp_stats.readAc.access = core_stats.fpu_accesses; 65210234Syasuko.eckert@amd.com } else if (fu_type == ALU) { 65310234Syasuko.eckert@amd.com rtp_stats.readAc.access = core_stats.ialu_accesses; 65410234Syasuko.eckert@amd.com } else if (fu_type == MUL) { 65510234Syasuko.eckert@amd.com rtp_stats.readAc.access = core_stats.mul_accesses; 65610234Syasuko.eckert@amd.com } 65710152Satgutier@umich.edu 65810234Syasuko.eckert@amd.com rt_power.readOp.dynamic = per_access_energy * rtp_stats.readAc.access + 65910234Syasuko.eckert@amd.com base_energy * execution_time; 66010234Syasuko.eckert@amd.com rt_power.readOp.dynamic *= sckRation; 66110152Satgutier@umich.edu 66210234Syasuko.eckert@amd.com output_data.area = area.get_area() / 1e6; 66310234Syasuko.eckert@amd.com output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; 66410234Syasuko.eckert@amd.com output_data.subthreshold_leakage_power = 66510234Syasuko.eckert@amd.com (longer_channel_device) ? power.readOp.longer_channel_leakage : 66610234Syasuko.eckert@amd.com power.readOp.leakage; 66710234Syasuko.eckert@amd.com output_data.gate_leakage_power = power.readOp.gate_leakage; 66810234Syasuko.eckert@amd.com output_data.runtime_dynamic_energy = rt_power.readOp.dynamic; 66910152Satgutier@umich.edu} 67010152Satgutier@umich.edu 67110152Satgutier@umich.eduvoid FunctionalUnit::leakage_feedback(double temperature) 67210152Satgutier@umich.edu{ 67310152Satgutier@umich.edu // Update the temperature and initialize the global interfaces. 67410152Satgutier@umich.edu interface_ip.temp = (unsigned int)round(temperature/10.0)*10; 67510152Satgutier@umich.edu 67610234Syasuko.eckert@amd.com // init_result is dummy 67710234Syasuko.eckert@amd.com uca_org_t init_result = init_interface(&interface_ip, name); 67810152Satgutier@umich.edu 67910152Satgutier@umich.edu // This is part of FunctionalUnit() 68010152Satgutier@umich.edu double area_t, leakage, gate_leakage; 68110152Satgutier@umich.edu double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); 68210152Satgutier@umich.edu 68310152Satgutier@umich.edu if (fu_type == FPU) 68410152Satgutier@umich.edu { 68510152Satgutier@umich.edu area_t = 4.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 The base number 68610152Satgutier@umich.edu if (g_ip->F_sz_nm>90) 68710152Satgutier@umich.edu area_t = 4.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 68810152Satgutier@umich.edu leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W 68910152Satgutier@umich.edu gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W 69010152Satgutier@umich.edu } 69110152Satgutier@umich.edu else if (fu_type == ALU) 69210152Satgutier@umich.edu { 69310152Satgutier@umich.edu area_t = 280*260*2*num_fu*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl 69410152Satgutier@umich.edu leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W 69510152Satgutier@umich.edu gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; 69610152Satgutier@umich.edu } 69710152Satgutier@umich.edu else if (fu_type == MUL) 69810152Satgutier@umich.edu { 69910152Satgutier@umich.edu area_t = 280*260*2*3*num_fu*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl 70010152Satgutier@umich.edu leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W 70110152Satgutier@umich.edu gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; 70210152Satgutier@umich.edu } 70310152Satgutier@umich.edu else 70410152Satgutier@umich.edu { 70510152Satgutier@umich.edu cout<<"Unknown Functional Unit Type"<<endl; 70610152Satgutier@umich.edu exit(1); 70710152Satgutier@umich.edu } 70810152Satgutier@umich.edu 70910152Satgutier@umich.edu power.readOp.leakage = leakage*num_fu; 71010152Satgutier@umich.edu power.readOp.gate_leakage = gate_leakage*num_fu; 71110234Syasuko.eckert@amd.com power.readOp.longer_channel_leakage = 71210234Syasuko.eckert@amd.com longer_channel_device_reduction(Core_device, core_params.core_ty); 71310152Satgutier@umich.edu} 71410152Satgutier@umich.edu 71510234Syasuko.eckert@amd.comUndiffCore::UndiffCore(XMLNode* _xml_data, InputParameter* interface_ip_, 71610234Syasuko.eckert@amd.com const CoreParameters & dyn_p_, 71710234Syasuko.eckert@amd.com bool exist_) 71810234Syasuko.eckert@amd.com : McPATComponent(_xml_data), 71910234Syasuko.eckert@amd.com interface_ip(*interface_ip_), coredynp(dyn_p_), 72010234Syasuko.eckert@amd.com core_ty(coredynp.core_ty), embedded(coredynp.Embedded), 72110234Syasuko.eckert@amd.com pipeline_stage(coredynp.pipeline_stages), 72210234Syasuko.eckert@amd.com num_hthreads(coredynp.num_hthreads), issue_width(coredynp.issueW), 72310234Syasuko.eckert@amd.com exist(exist_) { 72410234Syasuko.eckert@amd.com if (!exist) return; 72510234Syasuko.eckert@amd.com 72610234Syasuko.eckert@amd.com name = "Undifferentiated Core"; 72710234Syasuko.eckert@amd.com clockRate = coredynp.clockRate; 72810234Syasuko.eckert@amd.com 72910234Syasuko.eckert@amd.com double undifferentiated_core = 0; 73010234Syasuko.eckert@amd.com double core_tx_density = 0; 73110234Syasuko.eckert@amd.com double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); 73210152Satgutier@umich.edu double undifferentiated_core_coe; 73310234Syasuko.eckert@amd.com uca_org_t result2; 73410234Syasuko.eckert@amd.com result2 = init_interface(&interface_ip, name); 73510152Satgutier@umich.edu 73610234Syasuko.eckert@amd.com //Compute undifferentiated core area at 90nm. 73710234Syasuko.eckert@amd.com if (embedded == false) { 73810234Syasuko.eckert@amd.com //Based on the results of polynomial/log curve fitting based on undifferentiated core of Niagara, Niagara2, Merom, Penyrn, Prescott, Opteron die measurements 73910234Syasuko.eckert@amd.com if (core_ty == OOO) { 74010234Syasuko.eckert@amd.com undifferentiated_core = (3.57 * log(pipeline_stage) - 1.2643) > 0 ? 74110234Syasuko.eckert@amd.com (3.57 * log(pipeline_stage) - 1.2643) : 0; 74210234Syasuko.eckert@amd.com } else if (core_ty == Inorder) { 74310234Syasuko.eckert@amd.com undifferentiated_core = (-2.19 * log(pipeline_stage) + 6.55) > 0 ? 74410234Syasuko.eckert@amd.com (-2.19 * log(pipeline_stage) + 6.55) : 0; 74510234Syasuko.eckert@amd.com } else { 74610234Syasuko.eckert@amd.com cout << "invalid core type" << endl; 74710234Syasuko.eckert@amd.com exit(0); 74810152Satgutier@umich.edu } 74910234Syasuko.eckert@amd.com undifferentiated_core *= (1 + logtwo(num_hthreads) * 0.0716); 75010234Syasuko.eckert@amd.com } else { 75110234Syasuko.eckert@amd.com //Based on the results in paper "parametrized processor models" Sandia Labs 75210234Syasuko.eckert@amd.com if (opt_for_clk) 75310152Satgutier@umich.edu undifferentiated_core_coe = 0.05; 75410152Satgutier@umich.edu else 75510152Satgutier@umich.edu undifferentiated_core_coe = 0; 75610234Syasuko.eckert@amd.com undifferentiated_core = (0.4109 * pipeline_stage - 0.776) * 75710234Syasuko.eckert@amd.com undifferentiated_core_coe; 75810234Syasuko.eckert@amd.com undifferentiated_core *= (1 + logtwo(num_hthreads) * 0.0426); 75910234Syasuko.eckert@amd.com } 76010152Satgutier@umich.edu 76110234Syasuko.eckert@amd.com undifferentiated_core *= g_tp.scaling_factor.logic_scaling_co_eff * 76210234Syasuko.eckert@amd.com 1e6;//change from mm^2 to um^2 76310234Syasuko.eckert@amd.com core_tx_density = g_tp.scaling_factor.core_tx_density; 76410234Syasuko.eckert@amd.com power.readOp.leakage = undifferentiated_core*(core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W 76510234Syasuko.eckert@amd.com power.readOp.gate_leakage = undifferentiated_core*(core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd; 76610152Satgutier@umich.edu 76710234Syasuko.eckert@amd.com double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty); 76810234Syasuko.eckert@amd.com power.readOp.longer_channel_leakage = 76910234Syasuko.eckert@amd.com power.readOp.leakage * long_channel_device_reduction; 77010234Syasuko.eckert@amd.com area.set_area(undifferentiated_core); 77110152Satgutier@umich.edu 77210234Syasuko.eckert@amd.com scktRatio = g_tp.sckt_co_eff; 77310234Syasuko.eckert@amd.com power.readOp.dynamic *= scktRatio; 77410234Syasuko.eckert@amd.com power.writeOp.dynamic *= scktRatio; 77510234Syasuko.eckert@amd.com power.searchOp.dynamic *= scktRatio; 77610234Syasuko.eckert@amd.com macro_PR_overhead = g_tp.macro_layout_overhead; 77710234Syasuko.eckert@amd.com area.set_area(area.get_area()*macro_PR_overhead); 77810152Satgutier@umich.edu 77910234Syasuko.eckert@amd.com output_data.area = area.get_area() / 1e6; 78010234Syasuko.eckert@amd.com output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; 78110234Syasuko.eckert@amd.com output_data.subthreshold_leakage_power = 78210234Syasuko.eckert@amd.com longer_channel_device ? power.readOp.longer_channel_leakage : 78310234Syasuko.eckert@amd.com power.readOp.leakage; 78410234Syasuko.eckert@amd.com output_data.gate_leakage_power = power.readOp.gate_leakage; 78510152Satgutier@umich.edu} 78610152Satgutier@umich.edu 78710234Syasuko.eckert@amd.comInstructionDecoder::InstructionDecoder(XMLNode* _xml_data, const string _name, 78810234Syasuko.eckert@amd.com bool _is_default, 78910234Syasuko.eckert@amd.com const InputParameter *configure_interface, 79010234Syasuko.eckert@amd.com int opcode_length_, int num_decoders_, 79110234Syasuko.eckert@amd.com bool x86_, 79210234Syasuko.eckert@amd.com double clockRate_, 79310234Syasuko.eckert@amd.com enum Device_ty device_ty_, 79410234Syasuko.eckert@amd.com enum Core_type core_ty_) 79510234Syasuko.eckert@amd.com : McPATComponent(_xml_data), is_default(_is_default), 79610234Syasuko.eckert@amd.com opcode_length(opcode_length_), num_decoders(num_decoders_), x86(x86_), 79710234Syasuko.eckert@amd.com device_ty(device_ty_), core_ty(core_ty_) { 79810234Syasuko.eckert@amd.com /* 79910234Syasuko.eckert@amd.com * Instruction decoder is different from n to 2^n decoders 80010234Syasuko.eckert@amd.com * that are commonly used in row decoders in memory arrays. 80110234Syasuko.eckert@amd.com * The RISC instruction decoder is typically a very simple device. 80210234Syasuko.eckert@amd.com * We can decode an instruction by simply 80310234Syasuko.eckert@amd.com * separating the machine word into small parts using wire slices 80410234Syasuko.eckert@amd.com * The RISC instruction decoder can be approximate by the n to 2^n decoders, 80510234Syasuko.eckert@amd.com * although this approximation usually underestimate power since each decoded 80610234Syasuko.eckert@amd.com * instruction normally has more than 1 active signal. 80710234Syasuko.eckert@amd.com * 80810234Syasuko.eckert@amd.com * However, decoding a CISC instruction word is much more difficult 80910234Syasuko.eckert@amd.com * than the RISC case. A CISC decoder is typically set up as a state machine. 81010234Syasuko.eckert@amd.com * The machine reads the opcode field to determine 81110234Syasuko.eckert@amd.com * what type of instruction it is, 81210234Syasuko.eckert@amd.com * and where the other data values are. 81310234Syasuko.eckert@amd.com * The instruction word is read in piece by piece, 81410234Syasuko.eckert@amd.com * and decisions are made at each stage as to 81510234Syasuko.eckert@amd.com * how the remainder of the instruction word will be read. 81610234Syasuko.eckert@amd.com * (sequencer and ROM are usually needed) 81710234Syasuko.eckert@amd.com * An x86 decoder can be even more complex since 81810234Syasuko.eckert@amd.com * it involve both decoding instructions into u-ops and 81910234Syasuko.eckert@amd.com * merge u-ops when doing micro-ops fusion. 82010234Syasuko.eckert@amd.com */ 82110234Syasuko.eckert@amd.com name = _name; 82210234Syasuko.eckert@amd.com clockRate = clockRate_; 82310234Syasuko.eckert@amd.com bool is_dram = false; 82410234Syasuko.eckert@amd.com double pmos_to_nmos_sizing_r; 82510234Syasuko.eckert@amd.com double load_nmos_width, load_pmos_width; 82610234Syasuko.eckert@amd.com double C_driver_load, R_wire_load; 82710234Syasuko.eckert@amd.com Area cell; 82810152Satgutier@umich.edu 82910234Syasuko.eckert@amd.com l_ip = *configure_interface; 83010234Syasuko.eckert@amd.com local_result = init_interface(&l_ip, name); 83110234Syasuko.eckert@amd.com cell.h = g_tp.cell_h_def; 83210234Syasuko.eckert@amd.com cell.w = g_tp.cell_h_def; 83310152Satgutier@umich.edu 83410234Syasuko.eckert@amd.com num_decoder_segments = (int)ceil(opcode_length / 18.0); 83510234Syasuko.eckert@amd.com if (opcode_length > 18) opcode_length = 18; 83610234Syasuko.eckert@amd.com num_decoded_signals = (int)pow(2.0, opcode_length); 83710234Syasuko.eckert@amd.com pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); 83810234Syasuko.eckert@amd.com load_nmos_width = g_tp.max_w_nmos_ / 2; 83910234Syasuko.eckert@amd.com load_pmos_width = g_tp.max_w_nmos_ * pmos_to_nmos_sizing_r; 84010234Syasuko.eckert@amd.com C_driver_load = 1024 * gate_C(load_nmos_width + load_pmos_width, 0, is_dram); 84110234Syasuko.eckert@amd.com R_wire_load = 3000 * l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um; 84210152Satgutier@umich.edu 84310234Syasuko.eckert@amd.com final_dec = new Decoder( 84410234Syasuko.eckert@amd.com num_decoded_signals, 84510234Syasuko.eckert@amd.com false, 84610234Syasuko.eckert@amd.com C_driver_load, 84710234Syasuko.eckert@amd.com R_wire_load, 84810234Syasuko.eckert@amd.com false/*is_fa*/, 84910234Syasuko.eckert@amd.com false/*is_dram*/, 85010234Syasuko.eckert@amd.com false/*wl_tr*/, //to use peri device 85110234Syasuko.eckert@amd.com cell); 85210234Syasuko.eckert@amd.com 85310234Syasuko.eckert@amd.com PredecBlk * predec_blk1 = new PredecBlk( 85410234Syasuko.eckert@amd.com num_decoded_signals, 85510234Syasuko.eckert@amd.com final_dec, 85610234Syasuko.eckert@amd.com 0,//Assuming predec and dec are back to back 85710234Syasuko.eckert@amd.com 0, 85810234Syasuko.eckert@amd.com 1,//Each Predec only drives one final dec 85910234Syasuko.eckert@amd.com false/*is_dram*/, 86010234Syasuko.eckert@amd.com true); 86110234Syasuko.eckert@amd.com PredecBlk * predec_blk2 = new PredecBlk( 86210234Syasuko.eckert@amd.com num_decoded_signals, 86310234Syasuko.eckert@amd.com final_dec, 86410234Syasuko.eckert@amd.com 0,//Assuming predec and dec are back to back 86510234Syasuko.eckert@amd.com 0, 86610234Syasuko.eckert@amd.com 1,//Each Predec only drives one final dec 86710234Syasuko.eckert@amd.com false/*is_dram*/, 86810234Syasuko.eckert@amd.com false); 86910234Syasuko.eckert@amd.com 87010234Syasuko.eckert@amd.com PredecBlkDrv * predec_blk_drv1 = new PredecBlkDrv(0, predec_blk1, false); 87110234Syasuko.eckert@amd.com PredecBlkDrv * predec_blk_drv2 = new PredecBlkDrv(0, predec_blk2, false); 87210234Syasuko.eckert@amd.com 87310234Syasuko.eckert@amd.com pre_dec = new Predec(predec_blk_drv1, predec_blk_drv2); 87410234Syasuko.eckert@amd.com 87510234Syasuko.eckert@amd.com double area_decoder = final_dec->area.get_area() * num_decoded_signals * 87610234Syasuko.eckert@amd.com num_decoder_segments * num_decoders; 87710234Syasuko.eckert@amd.com //double w_decoder = area_decoder / area.get_h(); 87810234Syasuko.eckert@amd.com double area_pre_dec = (predec_blk_drv1->area.get_area() + 87910234Syasuko.eckert@amd.com predec_blk_drv2->area.get_area() + 88010234Syasuko.eckert@amd.com predec_blk1->area.get_area() + 88110234Syasuko.eckert@amd.com predec_blk2->area.get_area()) * 88210234Syasuko.eckert@amd.com num_decoder_segments * num_decoders; 88310234Syasuko.eckert@amd.com area.set_area(area.get_area() + area_decoder + area_pre_dec); 88410234Syasuko.eckert@amd.com double macro_layout_overhead = g_tp.macro_layout_overhead; 88510234Syasuko.eckert@amd.com double chip_PR_overhead = g_tp.chip_layout_overhead; 88610234Syasuko.eckert@amd.com area.set_area(area.get_area()*macro_layout_overhead*chip_PR_overhead); 88710234Syasuko.eckert@amd.com 88810234Syasuko.eckert@amd.com inst_decoder_delay_power(); 88910234Syasuko.eckert@amd.com 89010234Syasuko.eckert@amd.com double sckRation = g_tp.sckt_co_eff; 89110234Syasuko.eckert@amd.com power.readOp.dynamic *= sckRation; 89210234Syasuko.eckert@amd.com power.writeOp.dynamic *= sckRation; 89310234Syasuko.eckert@amd.com power.searchOp.dynamic *= sckRation; 89410234Syasuko.eckert@amd.com 89510234Syasuko.eckert@amd.com double long_channel_device_reduction = 89610234Syasuko.eckert@amd.com longer_channel_device_reduction(device_ty, core_ty); 89710234Syasuko.eckert@amd.com power.readOp.longer_channel_leakage = power.readOp.leakage * 89810234Syasuko.eckert@amd.com long_channel_device_reduction; 89910234Syasuko.eckert@amd.com 90010234Syasuko.eckert@amd.com output_data.area = area.get_area() / 1e6; 90110234Syasuko.eckert@amd.com output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; 90210234Syasuko.eckert@amd.com output_data.subthreshold_leakage_power = power.readOp.leakage; 90310234Syasuko.eckert@amd.com output_data.gate_leakage_power = power.readOp.gate_leakage; 90410152Satgutier@umich.edu} 90510152Satgutier@umich.edu 90610234Syasuko.eckert@amd.comvoid InstructionDecoder::inst_decoder_delay_power() { 90710152Satgutier@umich.edu 90810234Syasuko.eckert@amd.com double dec_outrisetime; 90910234Syasuko.eckert@amd.com double inrisetime = 0, outrisetime; 91010234Syasuko.eckert@amd.com double pppm_t[4] = {1, 1, 1, 1}; 91110234Syasuko.eckert@amd.com double squencer_passes = x86 ? 2 : 1; 91210152Satgutier@umich.edu 91310234Syasuko.eckert@amd.com outrisetime = pre_dec->compute_delays(inrisetime); 91410234Syasuko.eckert@amd.com dec_outrisetime = final_dec->compute_delays(outrisetime); 91510234Syasuko.eckert@amd.com set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments); 91610234Syasuko.eckert@amd.com power = power + pre_dec->power * pppm_t; 91710234Syasuko.eckert@amd.com set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments*num_decoded_signals, 91810234Syasuko.eckert@amd.com num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments); 91910234Syasuko.eckert@amd.com power = power + final_dec->power * pppm_t; 92010152Satgutier@umich.edu} 92110152Satgutier@umich.edu 92210234Syasuko.eckert@amd.comvoid InstructionDecoder::leakage_feedback(double temperature) { 92310152Satgutier@umich.edu l_ip.temp = (unsigned int)round(temperature/10.0)*10; 92410234Syasuko.eckert@amd.com uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy 92510152Satgutier@umich.edu 92610152Satgutier@umich.edu final_dec->leakage_feedback(temperature); 92710152Satgutier@umich.edu pre_dec->leakage_feedback(temperature); 92810152Satgutier@umich.edu 92910152Satgutier@umich.edu double pppm_t[4] = {1,1,1,1}; 93010152Satgutier@umich.edu double squencer_passes = x86?2:1; 93110152Satgutier@umich.edu 93210152Satgutier@umich.edu set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments); 93310152Satgutier@umich.edu power = pre_dec->power*pppm_t; 93410152Satgutier@umich.edu 93510152Satgutier@umich.edu set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments*num_decoded_signals,num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments); 93610152Satgutier@umich.edu power = power + final_dec->power*pppm_t; 93710152Satgutier@umich.edu 93810152Satgutier@umich.edu double sckRation = g_tp.sckt_co_eff; 93910152Satgutier@umich.edu 94010152Satgutier@umich.edu power.readOp.dynamic *= sckRation; 94110152Satgutier@umich.edu power.writeOp.dynamic *= sckRation; 94210152Satgutier@umich.edu power.searchOp.dynamic *= sckRation; 94310152Satgutier@umich.edu 94410152Satgutier@umich.edu double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); 94510152Satgutier@umich.edu power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; 94610152Satgutier@umich.edu} 94710152Satgutier@umich.edu 94810234Syasuko.eckert@amd.comInstructionDecoder::~InstructionDecoder() { 94910234Syasuko.eckert@amd.com local_result.cleanup(); 95010152Satgutier@umich.edu 95110234Syasuko.eckert@amd.com delete final_dec; 95210152Satgutier@umich.edu 95310234Syasuko.eckert@amd.com delete pre_dec->blk1; 95410234Syasuko.eckert@amd.com delete pre_dec->blk2; 95510234Syasuko.eckert@amd.com delete pre_dec->drv1; 95610234Syasuko.eckert@amd.com delete pre_dec->drv2; 95710234Syasuko.eckert@amd.com delete pre_dec; 95810152Satgutier@umich.edu} 959