htree2.cc (10152:52c552138ba1) | htree2.cc (10234:5cb711fa6176) |
---|---|
1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. | 1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. |
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. |
|
5 * All Rights Reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer; 11 * redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the --- 7 unchanged lines hidden (view full) --- 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 6 * All Rights Reserved 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are 10 * met: redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer; 12 * redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the --- 7 unchanged lines hidden (view full) --- 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” | 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 * 30 ***************************************************************************/ 31 32 33 34#include <cassert> 35#include <iostream> 36 37#include "htree2.h" 38#include "wire.h" 39 40Htree2::Htree2( 41 enum Wire_type wire_model, double mat_w, double mat_h, | 30 * 31 ***************************************************************************/ 32 33 34 35#include <cassert> 36#include <iostream> 37 38#include "htree2.h" 39#include "wire.h" 40 41Htree2::Htree2( 42 enum Wire_type wire_model, double mat_w, double mat_h, |
42 int a_bits, int d_inbits, int search_data_in, int d_outbits, int search_data_out, int bl, int wl, enum Htree_type htree_type, | 43 int a_bits, int d_inbits, int search_data_in, int d_outbits, 44 int search_data_out, int bl, int wl, enum Htree_type htree_type, |
43 bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt) | 45 bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt) |
44 :in_rise_time(0), out_rise_time(0), 45 tree_type(htree_type), mat_width(mat_w), mat_height(mat_h), 46 add_bits(a_bits), data_in_bits(d_inbits), search_data_in_bits(search_data_in),data_out_bits(d_outbits), 47 search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl), 48 uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), deviceType(dt) 49{ 50 assert(ndbl >= 2 && ndwl >= 2); | 46 : in_rise_time(0), out_rise_time(0), 47 tree_type(htree_type), mat_width(mat_w), mat_height(mat_h), 48 add_bits(a_bits), data_in_bits(d_inbits), 49 search_data_in_bits(search_data_in), data_out_bits(d_outbits), 50 search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl), 51 uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), 52 deviceType(dt) { 53 assert(ndbl >= 2 && ndwl >= 2); |
51 52// if (ndbl == 1 && ndwl == 1) 53// { 54// delay = 0; 55// power.readOp.dynamic = 0; 56// power.readOp.leakage = 0; 57// area.w = mat_w; 58// area.h = mat_h; 59// return; 60// } 61// if (ndwl == 1) ndwl++; 62// if (ndbl == 1) ndbl++; 63 | 54 55// if (ndbl == 1 && ndwl == 1) 56// { 57// delay = 0; 58// power.readOp.dynamic = 0; 59// power.readOp.leakage = 0; 60// area.w = mat_w; 61// area.h = mat_h; 62// return; 63// } 64// if (ndwl == 1) ndwl++; 65// if (ndbl == 1) ndbl++; 66 |
64 max_unpipelined_link_delay = 0; //TODO 65 min_w_nmos = g_tp.min_w_nmos_; 66 min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; | 67 max_unpipelined_link_delay = 0; //TODO 68 min_w_nmos = g_tp.min_w_nmos_; 69 min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; |
67 | 70 |
68 switch (htree_type) 69 { | 71 switch (htree_type) { |
70 case Add_htree: | 72 case Add_htree: |
71 wire_bw = init_wire_bw = add_bits; 72 in_htree(); 73 break; | 73 wire_bw = init_wire_bw = add_bits; 74 in_htree(); 75 break; |
74 case Data_in_htree: | 76 case Data_in_htree: |
75 wire_bw = init_wire_bw = data_in_bits; 76 in_htree(); 77 break; | 77 wire_bw = init_wire_bw = data_in_bits; 78 in_htree(); 79 break; |
78 case Data_out_htree: | 80 case Data_out_htree: |
79 wire_bw = init_wire_bw = data_out_bits; 80 out_htree(); 81 break; | 81 wire_bw = init_wire_bw = data_out_bits; 82 out_htree(); 83 break; |
82 case Search_in_htree: | 84 case Search_in_htree: |
83 wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broadcast, out_htree is not. 84 in_htree(); 85 break; | 85 wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broadcast, out_htree is not. 86 in_htree(); 87 break; |
86 case Search_out_htree: | 88 case Search_out_htree: |
87 wire_bw = init_wire_bw = search_data_out_bits; 88 out_htree(); 89 break; | 89 wire_bw = init_wire_bw = search_data_out_bits; 90 out_htree(); 91 break; |
90 default: | 92 default: |
91 assert(0); 92 break; 93 } | 93 assert(0); 94 break; 95 } |
94 | 96 |
95 power_bit = power; 96 power.readOp.dynamic *= init_wire_bw; | 97 power_bit = power; 98 power.readOp.dynamic *= init_wire_bw; |
97 | 99 |
98 assert(power.readOp.dynamic >= 0); 99 assert(power.readOp.leakage >= 0); | 100 assert(power.readOp.dynamic >= 0); 101 assert(power.readOp.leakage >= 0); |
100} 101 102 103 104// nand gate sizing calculation | 102} 103 104 105 106// nand gate sizing calculation |
105void Htree2::input_nand(double s1, double s2, double l_eff) 106{ 107 Wire w1(wt, l_eff); 108 double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; 109 // input capacitance of a repeater = input capacitance of nand. 110 double nsize = s1*(1 + pton_size)/(2 + pton_size); 111 nsize = (nsize < 1) ? 1 : nsize; | 107void Htree2::input_nand(double s1, double s2, double l_eff) { 108 Wire w1(wt, l_eff); 109 double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; 110 // input capacitance of a repeater = input capacitance of nand. 111 double nsize = s1 * (1 + pton_size) / (2 + pton_size); 112 nsize = (nsize < 1) ? 1 : nsize; |
112 | 113 |
113 double tc = 2*tr_R_on(nsize*min_w_nmos, NCH, 1) * 114 (drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)*2 + 115 2 * gate_C(s2*(min_w_nmos + min_w_pmos), 0)); 116 delay+= horowitz (w1.out_rise_time, tc, 117 deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); 118 power.readOp.dynamic += 0.5 * 119 (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 120 + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) 121 + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) * 122 deviceType->Vdd * deviceType->Vdd; | 114 double tc = 2 * tr_R_on(nsize * min_w_nmos, NCH, 1) * 115 (drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) * 2 + 116 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)); 117 delay += horowitz(w1.out_rise_time, tc, 118 deviceType->Vth / deviceType->Vdd, deviceType->Vth / 119 deviceType->Vdd, RISE); 120 power.readOp.dynamic += 0.5 * 121 (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 122 + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) 123 + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) * 124 deviceType->Vdd * deviceType->Vdd; |
123 124 power.searchOp.dynamic += 0.5 * | 125 126 power.searchOp.dynamic += 0.5 * |
125 (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 126 + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) 127 + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) * 128 deviceType->Vdd * deviceType->Vdd * wire_bw ; 129 power.readOp.leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd; 130 power.readOp.gate_leakage += (wire_bw*cmos_Ig_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd; | 127 (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 128 + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) 129 + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) * 130 deviceType->Vdd * deviceType->Vdd * wire_bw ; 131 power.readOp.leakage += (wire_bw * 132 cmos_Isub_leakage(min_w_nmos * (nsize * 2), 133 min_w_pmos * nsize * 2, 2, 134 nand)) * deviceType->Vdd; 135 power.readOp.gate_leakage += (wire_bw * 136 cmos_Ig_leakage(min_w_nmos * (nsize * 2), 137 min_w_pmos * nsize * 2, 2, 138 nand)) * deviceType->Vdd; |
131} 132 133 134 135// tristate buffer model consisting of not, nand, nor, and driver transistors | 139} 140 141 142 143// tristate buffer model consisting of not, nand, nor, and driver transistors |
136void Htree2::output_buffer(double s1, double s2, double l_eff) 137{ 138 Wire w1(wt, l_eff); 139 double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; 140 // input capacitance of repeater = input capacitance of nand + nor. 141 double size = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size); 142 double s_eff = //stage eff of a repeater in a wire 143 (gate_C(s2*(min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff*1e-6,true))/ 144 gate_C(s2*(min_w_nmos + min_w_pmos), 0); 145 double tr_size = gate_C(s1*(min_w_nmos + min_w_pmos), 0) * 1/2/(s_eff*gate_C(min_w_pmos, 0)); 146 size = (size < 1) ? 1 : size; | 144void Htree2::output_buffer(double s1, double s2, double l_eff) { 145 Wire w1(wt, l_eff); 146 double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; 147 // input capacitance of repeater = input capacitance of nand + nor. 148 double size = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size); 149 double s_eff = //stage eff of a repeater in a wire 150 (gate_C(s2 * (min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff * 1e-6, 151 true)) / 152 gate_C(s2 * (min_w_nmos + min_w_pmos), 0); 153 double tr_size = gate_C(s1 * (min_w_nmos + min_w_pmos), 0) * 1 / 2 / 154 (s_eff * gate_C(min_w_pmos, 0)); 155 size = (size < 1) ? 1 : size; |
147 | 156 |
148 double res_nor = 2*tr_R_on(size*min_w_pmos, PCH, 1); 149 double res_ptrans = tr_R_on(tr_size*min_w_nmos, NCH, 1); 150 double cap_nand_out = drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + 151 drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + 152 gate_C(tr_size*min_w_pmos, 0); 153 double cap_ptrans_out = 2 *(drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + 154 drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) + 155 gate_C(s1*(min_w_nmos + min_w_pmos), 0); | 157 double res_nor = 2 * tr_R_on(size * min_w_pmos, PCH, 1); 158 double res_ptrans = tr_R_on(tr_size * min_w_nmos, NCH, 1); 159 double cap_nand_out = 160 drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + 161 drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 + 162 gate_C(tr_size * min_w_pmos, 0); 163 double cap_ptrans_out = 2 * 164 (drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + 165 drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) + 166 gate_C(s1 * (min_w_nmos + min_w_pmos), 0); |
156 | 167 |
157 double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out; | 168 double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out; |
158 159 | 169 170 |
160 delay += horowitz (w1.out_rise_time, tc, 161 deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); | 171 delay += horowitz(w1.out_rise_time, tc, 172 deviceType->Vth / deviceType->Vdd, deviceType->Vth / 173 deviceType->Vdd, RISE); |
162 | 174 |
163 //nand 164 power.readOp.dynamic += 0.5 * 165 (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + 166 drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + 167 gate_C(tr_size*(min_w_pmos), 0)) * 168 deviceType->Vdd * deviceType->Vdd; | 175 //nand 176 power.readOp.dynamic += 0.5 * 177 (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + 178 drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + 179 gate_C(tr_size * (min_w_pmos), 0)) * 180 deviceType->Vdd * deviceType->Vdd; |
169 170 power.searchOp.dynamic += 0.5 * | 181 182 power.searchOp.dynamic += 0.5 * |
171 (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + 172 drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + 173 gate_C(tr_size*(min_w_pmos), 0)) * 174 deviceType->Vdd * deviceType->Vdd*init_wire_bw; | 183 (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + 184 drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + 185 gate_C(tr_size * (min_w_pmos), 0)) * 186 deviceType->Vdd * deviceType->Vdd * init_wire_bw; |
175 | 187 |
176 //not 177 power.readOp.dynamic += 0.5 * 178 (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 179 +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) 180 +gate_C(size*(min_w_nmos + min_w_pmos), 0)) * 181 deviceType->Vdd * deviceType->Vdd; | 188 //not 189 power.readOp.dynamic += 0.5 * 190 (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 191 + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) 192 + gate_C(size * (min_w_nmos + min_w_pmos), 0)) * 193 deviceType->Vdd * deviceType->Vdd; |
182 183 power.searchOp.dynamic += 0.5 * | 194 195 power.searchOp.dynamic += 0.5 * |
184 (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 185 +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) 186 +gate_C(size*(min_w_nmos + min_w_pmos), 0)) * 187 deviceType->Vdd * deviceType->Vdd*init_wire_bw; | 196 (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 197 + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) 198 + gate_C(size * (min_w_nmos + min_w_pmos), 0)) * 199 deviceType->Vdd * deviceType->Vdd * init_wire_bw; |
188 | 200 |
189 //nor 190 power.readOp.dynamic += 0.5 * 191 (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 192 + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) 193 +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) * 194 deviceType->Vdd * deviceType->Vdd; | 201 //nor 202 power.readOp.dynamic += 0.5 * 203 (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 204 + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) 205 + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) * 206 deviceType->Vdd * deviceType->Vdd; |
195 196 power.searchOp.dynamic += 0.5 * | 207 208 power.searchOp.dynamic += 0.5 * |
197 (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 198 + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) 199 +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) * 200 deviceType->Vdd * deviceType->Vdd*init_wire_bw; | 209 (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 210 + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) 211 + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) * 212 deviceType->Vdd * deviceType->Vdd * init_wire_bw; |
201 | 213 |
202 //output transistor 203 power.readOp.dynamic += 0.5 * 204 ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 205 +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2 206 + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) * 207 deviceType->Vdd * deviceType->Vdd; | 214 //output transistor 215 power.readOp.dynamic += 0.5 * 216 ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 217 + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * 2 218 + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) * 219 deviceType->Vdd * deviceType->Vdd; |
208 209 power.searchOp.dynamic += 0.5 * | 220 221 power.searchOp.dynamic += 0.5 * |
210 ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 211 +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2 212 + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) * 213 deviceType->Vdd * deviceType->Vdd*init_wire_bw; | 222 ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) 223 + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * 2 224 + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) * 225 deviceType->Vdd * deviceType->Vdd * init_wire_bw; |
214 | 226 |
215 if(uca_tree) { 216 power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ 217 power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand 218 power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor | 227 if (uca_tree) { 228 power.readOp.leakage += 229 cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 230 2, 1, inv) * 231 deviceType->Vdd * wire_bw;/*inverter + output tr*/ 232 power.readOp.leakage += 233 cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, 234 nand) * deviceType->Vdd * wire_bw;//nand 235 power.readOp.leakage += 236 cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, 237 nor) * deviceType->Vdd * wire_bw;//nor |
219 | 238 |
220 power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ 221 power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand 222 power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor 223 //power.readOp.gate_leakage *=; 224 } 225 else { 226 power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ 227 power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand 228 power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor | 239 power.readOp.gate_leakage += 240 cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 241 1, inv) * 242 deviceType->Vdd * wire_bw;/*inverter + output tr*/ 243 power.readOp.gate_leakage += 244 cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, 245 nand) * deviceType->Vdd * wire_bw;//nand 246 power.readOp.gate_leakage += 247 cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, 248 nor) * deviceType->Vdd * wire_bw;//nor 249 } else { 250 power.readOp.leakage += 251 cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 252 2, 1, inv) * 253 deviceType->Vdd * wire_bw;/*inverter + output tr*/ 254 power.readOp.leakage += 255 cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, 256 nand) * deviceType->Vdd * wire_bw;//nand 257 power.readOp.leakage += 258 cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, 259 nor) * deviceType->Vdd * wire_bw;//nor |
229 | 260 |
230 power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ 231 power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand 232 power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor 233 //power.readOp.gate_leakage *=deviceType->Vdd*wire_bw; 234 } | 261 power.readOp.gate_leakage += 262 cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 263 1, inv) * 264 deviceType->Vdd * wire_bw;/*inverter + output tr*/ 265 power.readOp.gate_leakage += 266 cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, 267 nand) * deviceType->Vdd * wire_bw;//nand 268 power.readOp.gate_leakage += 269 cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, 270 nor) * deviceType->Vdd * wire_bw;//nor 271 } |
235} 236 237 238 239/* calculates the input h-tree delay/power 240 * A nand gate is used at each node to 241 * limit the signal 242 * The area of an unbalanced htree (rows != columns) 243 * depends on how data is traversed. 244 * In the following function, if ( no. of rows < no. of columns), 245 * then data first traverse in excess hor. links until vertical 246 * and horizontal nodes are same. 247 * If no. of rows is bigger, then data traverse in 248 * a hor. link followed by a ver. link in a repeated 249 * fashion (similar to a balanced tree) until there are no 250 * hor. links left. After this it goes through the remaining vertical 251 * links. 252 */ | 272} 273 274 275 276/* calculates the input h-tree delay/power 277 * A nand gate is used at each node to 278 * limit the signal 279 * The area of an unbalanced htree (rows != columns) 280 * depends on how data is traversed. 281 * In the following function, if ( no. of rows < no. of columns), 282 * then data first traverse in excess hor. links until vertical 283 * and horizontal nodes are same. 284 * If no. of rows is bigger, then data traverse in 285 * a hor. link followed by a ver. link in a repeated 286 * fashion (similar to a balanced tree) until there are no 287 * hor. links left. After this it goes through the remaining vertical 288 * links. 289 */ |
253 void 254Htree2::in_htree() 255{ 256 //temp var 257 double s1 = 0, s2 = 0, s3 = 0; 258 double l_eff = 0; 259 Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; 260 double len = 0, ht = 0; 261 int option = 0; | 290void 291Htree2::in_htree() { 292 //temp var 293 double s1 = 0, s2 = 0, s3 = 0; 294 double l_eff = 0; 295 Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; 296 double len = 0, ht = 0; 297 int option = 0; |
262 | 298 |
263 int h = (int) _log2(ndwl/2); // horizontal nodes 264 int v = (int) _log2(ndbl/2); // vertical nodes 265 double len_temp; 266 double ht_temp; 267 if (uca_tree) 268 {//Sheng: this computation do not consider the wires that route from edge to middle. 269 ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */ 270 ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * 271 2 * (1-pow(0.5,h))))/2; 272 len_temp = (mat_width*ndwl/2 + 273 ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * 274 2 * (1-pow(0.5,v))))/2; 275 } 276 else 277 { 278 if (ndwl == ndbl) { 279 ht_temp = ((mat_height*ndbl/2) + 280 ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndbl/2-1) * g_tp.wire_outside_mat.pitch) + 281 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) 282 )/2; 283 len_temp = (mat_width*ndwl/2 + 284 ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + 285 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; | 299 int h = (int) _log2(ndwl / 2); // horizontal nodes 300 int v = (int) _log2(ndbl / 2); // vertical nodes 301 double len_temp; 302 double ht_temp; 303 if (uca_tree) { 304 //Sheng: this computation do not consider the wires that route from 305 //edge to middle. 
306 ht_temp = (mat_height * ndbl / 2 + 307 /* since uca_tree models interbank tree, 308 mat_height => bank height */ 309 ((add_bits + data_in_bits + data_out_bits + 310 (search_data_in_bits + search_data_out_bits)) * 311 g_tp.wire_outside_mat.pitch * 312 2 * (1 - pow(0.5, h)))) / 2; 313 len_temp = (mat_width * ndwl / 2 + 314 ((add_bits + data_in_bits + data_out_bits + 315 (search_data_in_bits + search_data_out_bits)) * 316 g_tp.wire_outside_mat.pitch * 317 2 * (1 - pow(0.5, v)))) / 2; 318 } else { 319 if (ndwl == ndbl) { 320 ht_temp = ((mat_height * ndbl / 2) + 321 ((add_bits + (search_data_in_bits + 322 search_data_out_bits)) * (ndbl / 2 - 1) * 323 g_tp.wire_outside_mat.pitch) + 324 ((data_in_bits + data_out_bits) * 325 g_tp.wire_outside_mat.pitch * h) 326 ) / 2; 327 len_temp = (mat_width * ndwl / 2 + 328 ((add_bits + (search_data_in_bits + 329 search_data_out_bits)) * (ndwl / 2 - 1) * 330 g_tp.wire_outside_mat.pitch) + 331 ((data_in_bits + data_out_bits) * 332 g_tp.wire_outside_mat.pitch * v)) / 2; 333 } else if (ndwl > ndbl) { 334 double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2)); 335 ht_temp = ((mat_height * ndbl / 2) + 336 ((add_bits + + (search_data_in_bits + 337 search_data_out_bits)) * 338 ((ndbl / 2 - 1) + excess_part) * 339 g_tp.wire_outside_mat.pitch) + 340 (data_in_bits + data_out_bits) * 341 g_tp.wire_outside_mat.pitch * 342 (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2; 343 len_temp = (mat_width * ndwl / 2 + 344 ((add_bits + 345 (search_data_in_bits + search_data_out_bits)) * 346 (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + 347 ((data_in_bits + data_out_bits) * 348 g_tp.wire_outside_mat.pitch * v)) / 2; 349 } else { 350 double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2)); 351 ht_temp = ((mat_height * ndbl / 2) + 352 ((add_bits + 353 (search_data_in_bits + search_data_out_bits)) * 354 ((ndwl / 2 - 1) + excess_part) * 355 g_tp.wire_outside_mat.pitch) + 356 ((data_in_bits + data_out_bits) * 357 g_tp.wire_outside_mat.pitch * h) 358 
) / 2; 359 len_temp = (mat_width * ndwl / 2 + 360 ((add_bits + 361 (search_data_in_bits + search_data_out_bits)) * 362 ((ndwl / 2 - 1) + excess_part) * 363 g_tp.wire_outside_mat.pitch) + 364 (data_in_bits + data_out_bits) * 365 g_tp.wire_outside_mat.pitch * 366 (h + 2 * (1 - pow(0.5, v - h)))) / 2; 367 } |
286 } | 368 } |
287 else if (ndwl > ndbl) { 288 double excess_part = (_log2(ndwl/2) - _log2(ndbl/2)); 289 ht_temp = ((mat_height*ndbl/2) + 290 ((add_bits + + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + 291 (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * 292 (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2; 293 len_temp = (mat_width*ndwl/2 + 294 ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + 295 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; 296 } 297 else { 298 double excess_part = (_log2(ndbl/2) - _log2(ndwl/2)); 299 ht_temp = ((mat_height*ndbl/2) + 300 ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + 301 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) 302 )/2; 303 len_temp = (mat_width*ndwl/2 + 304 ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + 305 (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2; 306 } 307 } | |
308 | 369 |
309 area.h = ht_temp * 2; 310 area.w = len_temp * 2; 311 delay = 0; 312 power.readOp.dynamic = 0; 313 power.readOp.leakage = 0; 314 power.searchOp.dynamic =0; 315 len = len_temp; 316 ht = ht_temp/2; | 370 area.h = ht_temp * 2; 371 area.w = len_temp * 2; 372 delay = 0; 373 power.readOp.dynamic = 0; 374 power.readOp.leakage = 0; 375 power.searchOp.dynamic = 0; 376 len = len_temp; 377 ht = ht_temp / 2; |
317 | 378 |
318 while (v > 0 || h > 0) 319 { 320 if (wtemp1) delete wtemp1; 321 if (wtemp2) delete wtemp2; 322 if (wtemp3) delete wtemp3; | 379 while (v > 0 || h > 0) { 380 if (wtemp1) delete wtemp1; 381 if (wtemp2) delete wtemp2; 382 if (wtemp3) delete wtemp3; |
323 | 383 |
324 if (h > v) 325 { 326 //the iteration considers only one horizontal link 327 wtemp1 = new Wire(wt, len); // hor 328 wtemp2 = new Wire(wt, len/2); // ver 329 len_temp = len; 330 len /= 2; 331 wtemp3 = 0; 332 h--; 333 option = 0; 334 } 335 else if (v>0 && h>0) 336 { 337 //considers one horizontal link and one vertical link 338 wtemp1 = new Wire(wt, len); // hor 339 wtemp2 = new Wire(wt, ht); // ver 340 wtemp3 = new Wire(wt, len/2); // next hor 341 len_temp = len; 342 ht_temp = ht; 343 len /= 2; 344 ht /= 2; 345 v--; 346 h--; 347 option = 1; 348 } 349 else 350 { 351 // considers only one vertical link 352 assert(h == 0); 353 wtemp1 = new Wire(wt, ht); // ver 354 wtemp2 = new Wire(wt, ht/2); // hor 355 ht_temp = ht; 356 ht /= 2; 357 wtemp3 = 0; 358 v--; 359 option = 2; 360 } | 384 if (h > v) { 385 //the iteration considers only one horizontal link 386 wtemp1 = new Wire(wt, len); // hor 387 wtemp2 = new Wire(wt, len / 2); // ver 388 len_temp = len; 389 len /= 2; 390 wtemp3 = 0; 391 h--; 392 option = 0; 393 } else if (v > 0 && h > 0) { 394 //considers one horizontal link and one vertical link 395 wtemp1 = new Wire(wt, len); // hor 396 wtemp2 = new Wire(wt, ht); // ver 397 wtemp3 = new Wire(wt, len / 2); // next hor 398 len_temp = len; 399 ht_temp = ht; 400 len /= 2; 401 ht /= 2; 402 v--; 403 h--; 404 option = 1; 405 } else { 406 // considers only one vertical link 407 assert(h == 0); 408 wtemp1 = new Wire(wt, ht); // ver 409 wtemp2 = new Wire(wt, ht / 2); // hor 410 ht_temp = ht; 411 ht /= 2; 412 wtemp3 = 0; 413 v--; 414 option = 2; 415 } |
361 | 416 |
362 delay += wtemp1->delay; 363 power.readOp.dynamic += wtemp1->power.readOp.dynamic; 364 power.searchOp.dynamic += wtemp1->power.readOp.dynamic*wire_bw; 365 power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw; 366 power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw; 367 if ((uca_tree == false && option == 2) || search_tree==true) 368 { 369 wire_bw*=2; // wire bandwidth doubles only for vertical branches 370 } | 417 delay += wtemp1->delay; 418 power.readOp.dynamic += wtemp1->power.readOp.dynamic; 419 power.searchOp.dynamic += wtemp1->power.readOp.dynamic * wire_bw; 420 power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw; 421 power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw; 422 if ((uca_tree == false && option == 2) || search_tree == true) { 423 wire_bw *= 2; // wire bandwidth doubles only for vertical branches 424 } |
371 | 425 |
372 if (uca_tree == false) 373 { 374 if (len_temp > wtemp1->repeater_spacing) 375 { 376 s1 = wtemp1->repeater_size; 377 l_eff = wtemp1->repeater_spacing; 378 } 379 else 380 { 381 s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size; 382 l_eff = len_temp; 383 } | 426 if (uca_tree == false) { 427 if (len_temp > wtemp1->repeater_spacing) { 428 s1 = wtemp1->repeater_size; 429 l_eff = wtemp1->repeater_spacing; 430 } else { 431 s1 = (len_temp / wtemp1->repeater_spacing) * 432 wtemp1->repeater_size; 433 l_eff = len_temp; 434 } |
384 | 435 |
385 if (ht_temp > wtemp2->repeater_spacing) 386 { 387 s2 = wtemp2->repeater_size; 388 } 389 else 390 { 391 s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size; 392 } 393 // first level 394 input_nand(s1, s2, l_eff); 395 } | 436 if (ht_temp > wtemp2->repeater_spacing) { 437 s2 = wtemp2->repeater_size; 438 } else { 439 s2 = (len_temp / wtemp2->repeater_spacing) * 440 wtemp2->repeater_size; 441 } 442 // first level 443 input_nand(s1, s2, l_eff); 444 } |
396 397 | 445 446 |
398 if (option != 1) 399 { 400 continue; 401 } | 447 if (option != 1) { 448 continue; 449 } |
402 | 450 |
403 // second level 404 delay += wtemp2->delay; 405 power.readOp.dynamic += wtemp2->power.readOp.dynamic; 406 power.searchOp.dynamic += wtemp2->power.readOp.dynamic*wire_bw; 407 power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw; 408 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; | 451 // second level 452 delay += wtemp2->delay; 453 power.readOp.dynamic += wtemp2->power.readOp.dynamic; 454 power.searchOp.dynamic += wtemp2->power.readOp.dynamic * wire_bw; 455 power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw; 456 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw; |
409 | 457 |
410 if (uca_tree) 411 { 412 power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); 413 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; 414 } 415 else 416 { 417 power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); 418 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; 419 wire_bw*=2; | 458 if (uca_tree) { 459 power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw); 460 power.readOp.gate_leakage += 461 wtemp2->power.readOp.gate_leakage * wire_bw; 462 } else { 463 power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw); 464 power.readOp.gate_leakage += 465 wtemp2->power.readOp.gate_leakage * wire_bw; 466 wire_bw *= 2; |
420 | 467 |
421 if (ht_temp > wtemp3->repeater_spacing) 422 { 423 s3 = wtemp3->repeater_size; 424 l_eff = wtemp3->repeater_spacing; 425 } 426 else 427 { 428 s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size; 429 l_eff = ht_temp; 430 } | 468 if (ht_temp > wtemp3->repeater_spacing) { 469 s3 = wtemp3->repeater_size; 470 l_eff = wtemp3->repeater_spacing; 471 } else { 472 s3 = (len_temp / wtemp3->repeater_spacing) * 473 wtemp3->repeater_size; 474 l_eff = ht_temp; 475 } |
431 | 476 |
432 input_nand(s2, s3, l_eff); | 477 input_nand(s2, s3, l_eff); 478 } |
433 } | 479 } |
434 } | |
435 | 480 |
436 if (wtemp1) delete wtemp1; 437 if (wtemp2) delete wtemp2; 438 if (wtemp3) delete wtemp3; | 481 if (wtemp1) delete wtemp1; 482 if (wtemp2) delete wtemp2; 483 if (wtemp3) delete wtemp3; |
439} 440 441 442 443/* a tristate buffer is used to handle fan-ins 444 * The area of an unbalanced htree (rows != columns) 445 * depends on how data is traversed. 446 * In the following function, if ( no. of rows < no. of columns), 447 * then data first traverse in excess hor. links until vertical 448 * and horizontal nodes are same. 449 * If no. of rows is bigger, then data traverse in 450 * a hor. link followed by a ver. link in a repeated 451 * fashion (similar to a balanced tree) until there are no 452 * hor. links left. After this it goes through the remaining vertical 453 * links. 454 */ | 484} 485 486 487 488/* a tristate buffer is used to handle fan-ins 489 * The area of an unbalanced htree (rows != columns) 490 * depends on how data is traversed. 491 * In the following function, if ( no. of rows < no. of columns), 492 * then data first traverse in excess hor. links until vertical 493 * and horizontal nodes are same. 494 * If no. of rows is bigger, then data traverse in 495 * a hor. link followed by a ver. link in a repeated 496 * fashion (similar to a balanced tree) until there are no 497 * hor. links left. After this it goes through the remaining vertical 498 * links. 499 */ |
455void Htree2::out_htree() 456{ 457 //temp var 458 double s1 = 0, s2 = 0, s3 = 0; 459 double l_eff = 0; 460 Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; 461 double len = 0, ht = 0; 462 int option = 0; | 500void Htree2::out_htree() { 501 //temp var 502 double s1 = 0, s2 = 0, s3 = 0; 503 double l_eff = 0; 504 Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; 505 double len = 0, ht = 0; 506 int option = 0; |
463 | 507 |
464 int h = (int) _log2(ndwl/2); 465 int v = (int) _log2(ndbl/2); 466 double len_temp; 467 double ht_temp; 468 if (uca_tree) 469 { 470 ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */ 471 ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * 472 2 * (1-pow(0.5,h))))/2; 473 len_temp = (mat_width*ndwl/2 + 474 ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * 475 2 * (1-pow(0.5,v))))/2; 476 } 477 else 478 { 479 if (ndwl == ndbl) { 480 ht_temp = ((mat_height*ndbl/2) + 481 ((add_bits+ (search_data_in_bits + search_data_out_bits)) * (ndbl/2-1) * g_tp.wire_outside_mat.pitch) + 482 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) 483 )/2; 484 len_temp = (mat_width*ndwl/2 + 485 ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + 486 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; | 508 int h = (int) _log2(ndwl / 2); 509 int v = (int) _log2(ndbl / 2); 510 double len_temp; 511 double ht_temp; 512 if (uca_tree) { 513 ht_temp = (mat_height * ndbl / 2 + 514 /* since uca_tree models interbank tree, 515 mat_height => bank height */ 516 ((add_bits + data_in_bits + data_out_bits + 517 (search_data_in_bits + search_data_out_bits)) * 518 g_tp.wire_outside_mat.pitch * 519 2 * (1 - pow(0.5, h)))) / 2; 520 len_temp = (mat_width * ndwl / 2 + 521 ((add_bits + data_in_bits + data_out_bits + 522 (search_data_in_bits + search_data_out_bits)) * 523 g_tp.wire_outside_mat.pitch * 524 2 * (1 - pow(0.5, v)))) / 2; 525 } else { 526 if (ndwl == ndbl) { 527 ht_temp = ((mat_height * ndbl / 2) + 528 ((add_bits + (search_data_in_bits + 529 search_data_out_bits)) * 530 (ndbl / 2 - 1) * g_tp.wire_outside_mat.pitch) + 531 ((data_in_bits + data_out_bits) * 532 g_tp.wire_outside_mat.pitch * h) 533 ) / 2; 534 len_temp = 
(mat_width * ndwl / 2 + 535 ((add_bits + (search_data_in_bits + 536 search_data_out_bits)) * (ndwl / 2 - 1) * 537 g_tp.wire_outside_mat.pitch) + 538 ((data_in_bits + data_out_bits) * 539 g_tp.wire_outside_mat.pitch * v)) / 2; |
487 | 540 |
541 } else if (ndwl > ndbl) { 542 double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2)); 543 ht_temp = ((mat_height * ndbl / 2) + 544 ((add_bits + 545 (search_data_in_bits + search_data_out_bits)) * 546 ((ndbl / 2 - 1) + excess_part) * 547 g_tp.wire_outside_mat.pitch) + 548 (data_in_bits + data_out_bits) * 549 g_tp.wire_outside_mat.pitch * 550 (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2; 551 len_temp = (mat_width * ndwl / 2 + 552 ((add_bits + 553 (search_data_in_bits + search_data_out_bits)) * 554 (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + 555 ((data_in_bits + data_out_bits) * 556 g_tp.wire_outside_mat.pitch * v)) / 2; 557 } else { 558 double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2)); 559 ht_temp = ((mat_height * ndbl / 2) + 560 ((add_bits + 561 (search_data_in_bits + search_data_out_bits)) * 562 ((ndwl / 2 - 1) + excess_part) * 563 g_tp.wire_outside_mat.pitch) + 564 ((data_in_bits + data_out_bits) * 565 g_tp.wire_outside_mat.pitch * h) 566 ) / 2; 567 len_temp = (mat_width * ndwl / 2 + 568 ((add_bits + (search_data_in_bits + 569 search_data_out_bits)) * 570 ((ndwl / 2 - 1) + excess_part) * 571 g_tp.wire_outside_mat.pitch) + 572 (data_in_bits + data_out_bits) * 573 g_tp.wire_outside_mat.pitch * 574 (h + 2 * (1 - pow(0.5, v - h)))) / 2; 575 } |
|
488 } | 576 } |
489 else if (ndwl > ndbl) { 490 double excess_part = (_log2(ndwl/2) - _log2(ndbl/2)); 491 ht_temp = ((mat_height*ndbl/2) + 492 ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + 493 (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * 494 (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2; 495 len_temp = (mat_width*ndwl/2 + 496 ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + 497 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; 498 } 499 else { 500 double excess_part = (_log2(ndbl/2) - _log2(ndwl/2)); 501 ht_temp = ((mat_height*ndbl/2) + 502 ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + 503 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) 504 )/2; 505 len_temp = (mat_width*ndwl/2 + 506 ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + 507 (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2; 508 } 509 } 510 area.h = ht_temp * 2; 511 area.w = len_temp * 2; 512 delay = 0; 513 power.readOp.dynamic = 0; 514 power.readOp.leakage = 0; 515 power.readOp.gate_leakage = 0; 516 //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl; 517 len = len_temp; 518 ht = ht_temp/2; | 577 area.h = ht_temp * 2; 578 area.w = len_temp * 2; 579 delay = 0; 580 power.readOp.dynamic = 0; 581 power.readOp.leakage = 0; 582 power.readOp.gate_leakage = 0; 583 //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl; 584 len = len_temp; 585 ht = ht_temp / 2; |
519 | 586 |
520 while (v > 0 || h > 0) 521 { //finds delay/power of each link in the tree 522 if (wtemp1) delete wtemp1; 523 if (wtemp2) delete wtemp2; 524 if (wtemp3) delete wtemp3; | 587 while (v > 0 || h > 0) { //finds delay/power of each link in the tree 588 if (wtemp1) delete wtemp1; 589 if (wtemp2) delete wtemp2; 590 if (wtemp3) delete wtemp3; |
525 | 591 |
526 if(h > v) { 527 //the iteration considers only one horizontal link 528 wtemp1 = new Wire(wt, len); // hor 529 wtemp2 = new Wire(wt, len/2); // ver 530 len_temp = len; 531 len /= 2; 532 wtemp3 = 0; 533 h--; 534 option = 0; 535 } 536 else if (v>0 && h>0) { 537 //considers one horizontal link and one vertical link 538 wtemp1 = new Wire(wt, len); // hor 539 wtemp2 = new Wire(wt, ht); // ver 540 wtemp3 = new Wire(wt, len/2); // next hor 541 len_temp = len; 542 ht_temp = ht; 543 len /= 2; 544 ht /= 2; 545 v--; 546 h--; 547 option = 1; 548 } 549 else { 550 // considers only one vertical link 551 assert(h == 0); 552 wtemp1 = new Wire(wt, ht); // hor 553 wtemp2 = new Wire(wt, ht/2); // ver 554 ht_temp = ht; 555 ht /= 2; 556 wtemp3 = 0; 557 v--; 558 option = 2; 559 } 560 delay += wtemp1->delay; 561 power.readOp.dynamic += wtemp1->power.readOp.dynamic; 562 power.searchOp.dynamic += wtemp1->power.readOp.dynamic*init_wire_bw; 563 power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw; 564 power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw; 565 //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl; 566 if ((uca_tree == false && option == 2) || search_tree==true) 567 { 568 wire_bw*=2; 569 } | 592 if (h > v) { 593 //the iteration considers only one horizontal link 594 wtemp1 = new Wire(wt, len); // hor 595 wtemp2 = new Wire(wt, len / 2); // ver 596 len_temp = len; 597 len /= 2; 598 wtemp3 = 0; 599 h--; 600 option = 0; 601 } else if (v > 0 && h > 0) { 602 //considers one horizontal link and one vertical link 603 wtemp1 = new Wire(wt, len); // hor 604 wtemp2 = new Wire(wt, ht); // ver 605 wtemp3 = new Wire(wt, len / 2); // next hor 606 len_temp = len; 607 ht_temp = ht; 608 len /= 2; 609 ht /= 2; 610 v--; 611 h--; 612 option = 1; 613 } else { 614 // considers only one vertical link 615 assert(h == 0); 616 wtemp1 = new Wire(wt, ht); // hor 617 wtemp2 = new Wire(wt, ht / 2); // ver 618 ht_temp = ht; 619 ht /= 2; 620 wtemp3 = 0; 621 v--; 622 
option = 2; 623 } 624 delay += wtemp1->delay; 625 power.readOp.dynamic += wtemp1->power.readOp.dynamic; 626 power.searchOp.dynamic += wtemp1->power.readOp.dynamic * init_wire_bw; 627 power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw; 628 power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw; 629 if ((uca_tree == false && option == 2) || search_tree == true) { 630 wire_bw *= 2; 631 } |
570 | 632 |
571 if (uca_tree == false) 572 { 573 if (len_temp > wtemp1->repeater_spacing) 574 { 575 s1 = wtemp1->repeater_size; 576 l_eff = wtemp1->repeater_spacing; 577 } 578 else 579 { 580 s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size; 581 l_eff = len_temp; 582 } 583 if (ht_temp > wtemp2->repeater_spacing) 584 { 585 s2 = wtemp2->repeater_size; 586 } 587 else 588 { 589 s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size; 590 } 591 // first level 592 output_buffer(s1, s2, l_eff); 593 } | 633 if (uca_tree == false) { 634 if (len_temp > wtemp1->repeater_spacing) { 635 s1 = wtemp1->repeater_size; 636 l_eff = wtemp1->repeater_spacing; 637 } else { 638 s1 = (len_temp / wtemp1->repeater_spacing) * 639 wtemp1->repeater_size; 640 l_eff = len_temp; 641 } 642 if (ht_temp > wtemp2->repeater_spacing) { 643 s2 = wtemp2->repeater_size; 644 } else { 645 s2 = (len_temp / wtemp2->repeater_spacing) * 646 wtemp2->repeater_size; 647 } 648 // first level 649 output_buffer(s1, s2, l_eff); 650 } |
594 595 | 651 652 |
596 if (option != 1) 597 { 598 continue; 599 } | 653 if (option != 1) { 654 continue; 655 } |
600 | 656 |
601 // second level 602 delay += wtemp2->delay; 603 power.readOp.dynamic += wtemp2->power.readOp.dynamic; 604 power.searchOp.dynamic += wtemp2->power.readOp.dynamic*init_wire_bw; 605 power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw; 606 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; 607 //cout<<"power.readOp.gate_leakage"< 609 { 610 power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); 611 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; 612 } 613 else 614 { 615 power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); 616 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; 617 wire_bw*=2; | 657 // second level 658 delay += wtemp2->delay; 659 power.readOp.dynamic += wtemp2->power.readOp.dynamic; 660 power.searchOp.dynamic += wtemp2->power.readOp.dynamic * init_wire_bw; 661 power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw; 662 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw; 663 //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl; 664 if (uca_tree) { 665 power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw); 666 power.readOp.gate_leakage += 667 wtemp2->power.readOp.gate_leakage * wire_bw; 668 } else { 669 power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw); 670 power.readOp.gate_leakage += 671 wtemp2->power.readOp.gate_leakage * wire_bw; 672 wire_bw *= 2; |
618 | 673 |
619 if (ht_temp > wtemp3->repeater_spacing) 620 { 621 s3 = wtemp3->repeater_size; 622 l_eff = wtemp3->repeater_spacing; 623 } 624 else 625 { 626 s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size; 627 l_eff = ht_temp; 628 } | 674 if (ht_temp > wtemp3->repeater_spacing) { 675 s3 = wtemp3->repeater_size; 676 l_eff = wtemp3->repeater_spacing; 677 } else { 678 s3 = (len_temp / wtemp3->repeater_spacing) * 679 wtemp3->repeater_size; 680 l_eff = ht_temp; 681 } |
629 | 682 |
630 output_buffer(s2, s3, l_eff); | 683 output_buffer(s2, s3, l_eff); 684 } 685 //cout<<"power.readOp.leakage"<<power.readOp.leakage<<endl; 686 //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl; 687 //cout<<"wtemp2->power.readOp.gate_leakage"<<wtemp2->power.readOp.gate_leakage<<endl; |
631 } | 688 } |
632 //cout<<"power.readOp.leakage"<<power.readOp.leakage<<endl; 633 //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl; 634 //cout<<"wtemp2->power.readOp.gate_leakage"<<wtemp2->power.readOp.gate_leakage<<endl; 635 } | |
636 | 689 |
637 if (wtemp1) delete wtemp1; 638 if (wtemp2) delete wtemp2; 639 if (wtemp3) delete wtemp3; | 690 if (wtemp1) delete wtemp1; 691 if (wtemp2) delete wtemp2; 692 if (wtemp3) delete wtemp3; |
640} 641 | 693} 694 |