htree2.cc revision 10234
1/*****************************************************************************
2 *                                McPAT/CACTI
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
6 *                          All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 ***************************************************************************/
32
33
34
35#include <cassert>
36#include <iostream>
37
38#include "htree2.h"
39#include "wire.h"
40
41Htree2::Htree2(
42    enum Wire_type wire_model, double mat_w, double mat_h,
43    int a_bits, int d_inbits, int search_data_in, int d_outbits,
44    int search_data_out, int bl, int wl, enum Htree_type htree_type,
45    bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt)
46    : in_rise_time(0), out_rise_time(0),
47      tree_type(htree_type), mat_width(mat_w), mat_height(mat_h),
48      add_bits(a_bits), data_in_bits(d_inbits),
49      search_data_in_bits(search_data_in), data_out_bits(d_outbits),
50      search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl),
51      uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model),
52      deviceType(dt) {
53    assert(ndbl >= 2 && ndwl >= 2);
54
55//  if (ndbl == 1 && ndwl == 1)
56//  {
57//    delay = 0;
58//    power.readOp.dynamic = 0;
59//    power.readOp.leakage = 0;
60//    area.w = mat_w;
61//    area.h = mat_h;
62//    return;
63//  }
64//  if (ndwl == 1) ndwl++;
65//  if (ndbl == 1) ndbl++;
66
67    max_unpipelined_link_delay = 0; //TODO
68    min_w_nmos = g_tp.min_w_nmos_;
69    min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
70
71    switch (htree_type) {
72    case Add_htree:
73        wire_bw = init_wire_bw = add_bits;
74        in_htree();
75        break;
76    case Data_in_htree:
77        wire_bw = init_wire_bw = data_in_bits;
78        in_htree();
79        break;
80    case Data_out_htree:
81        wire_bw = init_wire_bw = data_out_bits;
82        out_htree();
83        break;
84    case Search_in_htree:
85        wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not.
86        in_htree();
87        break;
88    case Search_out_htree:
89        wire_bw = init_wire_bw = search_data_out_bits;
90        out_htree();
91        break;
92    default:
93        assert(0);
94        break;
95    }
96
97    power_bit = power;
98    power.readOp.dynamic *= init_wire_bw;
99
100    assert(power.readOp.dynamic >= 0);
101    assert(power.readOp.leakage >= 0);
102}
103
104
105
106// nand gate sizing calculation
107void Htree2::input_nand(double s1, double s2, double l_eff) {
108    Wire w1(wt, l_eff);
109    double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
110    // input capacitance of a repeater  = input capacitance of nand.
111    double nsize = s1 * (1 + pton_size) / (2 + pton_size);
112    nsize = (nsize < 1) ? 1 : nsize;
113
114    double tc = 2 * tr_R_on(nsize * min_w_nmos, NCH, 1) *
115        (drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) * 2 +
116         2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0));
117    delay += horowitz(w1.out_rise_time, tc,
118                      deviceType->Vth / deviceType->Vdd, deviceType->Vth /
119                      deviceType->Vdd, RISE);
120    power.readOp.dynamic += 0.5 *
121        (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
122         + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
123         + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) *
124        deviceType->Vdd * deviceType->Vdd;
125
126    power.searchOp.dynamic += 0.5 *
127        (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
128         + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
129         + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) *
130        deviceType->Vdd * deviceType->Vdd * wire_bw ;
131    power.readOp.leakage += (wire_bw *
132                             cmos_Isub_leakage(min_w_nmos * (nsize * 2),
133                                               min_w_pmos * nsize * 2, 2,
134                                               nand)) * deviceType->Vdd;
135    power.readOp.gate_leakage += (wire_bw *
136                                  cmos_Ig_leakage(min_w_nmos * (nsize * 2),
137                                                  min_w_pmos * nsize * 2, 2,
138                                                  nand)) * deviceType->Vdd;
139}
140
141
142
143// tristate buffer model consisting of not, nand, nor, and driver transistors
144void Htree2::output_buffer(double s1, double s2, double l_eff) {
145    Wire w1(wt, l_eff);
146    double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
147    // input capacitance of repeater = input capacitance of nand + nor.
148    double size = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size);
149    double s_eff =  //stage eff of a repeater in a wire
150        (gate_C(s2 * (min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff * 1e-6,
151                                                                 true)) /
152        gate_C(s2 * (min_w_nmos + min_w_pmos), 0);
153    double tr_size = gate_C(s1 * (min_w_nmos + min_w_pmos), 0) * 1 / 2 /
154        (s_eff * gate_C(min_w_pmos, 0));
155    size = (size < 1) ? 1 : size;
156
157    double res_nor = 2 * tr_R_on(size * min_w_pmos, PCH, 1);
158    double res_ptrans = tr_R_on(tr_size * min_w_nmos, NCH, 1);
159    double cap_nand_out =
160        drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
161        drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 +
162        gate_C(tr_size * min_w_pmos, 0);
163    double cap_ptrans_out = 2 *
164        (drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
165         drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) +
166        gate_C(s1 * (min_w_nmos + min_w_pmos), 0);
167
168    double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out;
169
170
171    delay += horowitz(w1.out_rise_time, tc,
172                      deviceType->Vth / deviceType->Vdd, deviceType->Vth /
173                      deviceType->Vdd, RISE);
174
175    //nand
176    power.readOp.dynamic += 0.5 *
177        (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
178         drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
179         gate_C(tr_size * (min_w_pmos), 0)) *
180        deviceType->Vdd * deviceType->Vdd;
181
182    power.searchOp.dynamic += 0.5 *
183        (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
184         drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
185         gate_C(tr_size * (min_w_pmos), 0)) *
186        deviceType->Vdd * deviceType->Vdd * init_wire_bw;
187
188    //not
189    power.readOp.dynamic += 0.5 *
190        (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
191         + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
192         + gate_C(size * (min_w_nmos + min_w_pmos), 0)) *
193        deviceType->Vdd * deviceType->Vdd;
194
195    power.searchOp.dynamic += 0.5 *
196        (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
197         + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
198         + gate_C(size * (min_w_nmos + min_w_pmos), 0)) *
199        deviceType->Vdd * deviceType->Vdd * init_wire_bw;
200
201    //nor
202    power.readOp.dynamic += 0.5 *
203        (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
204         + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
205         + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) *
206        deviceType->Vdd * deviceType->Vdd;
207
208    power.searchOp.dynamic += 0.5 *
209        (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
210         + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
211         + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) *
212        deviceType->Vdd * deviceType->Vdd * init_wire_bw;
213
214    //output transistor
215    power.readOp.dynamic += 0.5 *
216        ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
217          + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * 2
218         + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) *
219        deviceType->Vdd * deviceType->Vdd;
220
221    power.searchOp.dynamic += 0.5 *
222        ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
223          + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * 2
224         + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) *
225        deviceType->Vdd * deviceType->Vdd * init_wire_bw;
226
227    if (uca_tree) {
228        power.readOp.leakage +=
229            cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size *
230                              2, 1, inv) *
231            deviceType->Vdd * wire_bw;/*inverter + output tr*/
232        power.readOp.leakage +=
233            cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
234                              nand) * deviceType->Vdd * wire_bw;//nand
235        power.readOp.leakage +=
236            cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
237                              nor) * deviceType->Vdd * wire_bw;//nor
238
239        power.readOp.gate_leakage +=
240            cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2,
241                            1, inv) *
242            deviceType->Vdd * wire_bw;/*inverter + output tr*/
243        power.readOp.gate_leakage +=
244            cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
245                            nand) * deviceType->Vdd * wire_bw;//nand
246        power.readOp.gate_leakage +=
247            cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
248                            nor) * deviceType->Vdd * wire_bw;//nor
249    } else {
250        power.readOp.leakage +=
251            cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size *
252                              2, 1, inv) *
253            deviceType->Vdd * wire_bw;/*inverter + output tr*/
254        power.readOp.leakage +=
255            cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
256                              nand) * deviceType->Vdd * wire_bw;//nand
257        power.readOp.leakage +=
258            cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
259                              nor) * deviceType->Vdd * wire_bw;//nor
260
261        power.readOp.gate_leakage +=
262            cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2,
263                            1, inv) *
264            deviceType->Vdd * wire_bw;/*inverter + output tr*/
265        power.readOp.gate_leakage +=
266            cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
267                            nand) * deviceType->Vdd * wire_bw;//nand
268        power.readOp.gate_leakage +=
269            cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
270                            nor) * deviceType->Vdd * wire_bw;//nor
271    }
272}
273
274
275
276/* calculates the input h-tree delay/power
277 * A nand gate is used at each node to
278 * limit the signal
279 * The area of an unbalanced htree (rows != columns)
280 * depends on how data is traversed.
281 * In the following function, if ( no. of rows < no. of columns),
282 * then data first traverse in excess hor. links until vertical
283 * and horizontal nodes are same.
284 * If no. of rows is bigger, then data traverse in
285 * a hor. link followed by a ver. link in a repeated
286 * fashion (similar to a balanced tree) until there are no
287 * hor. links left. After this it goes through the remaining vertical
288 * links.
289 */
290void
291Htree2::in_htree() {
292    //temp var
293    double s1 = 0, s2 = 0, s3 = 0;
294    double l_eff = 0;
295    Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
296    double len = 0, ht = 0;
297    int option = 0;
298
299    int h = (int) _log2(ndwl / 2); // horizontal nodes
300    int v = (int) _log2(ndbl / 2); // vertical nodes
301    double len_temp;
302    double ht_temp;
303    if (uca_tree) {
304        //Sheng: this computation do not consider the wires that route from
305        //edge to middle.
306        ht_temp = (mat_height * ndbl / 2 +
307                   /* since uca_tree models interbank tree,
308                      mat_height => bank height */
309                   ((add_bits + data_in_bits + data_out_bits +
310                     (search_data_in_bits + search_data_out_bits)) *
311                    g_tp.wire_outside_mat.pitch *
312                    2 * (1 - pow(0.5, h)))) / 2;
313        len_temp = (mat_width * ndwl / 2 +
314                    ((add_bits + data_in_bits + data_out_bits +
315                      (search_data_in_bits + search_data_out_bits)) *
316                     g_tp.wire_outside_mat.pitch *
317                     2 * (1 - pow(0.5, v)))) / 2;
318    } else {
319        if (ndwl == ndbl) {
320            ht_temp = ((mat_height * ndbl / 2) +
321                       ((add_bits + (search_data_in_bits +
322                                     search_data_out_bits)) * (ndbl / 2 - 1) *
323                        g_tp.wire_outside_mat.pitch) +
324                       ((data_in_bits + data_out_bits) *
325                        g_tp.wire_outside_mat.pitch * h)
326                      ) / 2;
327            len_temp = (mat_width * ndwl / 2 +
328                        ((add_bits + (search_data_in_bits +
329                                      search_data_out_bits)) * (ndwl / 2 - 1) *
330                         g_tp.wire_outside_mat.pitch) +
331                        ((data_in_bits + data_out_bits) *
332                         g_tp.wire_outside_mat.pitch * v)) / 2;
333        } else if (ndwl > ndbl) {
334            double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2));
335            ht_temp = ((mat_height * ndbl / 2) +
336                       ((add_bits + + (search_data_in_bits +
337                                       search_data_out_bits)) *
338                        ((ndbl / 2 - 1) + excess_part) *
339                        g_tp.wire_outside_mat.pitch) +
340                       (data_in_bits + data_out_bits) *
341                       g_tp.wire_outside_mat.pitch *
342                       (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2;
343            len_temp = (mat_width * ndwl / 2 +
344                        ((add_bits +
345                          (search_data_in_bits + search_data_out_bits)) *
346                         (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
347                        ((data_in_bits + data_out_bits) *
348                         g_tp.wire_outside_mat.pitch * v)) / 2;
349        } else {
350            double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2));
351            ht_temp = ((mat_height * ndbl / 2) +
352                       ((add_bits +
353                         (search_data_in_bits + search_data_out_bits)) *
354                        ((ndwl / 2 - 1) + excess_part) *
355                        g_tp.wire_outside_mat.pitch) +
356                       ((data_in_bits + data_out_bits) *
357                        g_tp.wire_outside_mat.pitch * h)
358                      ) / 2;
359            len_temp = (mat_width * ndwl / 2 +
360                        ((add_bits +
361                          (search_data_in_bits + search_data_out_bits)) *
362                         ((ndwl / 2 - 1) + excess_part) *
363                         g_tp.wire_outside_mat.pitch) +
364                        (data_in_bits + data_out_bits) *
365                        g_tp.wire_outside_mat.pitch *
366                        (h + 2 * (1 - pow(0.5, v - h)))) / 2;
367        }
368    }
369
370    area.h   = ht_temp * 2;
371    area.w   = len_temp * 2;
372    delay = 0;
373    power.readOp.dynamic = 0;
374    power.readOp.leakage = 0;
375    power.searchOp.dynamic = 0;
376    len = len_temp;
377    ht  = ht_temp / 2;
378
379    while (v > 0 || h > 0) {
380        if (wtemp1) delete wtemp1;
381        if (wtemp2) delete wtemp2;
382        if (wtemp3) delete wtemp3;
383
384        if (h > v) {
385            //the iteration considers only one horizontal link
386            wtemp1 = new Wire(wt, len); // hor
387            wtemp2 = new Wire(wt, len / 2);  // ver
388            len_temp = len;
389            len /= 2;
390            wtemp3 = 0;
391            h--;
392            option = 0;
393        } else if (v > 0 && h > 0) {
394            //considers one horizontal link and one vertical link
395            wtemp1 = new Wire(wt, len); // hor
396            wtemp2 = new Wire(wt, ht);  // ver
397            wtemp3 = new Wire(wt, len / 2);  // next hor
398            len_temp = len;
399            ht_temp = ht;
400            len /= 2;
401            ht  /= 2;
402            v--;
403            h--;
404            option = 1;
405        } else {
406            // considers only one vertical link
407            assert(h == 0);
408            wtemp1 = new Wire(wt, ht); // ver
409            wtemp2 = new Wire(wt, ht / 2);  // hor
410            ht_temp = ht;
411            ht /= 2;
412            wtemp3 = 0;
413            v--;
414            option = 2;
415        }
416
417        delay += wtemp1->delay;
418        power.readOp.dynamic += wtemp1->power.readOp.dynamic;
419        power.searchOp.dynamic += wtemp1->power.readOp.dynamic * wire_bw;
420        power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw;
421        power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw;
422        if ((uca_tree == false && option == 2) || search_tree == true) {
423            wire_bw *= 2;  // wire bandwidth doubles only for vertical branches
424        }
425
426        if (uca_tree == false) {
427            if (len_temp > wtemp1->repeater_spacing) {
428                s1 = wtemp1->repeater_size;
429                l_eff = wtemp1->repeater_spacing;
430            } else {
431                s1 = (len_temp / wtemp1->repeater_spacing) *
432                    wtemp1->repeater_size;
433                l_eff = len_temp;
434            }
435
436            if (ht_temp > wtemp2->repeater_spacing) {
437                s2 = wtemp2->repeater_size;
438            } else {
439                s2 = (len_temp / wtemp2->repeater_spacing) *
440                    wtemp2->repeater_size;
441            }
442            // first level
443            input_nand(s1, s2, l_eff);
444        }
445
446
447        if (option != 1) {
448            continue;
449        }
450
451        // second level
452        delay += wtemp2->delay;
453        power.readOp.dynamic += wtemp2->power.readOp.dynamic;
454        power.searchOp.dynamic += wtemp2->power.readOp.dynamic * wire_bw;
455        power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw;
456        power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw;
457
458        if (uca_tree) {
459            power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
460            power.readOp.gate_leakage +=
461                wtemp2->power.readOp.gate_leakage * wire_bw;
462        } else {
463            power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
464            power.readOp.gate_leakage +=
465                wtemp2->power.readOp.gate_leakage * wire_bw;
466            wire_bw *= 2;
467
468            if (ht_temp > wtemp3->repeater_spacing) {
469                s3    = wtemp3->repeater_size;
470                l_eff = wtemp3->repeater_spacing;
471            } else {
472                s3 = (len_temp / wtemp3->repeater_spacing) *
473                    wtemp3->repeater_size;
474                l_eff = ht_temp;
475            }
476
477            input_nand(s2, s3, l_eff);
478        }
479    }
480
481    if (wtemp1) delete wtemp1;
482    if (wtemp2) delete wtemp2;
483    if (wtemp3) delete wtemp3;
484}
485
486
487
488/* a tristate buffer is used to handle fan-ins
489 * The area of an unbalanced htree (rows != columns)
490 * depends on how data is traversed.
491 * In the following function, if ( no. of rows < no. of columns),
492 * then data first traverse in excess hor. links until vertical
493 * and horizontal nodes are same.
494 * If no. of rows is bigger, then data traverse in
495 * a hor. link followed by a ver. link in a repeated
496 * fashion (similar to a balanced tree) until there are no
497 * hor. links left. After this it goes through the remaining vertical
498 * links.
499 */
500void Htree2::out_htree() {
501    //temp var
502    double s1 = 0, s2 = 0, s3 = 0;
503    double l_eff = 0;
504    Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
505    double len = 0, ht = 0;
506    int option = 0;
507
508    int h = (int) _log2(ndwl / 2);
509    int v = (int) _log2(ndbl / 2);
510    double len_temp;
511    double ht_temp;
512    if (uca_tree) {
513        ht_temp = (mat_height * ndbl / 2 +
514                   /* since uca_tree models interbank tree,
515                      mat_height => bank height */
516                   ((add_bits + data_in_bits + data_out_bits +
517                     (search_data_in_bits + search_data_out_bits)) *
518                    g_tp.wire_outside_mat.pitch *
519                    2 * (1 - pow(0.5, h)))) / 2;
520        len_temp = (mat_width * ndwl / 2 +
521                    ((add_bits + data_in_bits + data_out_bits +
522                      (search_data_in_bits + search_data_out_bits)) *
523                     g_tp.wire_outside_mat.pitch *
524                     2 * (1 - pow(0.5, v)))) / 2;
525    } else {
526        if (ndwl == ndbl) {
527            ht_temp = ((mat_height * ndbl / 2) +
528                       ((add_bits + (search_data_in_bits +
529                                     search_data_out_bits)) *
530                        (ndbl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
531                       ((data_in_bits + data_out_bits) *
532                        g_tp.wire_outside_mat.pitch * h)
533                ) / 2;
534            len_temp = (mat_width * ndwl / 2 +
535                        ((add_bits + (search_data_in_bits +
536                                      search_data_out_bits)) * (ndwl / 2 - 1) *
537                         g_tp.wire_outside_mat.pitch) +
538                        ((data_in_bits + data_out_bits) *
539                         g_tp.wire_outside_mat.pitch * v)) / 2;
540
541        } else if (ndwl > ndbl) {
542            double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2));
543            ht_temp = ((mat_height * ndbl / 2) +
544                       ((add_bits +
545                         (search_data_in_bits + search_data_out_bits)) *
546                        ((ndbl / 2 - 1) + excess_part) *
547                        g_tp.wire_outside_mat.pitch) +
548                       (data_in_bits + data_out_bits) *
549                       g_tp.wire_outside_mat.pitch *
550                       (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2;
551            len_temp = (mat_width * ndwl / 2 +
552                        ((add_bits +
553                          (search_data_in_bits + search_data_out_bits)) *
554                         (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
555                        ((data_in_bits + data_out_bits) *
556                         g_tp.wire_outside_mat.pitch * v)) / 2;
557        } else {
558            double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2));
559            ht_temp = ((mat_height * ndbl / 2) +
560                       ((add_bits +
561                         (search_data_in_bits + search_data_out_bits)) *
562                        ((ndwl / 2 - 1) + excess_part) *
563                        g_tp.wire_outside_mat.pitch) +
564                       ((data_in_bits + data_out_bits) *
565                        g_tp.wire_outside_mat.pitch * h)
566                      ) / 2;
567            len_temp = (mat_width * ndwl / 2 +
568                        ((add_bits + (search_data_in_bits +
569                                      search_data_out_bits)) *
570                         ((ndwl / 2 - 1) + excess_part) *
571                         g_tp.wire_outside_mat.pitch) +
572                        (data_in_bits + data_out_bits) *
573                        g_tp.wire_outside_mat.pitch *
574                        (h + 2 * (1 - pow(0.5, v - h)))) / 2;
575        }
576    }
577    area.h = ht_temp * 2;
578    area.w = len_temp * 2;
579    delay = 0;
580    power.readOp.dynamic = 0;
581    power.readOp.leakage = 0;
582    power.readOp.gate_leakage = 0;
583    //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
584    len = len_temp;
585    ht = ht_temp / 2;
586
587    while (v > 0 || h > 0) { //finds delay/power of each link in the tree
588        if (wtemp1) delete wtemp1;
589        if (wtemp2) delete wtemp2;
590        if (wtemp3) delete wtemp3;
591
592        if (h > v) {
593            //the iteration considers only one horizontal link
594            wtemp1 = new Wire(wt, len); // hor
595            wtemp2 = new Wire(wt, len / 2);  // ver
596            len_temp = len;
597            len /= 2;
598            wtemp3 = 0;
599            h--;
600            option = 0;
601        } else if (v > 0 && h > 0) {
602            //considers one horizontal link and one vertical link
603            wtemp1 = new Wire(wt, len); // hor
604            wtemp2 = new Wire(wt, ht);  // ver
605            wtemp3 = new Wire(wt, len / 2);  // next hor
606            len_temp = len;
607            ht_temp = ht;
608            len /= 2;
609            ht /= 2;
610            v--;
611            h--;
612            option = 1;
613        } else {
614            // considers only one vertical link
615            assert(h == 0);
616            wtemp1 = new Wire(wt, ht); // hor
617            wtemp2 = new Wire(wt, ht / 2);  // ver
618            ht_temp = ht;
619            ht /= 2;
620            wtemp3 = 0;
621            v--;
622            option = 2;
623        }
624        delay += wtemp1->delay;
625        power.readOp.dynamic += wtemp1->power.readOp.dynamic;
626        power.searchOp.dynamic += wtemp1->power.readOp.dynamic * init_wire_bw;
627        power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw;
628        power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw;
629        if ((uca_tree == false && option == 2) || search_tree == true) {
630            wire_bw *= 2;
631        }
632
633        if (uca_tree == false) {
634            if (len_temp > wtemp1->repeater_spacing) {
635                s1 = wtemp1->repeater_size;
636                l_eff = wtemp1->repeater_spacing;
637            } else {
638                s1 = (len_temp / wtemp1->repeater_spacing) *
639                    wtemp1->repeater_size;
640                l_eff = len_temp;
641            }
642            if (ht_temp > wtemp2->repeater_spacing) {
643                s2 = wtemp2->repeater_size;
644            } else {
645                s2 = (len_temp / wtemp2->repeater_spacing) *
646                    wtemp2->repeater_size;
647            }
648            // first level
649            output_buffer(s1, s2, l_eff);
650        }
651
652
653        if (option != 1) {
654            continue;
655        }
656
657        // second level
658        delay += wtemp2->delay;
659        power.readOp.dynamic += wtemp2->power.readOp.dynamic;
660        power.searchOp.dynamic += wtemp2->power.readOp.dynamic * init_wire_bw;
661        power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw;
662        power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw;
663        //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
664        if (uca_tree) {
665            power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
666            power.readOp.gate_leakage +=
667                wtemp2->power.readOp.gate_leakage * wire_bw;
668        } else {
669            power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
670            power.readOp.gate_leakage +=
671                wtemp2->power.readOp.gate_leakage * wire_bw;
672            wire_bw *= 2;
673
674            if (ht_temp > wtemp3->repeater_spacing) {
675                s3 = wtemp3->repeater_size;
676                l_eff = wtemp3->repeater_spacing;
677            } else {
678                s3 = (len_temp / wtemp3->repeater_spacing) *
679                    wtemp3->repeater_size;
680                l_eff = ht_temp;
681            }
682
683            output_buffer(s2, s3, l_eff);
684        }
685        //cout<<"power.readOp.leakage"<<power.readOp.leakage<<endl;
686        //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
687        //cout<<"wtemp2->power.readOp.gate_leakage"<<wtemp2->power.readOp.gate_leakage<<endl;
688    }
689
690    if (wtemp1) delete wtemp1;
691    if (wtemp2) delete wtemp2;
692    if (wtemp3) delete wtemp3;
693}
694
695