htree2.cc revision 10152
1/*****************************************************************************
2 *                                McPAT/CACTI
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *                          All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution;
14 * neither the name of the copyright holders nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 ***************************************************************************/
31
32
33
34#include <cassert>
35#include <iostream>
36
37#include "htree2.h"
38#include "wire.h"
39
40Htree2::Htree2(
41    enum Wire_type wire_model, double mat_w, double mat_h,
42    int a_bits, int d_inbits, int search_data_in, int d_outbits, int search_data_out, int bl, int wl, enum Htree_type htree_type,
43    bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt)
44 :in_rise_time(0), out_rise_time(0),
45  tree_type(htree_type), mat_width(mat_w), mat_height(mat_h),
46  add_bits(a_bits), data_in_bits(d_inbits), search_data_in_bits(search_data_in),data_out_bits(d_outbits),
47  search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl),
48  uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), deviceType(dt)
49{
50  assert(ndbl >= 2 && ndwl >= 2);
51
52//  if (ndbl == 1 && ndwl == 1)
53//  {
54//    delay = 0;
55//    power.readOp.dynamic = 0;
56//    power.readOp.leakage = 0;
57//    area.w = mat_w;
58//    area.h = mat_h;
59//    return;
60//  }
61//  if (ndwl == 1) ndwl++;
62//  if (ndbl == 1) ndbl++;
63
64  max_unpipelined_link_delay = 0; //TODO
65  min_w_nmos = g_tp.min_w_nmos_;
66  min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
67
68  switch (htree_type)
69  {
70    case Add_htree:
71      wire_bw = init_wire_bw = add_bits;
72      in_htree();
73      break;
74    case Data_in_htree:
75      wire_bw = init_wire_bw = data_in_bits;
76      in_htree();
77      break;
78    case Data_out_htree:
79      wire_bw = init_wire_bw = data_out_bits;
80      out_htree();
81      break;
82    case Search_in_htree:
83      wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not.
84      in_htree();
85      break;
86    case Search_out_htree:
87      wire_bw = init_wire_bw = search_data_out_bits;
88      out_htree();
89      break;
90    default:
91      assert(0);
92      break;
93  }
94
95  power_bit = power;
96  power.readOp.dynamic *= init_wire_bw;
97
98  assert(power.readOp.dynamic >= 0);
99  assert(power.readOp.leakage >= 0);
100}
101
102
103
104// nand gate sizing calculation
105void Htree2::input_nand(double s1, double s2, double l_eff)
106{
107  Wire w1(wt, l_eff);
108  double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
109  // input capacitance of a repeater  = input capacitance of nand.
110  double nsize = s1*(1 + pton_size)/(2 + pton_size);
111  nsize = (nsize < 1) ? 1 : nsize;
112
113  double tc = 2*tr_R_on(nsize*min_w_nmos, NCH, 1) *
114    (drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
115     2 * gate_C(s2*(min_w_nmos + min_w_pmos), 0));
116  delay+= horowitz (w1.out_rise_time, tc,
117      deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
118  power.readOp.dynamic += 0.5 *
119    (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
120     + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
121     + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) *
122    deviceType->Vdd * deviceType->Vdd;
123
124    power.searchOp.dynamic += 0.5 *
125    (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
126     + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
127     + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) *
128    deviceType->Vdd * deviceType->Vdd * wire_bw ;
129  power.readOp.leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
130  power.readOp.gate_leakage += (wire_bw*cmos_Ig_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
131}
132
133
134
135// tristate buffer model consisting of not, nand, nor, and driver transistors
136void Htree2::output_buffer(double s1, double s2, double l_eff)
137{
138  Wire w1(wt, l_eff);
139  double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
140  // input capacitance of repeater = input capacitance of nand + nor.
141  double size = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
142  double s_eff =  //stage eff of a repeater in a wire
143    (gate_C(s2*(min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff*1e-6,true))/
144    gate_C(s2*(min_w_nmos + min_w_pmos), 0);
145  double tr_size = gate_C(s1*(min_w_nmos + min_w_pmos), 0) * 1/2/(s_eff*gate_C(min_w_pmos, 0));
146  size = (size < 1) ? 1 : size;
147
148  double res_nor = 2*tr_R_on(size*min_w_pmos, PCH, 1);
149  double res_ptrans = tr_R_on(tr_size*min_w_nmos, NCH, 1);
150  double cap_nand_out = drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
151                        drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
152                        gate_C(tr_size*min_w_pmos, 0);
153  double cap_ptrans_out = 2 *(drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
154                              drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) +
155                          gate_C(s1*(min_w_nmos + min_w_pmos), 0);
156
157  double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out;
158
159
160  delay += horowitz (w1.out_rise_time, tc,
161      deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
162
163  //nand
164  power.readOp.dynamic += 0.5 *
165    (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
166       drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
167     gate_C(tr_size*(min_w_pmos), 0)) *
168    deviceType->Vdd * deviceType->Vdd;
169
170    power.searchOp.dynamic += 0.5 *
171    (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
172       drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
173     gate_C(tr_size*(min_w_pmos), 0)) *
174    deviceType->Vdd * deviceType->Vdd*init_wire_bw;
175
176  //not
177  power.readOp.dynamic += 0.5 *
178    (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
179     +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
180     +gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
181    deviceType->Vdd * deviceType->Vdd;
182
183    power.searchOp.dynamic += 0.5 *
184    (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
185     +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
186     +gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
187    deviceType->Vdd * deviceType->Vdd*init_wire_bw;
188
189  //nor
190  power.readOp.dynamic += 0.5 *
191    (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
192     + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
193     +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
194    deviceType->Vdd * deviceType->Vdd;
195
196    power.searchOp.dynamic += 0.5 *
197    (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
198     + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
199     +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
200    deviceType->Vdd * deviceType->Vdd*init_wire_bw;
201
202  //output transistor
203  power.readOp.dynamic += 0.5 *
204    ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
205      +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
206     + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
207    deviceType->Vdd * deviceType->Vdd;
208
209    power.searchOp.dynamic += 0.5 *
210    ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
211      +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
212     + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
213    deviceType->Vdd * deviceType->Vdd*init_wire_bw;
214
215  if(uca_tree) {
216        power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
217        power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
218        power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
219
220        power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
221    power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
222    power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
223    //power.readOp.gate_leakage *=;
224  }
225  else {
226        power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
227        power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
228        power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
229
230        power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
231    power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
232    power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
233    //power.readOp.gate_leakage *=deviceType->Vdd*wire_bw;
234  }
235}
236
237
238
239/* calculates the input h-tree delay/power
240 * A nand gate is used at each node to
241 * limit the signal
242 * The area of an unbalanced htree (rows != columns)
243 * depends on how data is traversed.
244 * In the following function, if ( no. of rows < no. of columns),
245 * then data first traverse in excess hor. links until vertical
246 * and horizontal nodes are same.
247 * If no. of rows is bigger, then data traverse in
248 * a hor. link followed by a ver. link in a repeated
249 * fashion (similar to a balanced tree) until there are no
250 * hor. links left. After this it goes through the remaining vertical
251 * links.
252 */
253  void
254Htree2::in_htree()
255{
256  //temp var
257  double s1 = 0, s2 = 0, s3 = 0;
258  double l_eff = 0;
259  Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
260  double len = 0, ht = 0;
261  int option = 0;
262
263  int h = (int) _log2(ndwl/2); // horizontal nodes
264  int v = (int) _log2(ndbl/2); // vertical nodes
265  double len_temp;
266  double ht_temp;
267  if (uca_tree)
268  {//Sheng: this computation do not consider the wires that route from edge to middle.
269    ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
270        ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
271         2 * (1-pow(0.5,h))))/2;
272    len_temp = (mat_width*ndwl/2 +
273        ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
274         2 * (1-pow(0.5,v))))/2;
275  }
276  else
277  {
278    if (ndwl == ndbl) {
279      ht_temp = ((mat_height*ndbl/2) +
280          ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
281          ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
282          )/2;
283      len_temp = (mat_width*ndwl/2 +
284        ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
285        ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
286    }
287    else if (ndwl > ndbl) {
288      double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
289      ht_temp = ((mat_height*ndbl/2) +
290          ((add_bits + + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
291          (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
292          (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
293      len_temp = (mat_width*ndwl/2 +
294        ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
295        ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
296    }
297    else {
298       double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
299      ht_temp = ((mat_height*ndbl/2) +
300          ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
301          ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
302          )/2;
303      len_temp = (mat_width*ndwl/2 +
304          ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
305          (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
306    }
307  }
308
309  area.h   = ht_temp * 2;
310  area.w   = len_temp * 2;
311  delay = 0;
312  power.readOp.dynamic = 0;
313  power.readOp.leakage = 0;
314  power.searchOp.dynamic =0;
315  len = len_temp;
316  ht  = ht_temp/2;
317
318  while (v > 0 || h > 0)
319  {
320    if (wtemp1) delete wtemp1;
321    if (wtemp2) delete wtemp2;
322    if (wtemp3) delete wtemp3;
323
324    if (h > v)
325    {
326      //the iteration considers only one horizontal link
327      wtemp1 = new Wire(wt, len); // hor
328      wtemp2 = new Wire(wt, len/2);  // ver
329      len_temp = len;
330      len /= 2;
331      wtemp3 = 0;
332      h--;
333      option = 0;
334    }
335    else if (v>0 && h>0)
336    {
337      //considers one horizontal link and one vertical link
338      wtemp1 = new Wire(wt, len); // hor
339      wtemp2 = new Wire(wt, ht);  // ver
340      wtemp3 = new Wire(wt, len/2);  // next hor
341      len_temp = len;
342      ht_temp = ht;
343      len /= 2;
344      ht  /= 2;
345      v--;
346      h--;
347      option = 1;
348    }
349    else
350    {
351      // considers only one vertical link
352      assert(h == 0);
353      wtemp1 = new Wire(wt, ht); // ver
354      wtemp2 = new Wire(wt, ht/2);  // hor
355      ht_temp = ht;
356      ht /= 2;
357      wtemp3 = 0;
358      v--;
359      option = 2;
360    }
361
362    delay += wtemp1->delay;
363    power.readOp.dynamic += wtemp1->power.readOp.dynamic;
364    power.searchOp.dynamic += wtemp1->power.readOp.dynamic*wire_bw;
365    power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
366    power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
367    if ((uca_tree == false && option == 2) || search_tree==true)
368    {
369      wire_bw*=2;  // wire bandwidth doubles only for vertical branches
370    }
371
372    if (uca_tree == false)
373    {
374      if (len_temp > wtemp1->repeater_spacing)
375      {
376        s1 = wtemp1->repeater_size;
377        l_eff = wtemp1->repeater_spacing;
378      }
379      else
380      {
381        s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
382        l_eff = len_temp;
383      }
384
385      if (ht_temp > wtemp2->repeater_spacing)
386      {
387        s2 = wtemp2->repeater_size;
388      }
389      else
390      {
391        s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
392      }
393      // first level
394      input_nand(s1, s2, l_eff);
395    }
396
397
398    if (option != 1)
399    {
400      continue;
401    }
402
403    // second level
404    delay += wtemp2->delay;
405    power.readOp.dynamic += wtemp2->power.readOp.dynamic;
406    power.searchOp.dynamic += wtemp2->power.readOp.dynamic*wire_bw;
407    power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
408    power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
409
410    if (uca_tree)
411    {
412      power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
413      power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
414    }
415    else
416    {
417      power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
418      power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
419      wire_bw*=2;
420
421      if (ht_temp > wtemp3->repeater_spacing)
422      {
423        s3    = wtemp3->repeater_size;
424        l_eff = wtemp3->repeater_spacing;
425      }
426      else
427      {
428        s3    = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
429        l_eff = ht_temp;
430      }
431
432      input_nand(s2, s3, l_eff);
433    }
434  }
435
436  if (wtemp1) delete wtemp1;
437  if (wtemp2) delete wtemp2;
438  if (wtemp3) delete wtemp3;
439}
440
441
442
443/* a tristate buffer is used to handle fan-ins
444 * The area of an unbalanced htree (rows != columns)
445 * depends on how data is traversed.
446 * In the following function, if ( no. of rows < no. of columns),
447 * then data first traverse in excess hor. links until vertical
448 * and horizontal nodes are same.
449 * If no. of rows is bigger, then data traverse in
450 * a hor. link followed by a ver. link in a repeated
451 * fashion (similar to a balanced tree) until there are no
452 * hor. links left. After this it goes through the remaining vertical
453 * links.
454 */
455void Htree2::out_htree()
456{
457  //temp var
458  double s1 = 0, s2 = 0, s3 = 0;
459  double l_eff = 0;
460  Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
461  double len = 0, ht = 0;
462  int option = 0;
463
464  int h = (int) _log2(ndwl/2);
465  int v = (int) _log2(ndbl/2);
466  double len_temp;
467  double ht_temp;
468  if (uca_tree)
469  {
470    ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
471        ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
472         2 * (1-pow(0.5,h))))/2;
473    len_temp = (mat_width*ndwl/2 +
474        ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
475         2 * (1-pow(0.5,v))))/2;
476  }
477  else
478    {
479    if (ndwl == ndbl) {
480      ht_temp = ((mat_height*ndbl/2) +
481          ((add_bits+ (search_data_in_bits + search_data_out_bits)) * (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
482          ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
483          )/2;
484      len_temp = (mat_width*ndwl/2 +
485        ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
486        ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
487
488    }
489    else if (ndwl > ndbl) {
490      double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
491      ht_temp = ((mat_height*ndbl/2) +
492          ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
493          (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
494          (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
495      len_temp = (mat_width*ndwl/2 +
496        ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
497        ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
498    }
499    else {
500      double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
501      ht_temp = ((mat_height*ndbl/2) +
502          ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
503          ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
504          )/2;
505      len_temp = (mat_width*ndwl/2 +
506          ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
507          (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
508    }
509  }
510  area.h = ht_temp * 2;
511  area.w = len_temp * 2;
512  delay = 0;
513  power.readOp.dynamic = 0;
514  power.readOp.leakage = 0;
515  power.readOp.gate_leakage = 0;
516  //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
517  len = len_temp;
518  ht = ht_temp/2;
519
520  while (v > 0 || h > 0)
521  { //finds delay/power of each link in the tree
522    if (wtemp1) delete wtemp1;
523    if (wtemp2) delete wtemp2;
524    if (wtemp3) delete wtemp3;
525
526    if(h > v) {
527      //the iteration considers only one horizontal link
528      wtemp1 = new Wire(wt, len); // hor
529      wtemp2 = new Wire(wt, len/2);  // ver
530      len_temp = len;
531      len /= 2;
532      wtemp3 = 0;
533      h--;
534      option = 0;
535    }
536    else if (v>0 && h>0) {
537      //considers one horizontal link and one vertical link
538      wtemp1 = new Wire(wt, len); // hor
539      wtemp2 = new Wire(wt, ht);  // ver
540      wtemp3 = new Wire(wt, len/2);  // next hor
541      len_temp = len;
542      ht_temp = ht;
543      len /= 2;
544      ht /= 2;
545      v--;
546      h--;
547      option = 1;
548    }
549    else {
550      // considers only one vertical link
551      assert(h == 0);
552      wtemp1 = new Wire(wt, ht); // hor
553      wtemp2 = new Wire(wt, ht/2);  // ver
554      ht_temp = ht;
555      ht /= 2;
556      wtemp3 = 0;
557      v--;
558      option = 2;
559    }
560    delay += wtemp1->delay;
561    power.readOp.dynamic += wtemp1->power.readOp.dynamic;
562    power.searchOp.dynamic += wtemp1->power.readOp.dynamic*init_wire_bw;
563    power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
564    power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
565    //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
566    if ((uca_tree == false && option == 2) || search_tree==true)
567    {
568      wire_bw*=2;
569    }
570
571    if (uca_tree == false)
572    {
573      if (len_temp > wtemp1->repeater_spacing)
574      {
575        s1 = wtemp1->repeater_size;
576        l_eff = wtemp1->repeater_spacing;
577      }
578      else
579      {
580        s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
581        l_eff = len_temp;
582      }
583      if (ht_temp > wtemp2->repeater_spacing)
584      {
585        s2 = wtemp2->repeater_size;
586      }
587      else
588      {
589        s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
590      }
591      // first level
592      output_buffer(s1, s2, l_eff);
593    }
594
595
596    if (option != 1)
597    {
598      continue;
599    }
600
601    // second level
602    delay += wtemp2->delay;
603    power.readOp.dynamic += wtemp2->power.readOp.dynamic;
604    power.searchOp.dynamic += wtemp2->power.readOp.dynamic*init_wire_bw;
605    power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
606    power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
607    //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
608    if (uca_tree)
609    {
610      power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
611      power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
612    }
613    else
614    {
615      power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
616      power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
617      wire_bw*=2;
618
619      if (ht_temp > wtemp3->repeater_spacing)
620      {
621        s3 = wtemp3->repeater_size;
622        l_eff = wtemp3->repeater_spacing;
623      }
624      else
625      {
626        s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
627        l_eff = ht_temp;
628      }
629
630      output_buffer(s2, s3, l_eff);
631    }
632    //cout<<"power.readOp.leakage"<<power.readOp.leakage<<endl;
633    //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
634    //cout<<"wtemp2->power.readOp.gate_leakage"<<wtemp2->power.readOp.gate_leakage<<endl;
635  }
636
637  if (wtemp1) delete wtemp1;
638  if (wtemp2) delete wtemp2;
639  if (wtemp3) delete wtemp3;
640}
641
642