Deleted Added
sdiff udiff text old ( 10152:52c552138ba1 ) new ( 10234:5cb711fa6176 )
full compact
1/*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
5 * All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 ***************************************************************************/
31
32
33
34#include <cassert>
35#include <iostream>
36
37#include "htree2.h"
38#include "wire.h"
39
40Htree2::Htree2(
41 enum Wire_type wire_model, double mat_w, double mat_h,
42 int a_bits, int d_inbits, int search_data_in, int d_outbits, int search_data_out, int bl, int wl, enum Htree_type htree_type,
43 bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt)
44 :in_rise_time(0), out_rise_time(0),
45 tree_type(htree_type), mat_width(mat_w), mat_height(mat_h),
46 add_bits(a_bits), data_in_bits(d_inbits), search_data_in_bits(search_data_in),data_out_bits(d_outbits),
47 search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl),
48 uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), deviceType(dt)
49{
50 assert(ndbl >= 2 && ndwl >= 2);
51
52// if (ndbl == 1 && ndwl == 1)
53// {
54// delay = 0;
55// power.readOp.dynamic = 0;
56// power.readOp.leakage = 0;
57// area.w = mat_w;
58// area.h = mat_h;
59// return;
60// }
61// if (ndwl == 1) ndwl++;
62// if (ndbl == 1) ndbl++;
63
64 max_unpipelined_link_delay = 0; //TODO
65 min_w_nmos = g_tp.min_w_nmos_;
66 min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
67
68 switch (htree_type)
69 {
70 case Add_htree:
71 wire_bw = init_wire_bw = add_bits;
72 in_htree();
73 break;
74 case Data_in_htree:
75 wire_bw = init_wire_bw = data_in_bits;
76 in_htree();
77 break;
78 case Data_out_htree:
79 wire_bw = init_wire_bw = data_out_bits;
80 out_htree();
81 break;
82 case Search_in_htree:
83 wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not.
84 in_htree();
85 break;
86 case Search_out_htree:
87 wire_bw = init_wire_bw = search_data_out_bits;
88 out_htree();
89 break;
90 default:
91 assert(0);
92 break;
93 }
94
95 power_bit = power;
96 power.readOp.dynamic *= init_wire_bw;
97
98 assert(power.readOp.dynamic >= 0);
99 assert(power.readOp.leakage >= 0);
100}
101
102
103
104// nand gate sizing calculation
105void Htree2::input_nand(double s1, double s2, double l_eff)
106{
107 Wire w1(wt, l_eff);
108 double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
109 // input capacitance of a repeater = input capacitance of nand.
110 double nsize = s1*(1 + pton_size)/(2 + pton_size);
111 nsize = (nsize < 1) ? 1 : nsize;
112
113 double tc = 2*tr_R_on(nsize*min_w_nmos, NCH, 1) *
114 (drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
115 2 * gate_C(s2*(min_w_nmos + min_w_pmos), 0));
116 delay+= horowitz (w1.out_rise_time, tc,
117 deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
118 power.readOp.dynamic += 0.5 *
119 (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
120 + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
121 + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) *
122 deviceType->Vdd * deviceType->Vdd;
123
124 power.searchOp.dynamic += 0.5 *
125 (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
126 + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
127 + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) *
128 deviceType->Vdd * deviceType->Vdd * wire_bw ;
129 power.readOp.leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
130 power.readOp.gate_leakage += (wire_bw*cmos_Ig_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
131}
132
133
134
135// tristate buffer model consisting of not, nand, nor, and driver transistors
136void Htree2::output_buffer(double s1, double s2, double l_eff)
137{
138 Wire w1(wt, l_eff);
139 double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
140 // input capacitance of repeater = input capacitance of nand + nor.
141 double size = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
142 double s_eff = //stage eff of a repeater in a wire
143 (gate_C(s2*(min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff*1e-6,true))/
144 gate_C(s2*(min_w_nmos + min_w_pmos), 0);
145 double tr_size = gate_C(s1*(min_w_nmos + min_w_pmos), 0) * 1/2/(s_eff*gate_C(min_w_pmos, 0));
146 size = (size < 1) ? 1 : size;
147
148 double res_nor = 2*tr_R_on(size*min_w_pmos, PCH, 1);
149 double res_ptrans = tr_R_on(tr_size*min_w_nmos, NCH, 1);
150 double cap_nand_out = drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
151 drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
152 gate_C(tr_size*min_w_pmos, 0);
153 double cap_ptrans_out = 2 *(drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
154 drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) +
155 gate_C(s1*(min_w_nmos + min_w_pmos), 0);
156
157 double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out;
158
159
160 delay += horowitz (w1.out_rise_time, tc,
161 deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
162
163 //nand
164 power.readOp.dynamic += 0.5 *
165 (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
166 drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
167 gate_C(tr_size*(min_w_pmos), 0)) *
168 deviceType->Vdd * deviceType->Vdd;
169
170 power.searchOp.dynamic += 0.5 *
171 (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
172 drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
173 gate_C(tr_size*(min_w_pmos), 0)) *
174 deviceType->Vdd * deviceType->Vdd*init_wire_bw;
175
176 //not
177 power.readOp.dynamic += 0.5 *
178 (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
179 +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
180 +gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
181 deviceType->Vdd * deviceType->Vdd;
182
183 power.searchOp.dynamic += 0.5 *
184 (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
185 +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
186 +gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
187 deviceType->Vdd * deviceType->Vdd*init_wire_bw;
188
189 //nor
190 power.readOp.dynamic += 0.5 *
191 (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
192 + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
193 +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
194 deviceType->Vdd * deviceType->Vdd;
195
196 power.searchOp.dynamic += 0.5 *
197 (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
198 + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
199 +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
200 deviceType->Vdd * deviceType->Vdd*init_wire_bw;
201
202 //output transistor
203 power.readOp.dynamic += 0.5 *
204 ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
205 +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
206 + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
207 deviceType->Vdd * deviceType->Vdd;
208
209 power.searchOp.dynamic += 0.5 *
210 ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
211 +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
212 + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
213 deviceType->Vdd * deviceType->Vdd*init_wire_bw;
214
215 if(uca_tree) {
216 power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
217 power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
218 power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
219
220 power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
221 power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
222 power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
223 //power.readOp.gate_leakage *=;
224 }
225 else {
226 power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
227 power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
228 power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
229
230 power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
231 power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
232 power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
233 //power.readOp.gate_leakage *=deviceType->Vdd*wire_bw;
234 }
235}
236
237
238
239/* calculates the input h-tree delay/power
240 * A nand gate is used at each node to
241 * limit the signal
242 * The area of an unbalanced htree (rows != columns)
243 * depends on how data is traversed.
244 * In the following function, if ( no. of rows < no. of columns),
245 * then data first traverse in excess hor. links until vertical
246 * and horizontal nodes are same.
247 * If no. of rows is bigger, then data traverse in
248 * a hor. link followed by a ver. link in a repeated
249 * fashion (similar to a balanced tree) until there are no
250 * hor. links left. After this it goes through the remaining vertical
251 * links.
252 */
253 void
254Htree2::in_htree()
255{
256 //temp var
257 double s1 = 0, s2 = 0, s3 = 0;
258 double l_eff = 0;
259 Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
260 double len = 0, ht = 0;
261 int option = 0;
262
263 int h = (int) _log2(ndwl/2); // horizontal nodes
264 int v = (int) _log2(ndbl/2); // vertical nodes
265 double len_temp;
266 double ht_temp;
267 if (uca_tree)
268 {//Sheng: this computation do not consider the wires that route from edge to middle.
269 ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
270 ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
271 2 * (1-pow(0.5,h))))/2;
272 len_temp = (mat_width*ndwl/2 +
273 ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
274 2 * (1-pow(0.5,v))))/2;
275 }
276 else
277 {
278 if (ndwl == ndbl) {
279 ht_temp = ((mat_height*ndbl/2) +
280 ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
281 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
282 )/2;
283 len_temp = (mat_width*ndwl/2 +
284 ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
285 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
286 }
287 else if (ndwl > ndbl) {
288 double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
289 ht_temp = ((mat_height*ndbl/2) +
290 ((add_bits + + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
291 (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
292 (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
293 len_temp = (mat_width*ndwl/2 +
294 ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
295 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
296 }
297 else {
298 double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
299 ht_temp = ((mat_height*ndbl/2) +
300 ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
301 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
302 )/2;
303 len_temp = (mat_width*ndwl/2 +
304 ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
305 (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
306 }
307 }
308
309 area.h = ht_temp * 2;
310 area.w = len_temp * 2;
311 delay = 0;
312 power.readOp.dynamic = 0;
313 power.readOp.leakage = 0;
314 power.searchOp.dynamic =0;
315 len = len_temp;
316 ht = ht_temp/2;
317
318 while (v > 0 || h > 0)
319 {
320 if (wtemp1) delete wtemp1;
321 if (wtemp2) delete wtemp2;
322 if (wtemp3) delete wtemp3;
323
324 if (h > v)
325 {
326 //the iteration considers only one horizontal link
327 wtemp1 = new Wire(wt, len); // hor
328 wtemp2 = new Wire(wt, len/2); // ver
329 len_temp = len;
330 len /= 2;
331 wtemp3 = 0;
332 h--;
333 option = 0;
334 }
335 else if (v>0 && h>0)
336 {
337 //considers one horizontal link and one vertical link
338 wtemp1 = new Wire(wt, len); // hor
339 wtemp2 = new Wire(wt, ht); // ver
340 wtemp3 = new Wire(wt, len/2); // next hor
341 len_temp = len;
342 ht_temp = ht;
343 len /= 2;
344 ht /= 2;
345 v--;
346 h--;
347 option = 1;
348 }
349 else
350 {
351 // considers only one vertical link
352 assert(h == 0);
353 wtemp1 = new Wire(wt, ht); // ver
354 wtemp2 = new Wire(wt, ht/2); // hor
355 ht_temp = ht;
356 ht /= 2;
357 wtemp3 = 0;
358 v--;
359 option = 2;
360 }
361
362 delay += wtemp1->delay;
363 power.readOp.dynamic += wtemp1->power.readOp.dynamic;
364 power.searchOp.dynamic += wtemp1->power.readOp.dynamic*wire_bw;
365 power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
366 power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
367 if ((uca_tree == false && option == 2) || search_tree==true)
368 {
369 wire_bw*=2; // wire bandwidth doubles only for vertical branches
370 }
371
372 if (uca_tree == false)
373 {
374 if (len_temp > wtemp1->repeater_spacing)
375 {
376 s1 = wtemp1->repeater_size;
377 l_eff = wtemp1->repeater_spacing;
378 }
379 else
380 {
381 s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
382 l_eff = len_temp;
383 }
384
385 if (ht_temp > wtemp2->repeater_spacing)
386 {
387 s2 = wtemp2->repeater_size;
388 }
389 else
390 {
391 s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
392 }
393 // first level
394 input_nand(s1, s2, l_eff);
395 }
396
397
398 if (option != 1)
399 {
400 continue;
401 }
402
403 // second level
404 delay += wtemp2->delay;
405 power.readOp.dynamic += wtemp2->power.readOp.dynamic;
406 power.searchOp.dynamic += wtemp2->power.readOp.dynamic*wire_bw;
407 power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
408 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
409
410 if (uca_tree)
411 {
412 power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
413 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
414 }
415 else
416 {
417 power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
418 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
419 wire_bw*=2;
420
421 if (ht_temp > wtemp3->repeater_spacing)
422 {
423 s3 = wtemp3->repeater_size;
424 l_eff = wtemp3->repeater_spacing;
425 }
426 else
427 {
428 s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
429 l_eff = ht_temp;
430 }
431
432 input_nand(s2, s3, l_eff);
433 }
434 }
435
436 if (wtemp1) delete wtemp1;
437 if (wtemp2) delete wtemp2;
438 if (wtemp3) delete wtemp3;
439}
440
441
442
443/* a tristate buffer is used to handle fan-ins
444 * The area of an unbalanced htree (rows != columns)
445 * depends on how data is traversed.
446 * In the following function, if ( no. of rows < no. of columns),
447 * then data first traverse in excess hor. links until vertical
448 * and horizontal nodes are same.
449 * If no. of rows is bigger, then data traverse in
450 * a hor. link followed by a ver. link in a repeated
451 * fashion (similar to a balanced tree) until there are no
452 * hor. links left. After this it goes through the remaining vertical
453 * links.
454 */
455void Htree2::out_htree()
456{
457 //temp var
458 double s1 = 0, s2 = 0, s3 = 0;
459 double l_eff = 0;
460 Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
461 double len = 0, ht = 0;
462 int option = 0;
463
464 int h = (int) _log2(ndwl/2);
465 int v = (int) _log2(ndbl/2);
466 double len_temp;
467 double ht_temp;
468 if (uca_tree)
469 {
470 ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
471 ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
472 2 * (1-pow(0.5,h))))/2;
473 len_temp = (mat_width*ndwl/2 +
474 ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
475 2 * (1-pow(0.5,v))))/2;
476 }
477 else
478 {
479 if (ndwl == ndbl) {
480 ht_temp = ((mat_height*ndbl/2) +
481 ((add_bits+ (search_data_in_bits + search_data_out_bits)) * (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
482 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
483 )/2;
484 len_temp = (mat_width*ndwl/2 +
485 ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
486 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
487
488 }
489 else if (ndwl > ndbl) {
490 double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
491 ht_temp = ((mat_height*ndbl/2) +
492 ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
493 (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
494 (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
495 len_temp = (mat_width*ndwl/2 +
496 ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
497 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
498 }
499 else {
500 double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
501 ht_temp = ((mat_height*ndbl/2) +
502 ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
503 ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
504 )/2;
505 len_temp = (mat_width*ndwl/2 +
506 ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
507 (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
508 }
509 }
510 area.h = ht_temp * 2;
511 area.w = len_temp * 2;
512 delay = 0;
513 power.readOp.dynamic = 0;
514 power.readOp.leakage = 0;
515 power.readOp.gate_leakage = 0;
516 //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
517 len = len_temp;
518 ht = ht_temp/2;
519
520 while (v > 0 || h > 0)
521 { //finds delay/power of each link in the tree
522 if (wtemp1) delete wtemp1;
523 if (wtemp2) delete wtemp2;
524 if (wtemp3) delete wtemp3;
525
526 if(h > v) {
527 //the iteration considers only one horizontal link
528 wtemp1 = new Wire(wt, len); // hor
529 wtemp2 = new Wire(wt, len/2); // ver
530 len_temp = len;
531 len /= 2;
532 wtemp3 = 0;
533 h--;
534 option = 0;
535 }
536 else if (v>0 && h>0) {
537 //considers one horizontal link and one vertical link
538 wtemp1 = new Wire(wt, len); // hor
539 wtemp2 = new Wire(wt, ht); // ver
540 wtemp3 = new Wire(wt, len/2); // next hor
541 len_temp = len;
542 ht_temp = ht;
543 len /= 2;
544 ht /= 2;
545 v--;
546 h--;
547 option = 1;
548 }
549 else {
550 // considers only one vertical link
551 assert(h == 0);
552 wtemp1 = new Wire(wt, ht); // hor
553 wtemp2 = new Wire(wt, ht/2); // ver
554 ht_temp = ht;
555 ht /= 2;
556 wtemp3 = 0;
557 v--;
558 option = 2;
559 }
560 delay += wtemp1->delay;
561 power.readOp.dynamic += wtemp1->power.readOp.dynamic;
562 power.searchOp.dynamic += wtemp1->power.readOp.dynamic*init_wire_bw;
563 power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
564 power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
565 //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
566 if ((uca_tree == false && option == 2) || search_tree==true)
567 {
568 wire_bw*=2;
569 }
570
571 if (uca_tree == false)
572 {
573 if (len_temp > wtemp1->repeater_spacing)
574 {
575 s1 = wtemp1->repeater_size;
576 l_eff = wtemp1->repeater_spacing;
577 }
578 else
579 {
580 s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
581 l_eff = len_temp;
582 }
583 if (ht_temp > wtemp2->repeater_spacing)
584 {
585 s2 = wtemp2->repeater_size;
586 }
587 else
588 {
589 s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
590 }
591 // first level
592 output_buffer(s1, s2, l_eff);
593 }
594
595
596 if (option != 1)
597 {
598 continue;
599 }
600
601 // second level
602 delay += wtemp2->delay;
603 power.readOp.dynamic += wtemp2->power.readOp.dynamic;
604 power.searchOp.dynamic += wtemp2->power.readOp.dynamic*init_wire_bw;
605 power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
606 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
607 //cout<<"power.readOp.gate_leakage"<608 if (uca_tree)
609 {
610 power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
611 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
612 }
613 else
614 {
615 power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
616 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
617 wire_bw*=2;
618
619 if (ht_temp > wtemp3->repeater_spacing)
620 {
621 s3 = wtemp3->repeater_size;
622 l_eff = wtemp3->repeater_spacing;
623 }
624 else
625 {
626 s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
627 l_eff = ht_temp;
628 }
629
630 output_buffer(s2, s3, l_eff);
631 }
632 //cout<<"power.readOp.leakage"<<power.readOp.leakage<<endl;
633 //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
634 //cout<<"wtemp2->power.readOp.gate_leakage"<<wtemp2->power.readOp.gate_leakage<<endl;
635 }
636
637 if (wtemp1) delete wtemp1;
638 if (wtemp2) delete wtemp2;
639 if (wtemp3) delete wtemp3;
640}
641