Deleted Added
sdiff udiff text old ( 10152:52c552138ba1 ) new ( 10234:5cb711fa6176 )
full compact
1/*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
6 * All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 ***************************************************************************/
32
33
34
35#include <cassert>
36#include <iostream>
37
38#include "htree2.h"
39#include "wire.h"
40
41Htree2::Htree2(
42 enum Wire_type wire_model, double mat_w, double mat_h,
43 int a_bits, int d_inbits, int search_data_in, int d_outbits,
44 int search_data_out, int bl, int wl, enum Htree_type htree_type,
45 bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt)
46 : in_rise_time(0), out_rise_time(0),
47 tree_type(htree_type), mat_width(mat_w), mat_height(mat_h),
48 add_bits(a_bits), data_in_bits(d_inbits),
49 search_data_in_bits(search_data_in), data_out_bits(d_outbits),
50 search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl),
51 uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model),
52 deviceType(dt) {
53 assert(ndbl >= 2 && ndwl >= 2);
54
55// if (ndbl == 1 && ndwl == 1)
56// {
57// delay = 0;
58// power.readOp.dynamic = 0;
59// power.readOp.leakage = 0;
60// area.w = mat_w;
61// area.h = mat_h;
62// return;
63// }
64// if (ndwl == 1) ndwl++;
65// if (ndbl == 1) ndbl++;
66
67 max_unpipelined_link_delay = 0; //TODO
68 min_w_nmos = g_tp.min_w_nmos_;
69 min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
70
71 switch (htree_type) {
72 case Add_htree:
73 wire_bw = init_wire_bw = add_bits;
74 in_htree();
75 break;
76 case Data_in_htree:
77 wire_bw = init_wire_bw = data_in_bits;
78 in_htree();
79 break;
80 case Data_out_htree:
81 wire_bw = init_wire_bw = data_out_bits;
82 out_htree();
83 break;
84 case Search_in_htree:
85 wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not.
86 in_htree();
87 break;
88 case Search_out_htree:
89 wire_bw = init_wire_bw = search_data_out_bits;
90 out_htree();
91 break;
92 default:
93 assert(0);
94 break;
95 }
96
97 power_bit = power;
98 power.readOp.dynamic *= init_wire_bw;
99
100 assert(power.readOp.dynamic >= 0);
101 assert(power.readOp.leakage >= 0);
102}
103
104
105
106// nand gate sizing calculation
107void Htree2::input_nand(double s1, double s2, double l_eff) {
108 Wire w1(wt, l_eff);
109 double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
110 // input capacitance of a repeater = input capacitance of nand.
111 double nsize = s1 * (1 + pton_size) / (2 + pton_size);
112 nsize = (nsize < 1) ? 1 : nsize;
113
114 double tc = 2 * tr_R_on(nsize * min_w_nmos, NCH, 1) *
115 (drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) * 2 +
116 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0));
117 delay += horowitz(w1.out_rise_time, tc,
118 deviceType->Vth / deviceType->Vdd, deviceType->Vth /
119 deviceType->Vdd, RISE);
120 power.readOp.dynamic += 0.5 *
121 (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
122 + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
123 + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) *
124 deviceType->Vdd * deviceType->Vdd;
125
126 power.searchOp.dynamic += 0.5 *
127 (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
128 + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
129 + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) *
130 deviceType->Vdd * deviceType->Vdd * wire_bw ;
131 power.readOp.leakage += (wire_bw *
132 cmos_Isub_leakage(min_w_nmos * (nsize * 2),
133 min_w_pmos * nsize * 2, 2,
134 nand)) * deviceType->Vdd;
135 power.readOp.gate_leakage += (wire_bw *
136 cmos_Ig_leakage(min_w_nmos * (nsize * 2),
137 min_w_pmos * nsize * 2, 2,
138 nand)) * deviceType->Vdd;
139}
140
141
142
143// tristate buffer model consisting of not, nand, nor, and driver transistors
144void Htree2::output_buffer(double s1, double s2, double l_eff) {
145 Wire w1(wt, l_eff);
146 double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
147 // input capacitance of repeater = input capacitance of nand + nor.
148 double size = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size);
149 double s_eff = //stage eff of a repeater in a wire
150 (gate_C(s2 * (min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff * 1e-6,
151 true)) /
152 gate_C(s2 * (min_w_nmos + min_w_pmos), 0);
153 double tr_size = gate_C(s1 * (min_w_nmos + min_w_pmos), 0) * 1 / 2 /
154 (s_eff * gate_C(min_w_pmos, 0));
155 size = (size < 1) ? 1 : size;
156
157 double res_nor = 2 * tr_R_on(size * min_w_pmos, PCH, 1);
158 double res_ptrans = tr_R_on(tr_size * min_w_nmos, NCH, 1);
159 double cap_nand_out =
160 drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
161 drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 +
162 gate_C(tr_size * min_w_pmos, 0);
163 double cap_ptrans_out = 2 *
164 (drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
165 drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) +
166 gate_C(s1 * (min_w_nmos + min_w_pmos), 0);
167
168 double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out;
169
170
171 delay += horowitz(w1.out_rise_time, tc,
172 deviceType->Vth / deviceType->Vdd, deviceType->Vth /
173 deviceType->Vdd, RISE);
174
175 //nand
176 power.readOp.dynamic += 0.5 *
177 (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
178 drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
179 gate_C(tr_size * (min_w_pmos), 0)) *
180 deviceType->Vdd * deviceType->Vdd;
181
182 power.searchOp.dynamic += 0.5 *
183 (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
184 drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
185 gate_C(tr_size * (min_w_pmos), 0)) *
186 deviceType->Vdd * deviceType->Vdd * init_wire_bw;
187
188 //not
189 power.readOp.dynamic += 0.5 *
190 (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
191 + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
192 + gate_C(size * (min_w_nmos + min_w_pmos), 0)) *
193 deviceType->Vdd * deviceType->Vdd;
194
195 power.searchOp.dynamic += 0.5 *
196 (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
197 + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
198 + gate_C(size * (min_w_nmos + min_w_pmos), 0)) *
199 deviceType->Vdd * deviceType->Vdd * init_wire_bw;
200
201 //nor
202 power.readOp.dynamic += 0.5 *
203 (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
204 + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
205 + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) *
206 deviceType->Vdd * deviceType->Vdd;
207
208 power.searchOp.dynamic += 0.5 *
209 (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
210 + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
211 + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) *
212 deviceType->Vdd * deviceType->Vdd * init_wire_bw;
213
214 //output transistor
215 power.readOp.dynamic += 0.5 *
216 ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
217 + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * 2
218 + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) *
219 deviceType->Vdd * deviceType->Vdd;
220
221 power.searchOp.dynamic += 0.5 *
222 ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
223 + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * 2
224 + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) *
225 deviceType->Vdd * deviceType->Vdd * init_wire_bw;
226
227 if (uca_tree) {
228 power.readOp.leakage +=
229 cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size *
230 2, 1, inv) *
231 deviceType->Vdd * wire_bw;/*inverter + output tr*/
232 power.readOp.leakage +=
233 cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
234 nand) * deviceType->Vdd * wire_bw;//nand
235 power.readOp.leakage +=
236 cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
237 nor) * deviceType->Vdd * wire_bw;//nor
238
239 power.readOp.gate_leakage +=
240 cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2,
241 1, inv) *
242 deviceType->Vdd * wire_bw;/*inverter + output tr*/
243 power.readOp.gate_leakage +=
244 cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
245 nand) * deviceType->Vdd * wire_bw;//nand
246 power.readOp.gate_leakage +=
247 cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
248 nor) * deviceType->Vdd * wire_bw;//nor
249 } else {
250 power.readOp.leakage +=
251 cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size *
252 2, 1, inv) *
253 deviceType->Vdd * wire_bw;/*inverter + output tr*/
254 power.readOp.leakage +=
255 cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
256 nand) * deviceType->Vdd * wire_bw;//nand
257 power.readOp.leakage +=
258 cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
259 nor) * deviceType->Vdd * wire_bw;//nor
260
261 power.readOp.gate_leakage +=
262 cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2,
263 1, inv) *
264 deviceType->Vdd * wire_bw;/*inverter + output tr*/
265 power.readOp.gate_leakage +=
266 cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
267 nand) * deviceType->Vdd * wire_bw;//nand
268 power.readOp.gate_leakage +=
269 cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
270 nor) * deviceType->Vdd * wire_bw;//nor
271 }
272}
273
274
275
276/* calculates the input h-tree delay/power
277 * A nand gate is used at each node to
278 * limit the signal
279 * The area of an unbalanced htree (rows != columns)
280 * depends on how data is traversed.
281 * In the following function, if ( no. of rows < no. of columns),
282 * then data first traverse in excess hor. links until vertical
283 * and horizontal nodes are same.
284 * If no. of rows is bigger, then data traverse in
285 * a hor. link followed by a ver. link in a repeated
286 * fashion (similar to a balanced tree) until there are no
287 * hor. links left. After this it goes through the remaining vertical
288 * links.
289 */
290void
291Htree2::in_htree() {
292 //temp var
293 double s1 = 0, s2 = 0, s3 = 0;
294 double l_eff = 0;
295 Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
296 double len = 0, ht = 0;
297 int option = 0;
298
299 int h = (int) _log2(ndwl / 2); // horizontal nodes
300 int v = (int) _log2(ndbl / 2); // vertical nodes
301 double len_temp;
302 double ht_temp;
303 if (uca_tree) {
304 //Sheng: this computation do not consider the wires that route from
305 //edge to middle.
306 ht_temp = (mat_height * ndbl / 2 +
307 /* since uca_tree models interbank tree,
308 mat_height => bank height */
309 ((add_bits + data_in_bits + data_out_bits +
310 (search_data_in_bits + search_data_out_bits)) *
311 g_tp.wire_outside_mat.pitch *
312 2 * (1 - pow(0.5, h)))) / 2;
313 len_temp = (mat_width * ndwl / 2 +
314 ((add_bits + data_in_bits + data_out_bits +
315 (search_data_in_bits + search_data_out_bits)) *
316 g_tp.wire_outside_mat.pitch *
317 2 * (1 - pow(0.5, v)))) / 2;
318 } else {
319 if (ndwl == ndbl) {
320 ht_temp = ((mat_height * ndbl / 2) +
321 ((add_bits + (search_data_in_bits +
322 search_data_out_bits)) * (ndbl / 2 - 1) *
323 g_tp.wire_outside_mat.pitch) +
324 ((data_in_bits + data_out_bits) *
325 g_tp.wire_outside_mat.pitch * h)
326 ) / 2;
327 len_temp = (mat_width * ndwl / 2 +
328 ((add_bits + (search_data_in_bits +
329 search_data_out_bits)) * (ndwl / 2 - 1) *
330 g_tp.wire_outside_mat.pitch) +
331 ((data_in_bits + data_out_bits) *
332 g_tp.wire_outside_mat.pitch * v)) / 2;
333 } else if (ndwl > ndbl) {
334 double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2));
335 ht_temp = ((mat_height * ndbl / 2) +
336 ((add_bits + + (search_data_in_bits +
337 search_data_out_bits)) *
338 ((ndbl / 2 - 1) + excess_part) *
339 g_tp.wire_outside_mat.pitch) +
340 (data_in_bits + data_out_bits) *
341 g_tp.wire_outside_mat.pitch *
342 (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2;
343 len_temp = (mat_width * ndwl / 2 +
344 ((add_bits +
345 (search_data_in_bits + search_data_out_bits)) *
346 (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
347 ((data_in_bits + data_out_bits) *
348 g_tp.wire_outside_mat.pitch * v)) / 2;
349 } else {
350 double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2));
351 ht_temp = ((mat_height * ndbl / 2) +
352 ((add_bits +
353 (search_data_in_bits + search_data_out_bits)) *
354 ((ndwl / 2 - 1) + excess_part) *
355 g_tp.wire_outside_mat.pitch) +
356 ((data_in_bits + data_out_bits) *
357 g_tp.wire_outside_mat.pitch * h)
358 ) / 2;
359 len_temp = (mat_width * ndwl / 2 +
360 ((add_bits +
361 (search_data_in_bits + search_data_out_bits)) *
362 ((ndwl / 2 - 1) + excess_part) *
363 g_tp.wire_outside_mat.pitch) +
364 (data_in_bits + data_out_bits) *
365 g_tp.wire_outside_mat.pitch *
366 (h + 2 * (1 - pow(0.5, v - h)))) / 2;
367 }
368 }
369
370 area.h = ht_temp * 2;
371 area.w = len_temp * 2;
372 delay = 0;
373 power.readOp.dynamic = 0;
374 power.readOp.leakage = 0;
375 power.searchOp.dynamic = 0;
376 len = len_temp;
377 ht = ht_temp / 2;
378
379 while (v > 0 || h > 0) {
380 if (wtemp1) delete wtemp1;
381 if (wtemp2) delete wtemp2;
382 if (wtemp3) delete wtemp3;
383
384 if (h > v) {
385 //the iteration considers only one horizontal link
386 wtemp1 = new Wire(wt, len); // hor
387 wtemp2 = new Wire(wt, len / 2); // ver
388 len_temp = len;
389 len /= 2;
390 wtemp3 = 0;
391 h--;
392 option = 0;
393 } else if (v > 0 && h > 0) {
394 //considers one horizontal link and one vertical link
395 wtemp1 = new Wire(wt, len); // hor
396 wtemp2 = new Wire(wt, ht); // ver
397 wtemp3 = new Wire(wt, len / 2); // next hor
398 len_temp = len;
399 ht_temp = ht;
400 len /= 2;
401 ht /= 2;
402 v--;
403 h--;
404 option = 1;
405 } else {
406 // considers only one vertical link
407 assert(h == 0);
408 wtemp1 = new Wire(wt, ht); // ver
409 wtemp2 = new Wire(wt, ht / 2); // hor
410 ht_temp = ht;
411 ht /= 2;
412 wtemp3 = 0;
413 v--;
414 option = 2;
415 }
416
417 delay += wtemp1->delay;
418 power.readOp.dynamic += wtemp1->power.readOp.dynamic;
419 power.searchOp.dynamic += wtemp1->power.readOp.dynamic * wire_bw;
420 power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw;
421 power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw;
422 if ((uca_tree == false && option == 2) || search_tree == true) {
423 wire_bw *= 2; // wire bandwidth doubles only for vertical branches
424 }
425
426 if (uca_tree == false) {
427 if (len_temp > wtemp1->repeater_spacing) {
428 s1 = wtemp1->repeater_size;
429 l_eff = wtemp1->repeater_spacing;
430 } else {
431 s1 = (len_temp / wtemp1->repeater_spacing) *
432 wtemp1->repeater_size;
433 l_eff = len_temp;
434 }
435
436 if (ht_temp > wtemp2->repeater_spacing) {
437 s2 = wtemp2->repeater_size;
438 } else {
439 s2 = (len_temp / wtemp2->repeater_spacing) *
440 wtemp2->repeater_size;
441 }
442 // first level
443 input_nand(s1, s2, l_eff);
444 }
445
446
447 if (option != 1) {
448 continue;
449 }
450
451 // second level
452 delay += wtemp2->delay;
453 power.readOp.dynamic += wtemp2->power.readOp.dynamic;
454 power.searchOp.dynamic += wtemp2->power.readOp.dynamic * wire_bw;
455 power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw;
456 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw;
457
458 if (uca_tree) {
459 power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
460 power.readOp.gate_leakage +=
461 wtemp2->power.readOp.gate_leakage * wire_bw;
462 } else {
463 power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
464 power.readOp.gate_leakage +=
465 wtemp2->power.readOp.gate_leakage * wire_bw;
466 wire_bw *= 2;
467
468 if (ht_temp > wtemp3->repeater_spacing) {
469 s3 = wtemp3->repeater_size;
470 l_eff = wtemp3->repeater_spacing;
471 } else {
472 s3 = (len_temp / wtemp3->repeater_spacing) *
473 wtemp3->repeater_size;
474 l_eff = ht_temp;
475 }
476
477 input_nand(s2, s3, l_eff);
478 }
479 }
480
481 if (wtemp1) delete wtemp1;
482 if (wtemp2) delete wtemp2;
483 if (wtemp3) delete wtemp3;
484}
485
486
487
488/* a tristate buffer is used to handle fan-ins
489 * The area of an unbalanced htree (rows != columns)
490 * depends on how data is traversed.
491 * In the following function, if ( no. of rows < no. of columns),
492 * then data first traverse in excess hor. links until vertical
493 * and horizontal nodes are same.
494 * If no. of rows is bigger, then data traverse in
495 * a hor. link followed by a ver. link in a repeated
496 * fashion (similar to a balanced tree) until there are no
497 * hor. links left. After this it goes through the remaining vertical
498 * links.
499 */
500void Htree2::out_htree() {
501 //temp var
502 double s1 = 0, s2 = 0, s3 = 0;
503 double l_eff = 0;
504 Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
505 double len = 0, ht = 0;
506 int option = 0;
507
508 int h = (int) _log2(ndwl / 2);
509 int v = (int) _log2(ndbl / 2);
510 double len_temp;
511 double ht_temp;
512 if (uca_tree) {
513 ht_temp = (mat_height * ndbl / 2 +
514 /* since uca_tree models interbank tree,
515 mat_height => bank height */
516 ((add_bits + data_in_bits + data_out_bits +
517 (search_data_in_bits + search_data_out_bits)) *
518 g_tp.wire_outside_mat.pitch *
519 2 * (1 - pow(0.5, h)))) / 2;
520 len_temp = (mat_width * ndwl / 2 +
521 ((add_bits + data_in_bits + data_out_bits +
522 (search_data_in_bits + search_data_out_bits)) *
523 g_tp.wire_outside_mat.pitch *
524 2 * (1 - pow(0.5, v)))) / 2;
525 } else {
526 if (ndwl == ndbl) {
527 ht_temp = ((mat_height * ndbl / 2) +
528 ((add_bits + (search_data_in_bits +
529 search_data_out_bits)) *
530 (ndbl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
531 ((data_in_bits + data_out_bits) *
532 g_tp.wire_outside_mat.pitch * h)
533 ) / 2;
534 len_temp = (mat_width * ndwl / 2 +
535 ((add_bits + (search_data_in_bits +
536 search_data_out_bits)) * (ndwl / 2 - 1) *
537 g_tp.wire_outside_mat.pitch) +
538 ((data_in_bits + data_out_bits) *
539 g_tp.wire_outside_mat.pitch * v)) / 2;
540
541 } else if (ndwl > ndbl) {
542 double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2));
543 ht_temp = ((mat_height * ndbl / 2) +
544 ((add_bits +
545 (search_data_in_bits + search_data_out_bits)) *
546 ((ndbl / 2 - 1) + excess_part) *
547 g_tp.wire_outside_mat.pitch) +
548 (data_in_bits + data_out_bits) *
549 g_tp.wire_outside_mat.pitch *
550 (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2;
551 len_temp = (mat_width * ndwl / 2 +
552 ((add_bits +
553 (search_data_in_bits + search_data_out_bits)) *
554 (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
555 ((data_in_bits + data_out_bits) *
556 g_tp.wire_outside_mat.pitch * v)) / 2;
557 } else {
558 double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2));
559 ht_temp = ((mat_height * ndbl / 2) +
560 ((add_bits +
561 (search_data_in_bits + search_data_out_bits)) *
562 ((ndwl / 2 - 1) + excess_part) *
563 g_tp.wire_outside_mat.pitch) +
564 ((data_in_bits + data_out_bits) *
565 g_tp.wire_outside_mat.pitch * h)
566 ) / 2;
567 len_temp = (mat_width * ndwl / 2 +
568 ((add_bits + (search_data_in_bits +
569 search_data_out_bits)) *
570 ((ndwl / 2 - 1) + excess_part) *
571 g_tp.wire_outside_mat.pitch) +
572 (data_in_bits + data_out_bits) *
573 g_tp.wire_outside_mat.pitch *
574 (h + 2 * (1 - pow(0.5, v - h)))) / 2;
575 }
576 }
577 area.h = ht_temp * 2;
578 area.w = len_temp * 2;
579 delay = 0;
580 power.readOp.dynamic = 0;
581 power.readOp.leakage = 0;
582 power.readOp.gate_leakage = 0;
583 //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
584 len = len_temp;
585 ht = ht_temp / 2;
586
587 while (v > 0 || h > 0) { //finds delay/power of each link in the tree
588 if (wtemp1) delete wtemp1;
589 if (wtemp2) delete wtemp2;
590 if (wtemp3) delete wtemp3;
591
592 if (h > v) {
593 //the iteration considers only one horizontal link
594 wtemp1 = new Wire(wt, len); // hor
595 wtemp2 = new Wire(wt, len / 2); // ver
596 len_temp = len;
597 len /= 2;
598 wtemp3 = 0;
599 h--;
600 option = 0;
601 } else if (v > 0 && h > 0) {
602 //considers one horizontal link and one vertical link
603 wtemp1 = new Wire(wt, len); // hor
604 wtemp2 = new Wire(wt, ht); // ver
605 wtemp3 = new Wire(wt, len / 2); // next hor
606 len_temp = len;
607 ht_temp = ht;
608 len /= 2;
609 ht /= 2;
610 v--;
611 h--;
612 option = 1;
613 } else {
614 // considers only one vertical link
615 assert(h == 0);
616 wtemp1 = new Wire(wt, ht); // hor
617 wtemp2 = new Wire(wt, ht / 2); // ver
618 ht_temp = ht;
619 ht /= 2;
620 wtemp3 = 0;
621 v--;
622 option = 2;
623 }
624 delay += wtemp1->delay;
625 power.readOp.dynamic += wtemp1->power.readOp.dynamic;
626 power.searchOp.dynamic += wtemp1->power.readOp.dynamic * init_wire_bw;
627 power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw;
628 power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw;
629 if ((uca_tree == false && option == 2) || search_tree == true) {
630 wire_bw *= 2;
631 }
632
633 if (uca_tree == false) {
634 if (len_temp > wtemp1->repeater_spacing) {
635 s1 = wtemp1->repeater_size;
636 l_eff = wtemp1->repeater_spacing;
637 } else {
638 s1 = (len_temp / wtemp1->repeater_spacing) *
639 wtemp1->repeater_size;
640 l_eff = len_temp;
641 }
642 if (ht_temp > wtemp2->repeater_spacing) {
643 s2 = wtemp2->repeater_size;
644 } else {
645 s2 = (len_temp / wtemp2->repeater_spacing) *
646 wtemp2->repeater_size;
647 }
648 // first level
649 output_buffer(s1, s2, l_eff);
650 }
651
652
653 if (option != 1) {
654 continue;
655 }
656
657 // second level
658 delay += wtemp2->delay;
659 power.readOp.dynamic += wtemp2->power.readOp.dynamic;
660 power.searchOp.dynamic += wtemp2->power.readOp.dynamic * init_wire_bw;
661 power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw;
662 power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw;
663 //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
664 if (uca_tree) {
665 power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
666 power.readOp.gate_leakage +=
667 wtemp2->power.readOp.gate_leakage * wire_bw;
668 } else {
669 power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
670 power.readOp.gate_leakage +=
671 wtemp2->power.readOp.gate_leakage * wire_bw;
672 wire_bw *= 2;
673
674 if (ht_temp > wtemp3->repeater_spacing) {
675 s3 = wtemp3->repeater_size;
676 l_eff = wtemp3->repeater_spacing;
677 } else {
678 s3 = (len_temp / wtemp3->repeater_spacing) *
679 wtemp3->repeater_size;
680 l_eff = ht_temp;
681 }
682
683 output_buffer(s2, s3, l_eff);
684 }
685 //cout<<"power.readOp.leakage"<<power.readOp.leakage<<endl;
686 //cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
687 //cout<<"wtemp2->power.readOp.gate_leakage"<<wtemp2->power.readOp.gate_leakage<<endl;
688 }
689
690 if (wtemp1) delete wtemp1;
691 if (wtemp2) delete wtemp2;
692 if (wtemp3) delete wtemp3;
693}
694