/*****************************************************************************
 *                                McPAT/CACTI
 *                      SOFTWARE LICENSE AGREEMENT
 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
 *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
 *                          All Rights Reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ***************************************************************************/
32
33
34
35#include <iomanip>
36#include <iostream>
37#include <string>
38
39#include "area.h"
40#include "parameter.h"
41
42using namespace std;
43
44
45InputParameter * g_ip;
46TechnologyParameter g_tp;
47
48
49
50void TechnologyParameter::DeviceType::display(uint32_t indent) {
51    string indent_str(indent, ' ');
52
53    cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl;
54    cout << indent_str << "C_fringe  = " << setw(12) << C_fringe  << " F/um" << endl;
55    cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl;
56    cout << indent_str << "C_junc    = " << setw(12) << C_junc    << " F/um^2" << endl;
57    cout << indent_str << "l_phy     = " << setw(12) << l_phy     << " um" << endl;
58    cout << indent_str << "l_elec    = " << setw(12) << l_elec    << " um" << endl;
59    cout << indent_str << "R_nch_on  = " << setw(12) << R_nch_on  << " ohm-um" << endl;
60    cout << indent_str << "R_pch_on  = " << setw(12) << R_pch_on  << " ohm-um" << endl;
61    cout << indent_str << "Vdd       = " << setw(12) << Vdd       << " V" << endl;
62    cout << indent_str << "Vth       = " << setw(12) << Vth       << " V" << endl;
63    cout << indent_str << "I_on_n    = " << setw(12) << I_on_n    << " A/um" << endl;
64    cout << indent_str << "I_on_p    = " << setw(12) << I_on_p    << " A/um" << endl;
65    cout << indent_str << "I_off_n   = " << setw(12) << I_off_n   << " A/um" << endl;
66    cout << indent_str << "I_off_p   = " << setw(12) << I_off_p   << " A/um" << endl;
67    cout << indent_str << "C_ox      = " << setw(12) << C_ox      << " F/um^2" << endl;
68    cout << indent_str << "t_ox      = " << setw(12) << t_ox      << " um" << endl;
69    cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl;
70}
71
72
73
74void TechnologyParameter::InterconnectType::display(uint32_t indent) {
75    string indent_str(indent, ' ');
76
77    cout << indent_str << "pitch    = " << setw(12) << pitch    << " um" << endl;
78    cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl;
79    cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl;
80}
81
82void TechnologyParameter::ScalingFactor::display(uint32_t indent) {
83    string indent_str(indent, ' ');
84
85    cout << indent_str << "logic_scaling_co_eff    = " << setw(12) << logic_scaling_co_eff << endl;
86    cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl;
87}
88
89void TechnologyParameter::MemoryType::display(uint32_t indent) {
90    string indent_str(indent, ' ');
91
92    cout << indent_str << "b_w         = " << setw(12) << b_w << " um" << endl;
93    cout << indent_str << "b_h         = " << setw(12) << b_h << " um" << endl;
94    cout << indent_str << "cell_a_w    = " << setw(12) << cell_a_w << " um" << endl;
95    cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl;
96    cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl;
97    cout << indent_str << "Vbitpre     = " << setw(12) << Vbitpre << " V" << endl;
98}
99
100
101
102void TechnologyParameter::display(uint32_t indent) {
103    string indent_str(indent, ' ');
104
105    cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl;
106    cout << indent_str << "min_w_nmos_                = " << setw(12) << min_w_nmos_                << " um" << endl;
107    cout << indent_str << "max_w_nmos_                = " << setw(12) << max_w_nmos_                << " um" << endl;
108    cout << indent_str << "unit_len_wire_del          = " << setw(12) << unit_len_wire_del          << " s/um^2" << endl;
109    cout << indent_str << "FO4                        = " << setw(12) << FO4                        << " s" << endl;
110    cout << indent_str << "kinv                       = " << setw(12) << kinv                       << " s" << endl;
111    cout << indent_str << "vpp                        = " << setw(12) << vpp                        << " V" << endl;
112    cout << indent_str << "w_sense_en                 = " << setw(12) << w_sense_en                 << " um" << endl;
113    cout << indent_str << "w_sense_n                  = " << setw(12) << w_sense_n                  << " um" << endl;
114    cout << indent_str << "w_sense_p                  = " << setw(12) << w_sense_p                  << " um" << endl;
115    cout << indent_str << "w_iso                      = " << setw(12) << w_iso                      << " um" << endl;
116    cout << indent_str << "w_poly_contact             = " << setw(12) << w_poly_contact             << " um" << endl;
117    cout << indent_str << "spacing_poly_to_poly       = " << setw(12) << spacing_poly_to_poly       << " um" << endl;
118    cout << indent_str << "spacing_poly_to_contact    = " << setw(12) << spacing_poly_to_contact    << " um" << endl;
119    cout << endl;
120    cout << indent_str << "w_comp_inv_p1              = " << setw(12) << w_comp_inv_p1 << " um" << endl;
121    cout << indent_str << "w_comp_inv_p2              = " << setw(12) << w_comp_inv_p2 << " um" << endl;
122    cout << indent_str << "w_comp_inv_p3              = " << setw(12) << w_comp_inv_p3 << " um" << endl;
123    cout << indent_str << "w_comp_inv_n1              = " << setw(12) << w_comp_inv_n1 << " um" << endl;
124    cout << indent_str << "w_comp_inv_n2              = " << setw(12) << w_comp_inv_n2 << " um" << endl;
125    cout << indent_str << "w_comp_inv_n3              = " << setw(12) << w_comp_inv_n3 << " um" << endl;
126    cout << indent_str << "w_eval_inv_p               = " << setw(12) << w_eval_inv_p  << " um" << endl;
127    cout << indent_str << "w_eval_inv_n               = " << setw(12) << w_eval_inv_n  << " um" << endl;
128    cout << indent_str << "w_comp_n                   = " << setw(12) << w_comp_n      << " um" << endl;
129    cout << indent_str << "w_comp_p                   = " << setw(12) << w_comp_p      << " um" << endl;
130    cout << endl;
131    cout << indent_str << "dram_cell_I_on             = " << setw(12) << dram_cell_I_on << " A/um" << endl;
132    cout << indent_str << "dram_cell_Vdd              = " << setw(12) << dram_cell_Vdd  << " V" << endl;
133    cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl;
134    cout << indent_str << "dram_cell_C                = " << setw(12) << dram_cell_C               << " F" << endl;
135    cout << indent_str << "gm_sense_amp_latch         = " << setw(12) << gm_sense_amp_latch        << " F/s" << endl;
136    cout << endl;
137    cout << indent_str << "w_nmos_b_mux               = " << setw(12) << w_nmos_b_mux              << " um" << endl;
138    cout << indent_str << "w_nmos_sa_mux              = " << setw(12) << w_nmos_sa_mux             << " um" << endl;
139    cout << indent_str << "w_pmos_bl_precharge        = " << setw(12) << w_pmos_bl_precharge       << " um" << endl;
140    cout << indent_str << "w_pmos_bl_eq               = " << setw(12) << w_pmos_bl_eq              << " um" << endl;
141    cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS  = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl;
142    cout << indent_str << "HPOWERRAIL                 = " << setw(12) << HPOWERRAIL                << " um" << endl;
143    cout << indent_str << "cell_h_def                 = " << setw(12) << cell_h_def                << " um" << endl;
144
145    cout << endl;
146    cout << indent_str << "SRAM cell transistor: " << endl;
147    sram_cell.display(indent + 2);
148
149    cout << endl;
150    cout << indent_str << "DRAM access transistor: " << endl;
151    dram_acc.display(indent + 2);
152
153    cout << endl;
154    cout << indent_str << "DRAM wordline transistor: " << endl;
155    dram_wl.display(indent + 2);
156
157    cout << endl;
158    cout << indent_str << "peripheral global transistor: " << endl;
159    peri_global.display(indent + 2);
160
161    cout << endl;
162    cout << indent_str << "wire local" << endl;
163    wire_local.display(indent + 2);
164
165    cout << endl;
166    cout << indent_str << "wire inside mat" << endl;
167    wire_inside_mat.display(indent + 2);
168
169    cout << endl;
170    cout << indent_str << "wire outside mat" << endl;
171    wire_outside_mat.display(indent + 2);
172
173    cout << endl;
174    cout << indent_str << "SRAM" << endl;
175    sram.display(indent + 2);
176
177    cout << endl;
178    cout << indent_str << "DRAM" << endl;
179    dram.display(indent + 2);
180}
181
182
183DynamicParameter::DynamicParameter():
184        use_inp_params(0), cell(), is_valid(true) {
185}
186
187
188
189DynamicParameter::DynamicParameter(
190    bool is_tag_,
191    int pure_ram_,
192    int pure_cam_,
193    double Nspd_,
194    unsigned int Ndwl_,
195    unsigned int Ndbl_,
196    unsigned int Ndcm_,
197    unsigned int Ndsam_lev_1_,
198    unsigned int Ndsam_lev_2_,
199    bool is_main_mem_):
200    is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0),
201    Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_), Ndcm(Ndcm_),
202    Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_),
203    number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0),
204    is_main_mem(is_main_mem_), cell(), is_valid(false) {
205    ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
206    is_dram            = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
207
208    unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS;  // capacity per stacked die layer
209    const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local;
210    fully_assoc = (g_ip->fully_assoc) ? true : false;
211
212    // fully-assocative cache -- ref: CACTi 2.0 report
213    if (fully_assoc || pure_cam) {
214        if (Ndwl != 1 ||            //Ndwl is fixed to 1 for FA
215                Ndcm != 1 ||            //Ndcm is fixed to 1 for FA
216                Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA
217                Ndsam_lev_1 != 1 ||     //Ndsam_lev_1 is fixed to one
218                Ndsam_lev_2 != 1 ||     //Ndsam_lev_2 is fixed to one
219                Ndbl < 2) {
220            return;
221        }
222    }
223
224    if ((is_dram) && (!is_tag) && (Ndcm > 1)) {
225        return;  // For a DRAM array, each bitline has its own sense-amp
226    }
227
228    // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be
229    // at least two because an array is assumed to have at least one mat. And a mat
230    // is formed out of two horizontal subarrays and two vertical subarrays
231    if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) {
232        return;
233    }
234
235    //***********compute row, col of an subarray
236    if (!(fully_assoc || pure_cam)) {
237        //Not fully_asso nor cam
238        // if data array, let tagbits = 0
239        if (is_tag) {
240            if (g_ip->specific_tag) {
241                tagbits = g_ip->tag_w;
242            } else {
243                tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) +
244                          _log2(g_ip->tag_assoc * 2 - 1) - _log2(g_ip->nbanks);
245
246            }
247            tagbits = (((tagbits + 3) >> 2) << 2);
248
249            num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
250                                       g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON);
251            num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON);
252            //burst_length = 1;
253        } else {
254            num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
255                                       g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON);
256            num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON);
257            // burst_length = g_ip->block_sz * 8 / g_ip->out_w;
258        }
259
260        if (num_r_subarray < MINSUBARRAYROWS) return;
261        if (num_r_subarray == 0) return;
262        if (num_r_subarray > MAXSUBARRAYROWS) return;
263        if (num_c_subarray < MINSUBARRAYCOLS) return;
264        if (num_c_subarray > MAXSUBARRAYCOLS) return;
265
266    }
267
268    else {//either fully-asso or cam
269        if (pure_cam) {
270            if (g_ip->specific_tag) {
271                tagbits = int(ceil(g_ip->tag_w / 8.0) * 8);
272            } else {
273                tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS) / 8.0) * 8);
274//			  cout<<"Pure CAM needs tag width to be specified"<<endl;
275//			  exit(0);
276            }
277            //tagbits = (((tagbits + 3) >> 2) << 2);
278
279            //TODO: error check input of tagbits and blocksize
280            //TODO: for pure CAM, g_ip->block should be number of entries.
281            tag_num_r_subarray = (int)ceil(capacity_per_die /
282                                           (g_ip->nbanks * tagbits / 8.0 * Ndbl));
283            //tag_num_c_subarray = (int)(tagbits  + EPSILON);
284            tag_num_c_subarray = tagbits;
285            if (tag_num_r_subarray == 0) return;
286            if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
287            if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
288            if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
289            num_r_subarray = tag_num_r_subarray;
290        } else { //fully associative
291            if (g_ip->specific_tag) {
292                tagbits = g_ip->tag_w;
293            } else {
294                tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem.
295            }
296            tagbits = (((tagbits + 3) >> 2) << 2);
297
298            tag_num_r_subarray = (int)(capacity_per_die /
299                                       (g_ip->nbanks * g_ip->block_sz * Ndbl));
300            tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON);
301            if (tag_num_r_subarray == 0) return;
302            if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
303            if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
304            if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
305
306            data_num_r_subarray = tag_num_r_subarray;
307            data_num_c_subarray = 8 * g_ip->block_sz;
308            if (data_num_r_subarray == 0) return;
309            if (data_num_r_subarray > MAXSUBARRAYROWS) return;
310            if (data_num_c_subarray < MINSUBARRAYCOLS) return;
311            if (data_num_c_subarray > MAXSUBARRAYCOLS) return;
312            num_r_subarray = tag_num_r_subarray;
313        }
314    }
315
316    num_subarrays = Ndwl * Ndbl;
317    //****************end of computation of row, col of an subarray
318
319    // calculate wire parameters
320    if (fully_assoc || pure_cam) {
321        cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch *
322            (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
323            + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) +
324            wire_local.pitch * g_ip->num_se_rd_ports;
325        cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch *
326            (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
327            + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) +
328            wire_local.pitch * g_ip->num_se_rd_ports;
329
330        cell.h = g_tp.sram.b_h + 2 * wire_local.pitch *
331            (g_ip->num_wr_ports + g_ip->num_rw_ports - 1 + g_ip->num_rd_ports)
332            + 2 * wire_local.pitch * (g_ip->num_search_ports - 1);
333        cell.w = g_tp.sram.b_w + 2 * wire_local.pitch *
334            (g_ip->num_rw_ports - 1 + (g_ip->num_rd_ports -
335                                       g_ip->num_se_rd_ports)
336             + g_ip->num_wr_ports) + g_tp.wire_local.pitch *
337            g_ip->num_se_rd_ports + 2 * wire_local.pitch *
338            (g_ip->num_search_ports - 1);
339    } else {
340        if (is_tag) {
341            cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports +
342                     g_ip->num_wr_ports);
343            cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports +
344                     (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) +
345                     wire_local.pitch * g_ip->num_se_rd_ports;
346        } else {
347            if (is_dram) {
348                cell.h = g_tp.dram.b_h;
349                cell.w = g_tp.dram.b_w;
350            } else {
351                cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +
352                         g_ip->num_rw_ports - 1 + g_ip->num_rd_ports);
353                cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 +
354                         (g_ip->num_rd_ports - g_ip->num_se_rd_ports) +
355                         g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports;
356            }
357        }
358    }
359
360    double c_b_metal = cell.h * wire_local.C_per_um;
361    double C_bl;
362
363    if (!(fully_assoc || pure_cam)) {
364        if (is_dram) {
365            deg_bl_muxing = 1;
366            if (ram_cell_tech_type == comm_dram) {
367                C_bl  = num_r_subarray * c_b_metal;
368                V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C /
369                    (g_tp.dram_cell_C + C_bl);
370                if (V_b_sense < VBITSENSEMIN) {
371                    return;
372                }
373                V_b_sense = VBITSENSEMIN;  // in any case, we fix sense amp input signal to a constant value
374                dram_refresh_period = 64e-3;
375            } else {
376                double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0;
377                C_bl  = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
378                V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C /
379                    (g_tp.dram_cell_C + C_bl);
380
381                if (V_b_sense < VBITSENSEMIN) {
382                    return; //Sense amp input signal is smaller that minimum allowable sense amp input signal
383                }
384                V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
385                //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C;
386                //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp;
387                dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp;
388            }
389        } else { //SRAM
390            V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
391            deg_bl_muxing = Ndcm;
392            // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
393            // contacts in a physical layout
394            double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0;
395            C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
396            dram_refresh_period = 0;
397        }
398    } else {
399        c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM
400        V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
401        deg_bl_muxing = 1;//FA fix as 1
402        // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
403        // contacts in a physical layout
404        double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines
405        C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
406        dram_refresh_period = 0;
407    }
408
409
410    // do/di: data in/out, for fully associative they are the data width for normal read and write
411    // so/si: search data in/out, for fully associative they are the data width for the search ops
412    // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write)
413    // so/si needs broadcase while do/di do not
414
415    if (fully_assoc || pure_cam) {
416        switch (Ndbl) {
417        case (0):
418            cout <<  "   Invalid Ndbl \n" << endl;
419            exit(0);
420            break;
421        case (1):
422            num_mats_h_dir = 1;//one subarray per mat
423            num_mats_v_dir = 1;
424            break;
425        case (2):
426            num_mats_h_dir = 1;//two subarrays per mat
427            num_mats_v_dir = 1;
428            break;
429        default:
430            num_mats_h_dir = int(floor(sqrt(Ndbl / 4.0)));//4 subbarrys per mat
431            num_mats_v_dir = int(Ndbl / 4.0 / num_mats_h_dir);
432        }
433        num_mats = num_mats_h_dir * num_mats_v_dir;
434
435        if (fully_assoc) {
436            num_so_b_mat   = data_num_c_subarray;
437            num_do_b_mat   = data_num_c_subarray + tagbits;
438        } else {
439            num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
440            num_do_b_mat = tagbits;
441        }
442    } else {
443        num_mats_h_dir = MAX(Ndwl / 2, 1);
444        num_mats_v_dir = MAX(Ndbl / 2, 1);
445        num_mats       = num_mats_h_dir * num_mats_v_dir;
446        num_do_b_mat = MAX((num_subarrays / num_mats) * num_c_subarray /
447                           (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1);
448    }
449
450    if (!(fully_assoc || pure_cam) && (num_do_b_mat <
451                                       (num_subarrays / num_mats))) {
452        return;
453    }
454
455
456    int deg_sa_mux_l1_non_assoc;
457    //TODO:the i/o for subbank is not necessary and should be removed.
458    if (!(fully_assoc || pure_cam)) {
459        if (!is_tag) {
460            if (is_main_mem == true) {
461                num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w;
462                deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
463            } else {
464                if (g_ip->fast_access == true) {
465                    num_do_b_subbank = g_ip->out_w * g_ip->data_assoc;
466                    deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
467                } else {
468
469                    num_do_b_subbank = g_ip->out_w;
470                    deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc;
471                    if (deg_sa_mux_l1_non_assoc < 1) {
472                        return;
473                    }
474
475                }
476            }
477        } else {
478            num_do_b_subbank = tagbits * g_ip->tag_assoc;
479            if (num_do_b_mat < tagbits) {
480                return;
481            }
482            deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
483            //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir;
484        }
485    } else {
486        if (fully_assoc) {
487            num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa
488            num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray;
489        } else {
490            num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
491            num_do_b_subbank = tag_num_c_subarray;
492        }
493
494        deg_sa_mux_l1_non_assoc = 1;
495    }
496
497    deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc;
498
499    if (fully_assoc || pure_cam) {
500        num_act_mats_hor_dir = 1;
501        num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used
502    } else {
503        num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat;
504        if (num_act_mats_hor_dir == 0) {
505            return;
506        }
507    }
508
509    //compute num_do_mat for tag
510    if (is_tag) {
511        if (!(fully_assoc || pure_cam)) {
512            num_do_b_mat     = g_ip->tag_assoc / num_act_mats_hor_dir;
513            num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat;
514        }
515    }
516
517    if ((g_ip->is_cache == false && is_main_mem == true) ||
518        (PAGE_MODE == 1 && is_dram)) {
519        if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 !=
520            (int)g_ip->page_sz_bits) {
521            return;
522        }
523    }
524
525//  if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays
526    if (is_tag == false && g_ip->is_main_mem == true &&
527        num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 <
528        ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc)) {
529        return;
530    }
531
532    if (num_act_mats_hor_dir > num_mats_h_dir) {
533        return;
534    }
535
536
537    //compute di for mat subbank and bank
538    if (!(fully_assoc || pure_cam)) {
539        if (!is_tag) {
540            if (g_ip->fast_access == true) {
541                num_di_b_mat = num_do_b_mat / g_ip->data_assoc;
542            } else {
543                num_di_b_mat = num_do_b_mat;
544            }
545        } else {
546            num_di_b_mat = tagbits;
547        }
548    } else {
549        if (fully_assoc) {
550            num_di_b_mat = num_do_b_mat;
551            //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache,
552            //but inside the mat wire tracks need to be reserved for search data bus
553            num_si_b_mat = tagbits;
554        } else {
555            num_di_b_mat = tagbits;
556            num_si_b_mat = tagbits;//*num_subarrays/num_mats;
557        }
558
559    }
560
561    num_di_b_subbank       = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA
562    num_si_b_subbank       = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast
563
564    int num_addr_b_row_dec     = _log2(num_r_subarray);
565    if  ((fully_assoc || pure_cam))
566        num_addr_b_row_dec     += _log2(num_subarrays / num_mats);
567    int number_subbanks        = num_mats / num_act_mats_hor_dir;
568    number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM
569
570    num_rw_ports = g_ip->num_rw_ports;
571    num_rd_ports = g_ip->num_rd_ports;
572    num_wr_ports = g_ip->num_wr_ports;
573    num_se_rd_ports = g_ip->num_se_rd_ports;
574    num_search_ports = g_ip->num_search_ports;
575
576    if (is_dram && is_main_mem) {
577        number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec,
578                                   _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2));
579    } else {
580        number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) +
581                               _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2);
582    }
583
584    if (!(fully_assoc || pure_cam)) {
585        if (is_tag) {
586            num_di_b_bank_per_port = tagbits;
587            num_do_b_bank_per_port = g_ip->data_assoc;
588        } else {
589            num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc;
590            num_do_b_bank_per_port = g_ip->out_w;
591        }
592    } else {
593        if (fully_assoc) {
594            num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz?
595            num_si_b_bank_per_port = tagbits;
596            num_do_b_bank_per_port = g_ip->out_w + tagbits;
597            num_so_b_bank_per_port = g_ip->out_w;
598        } else {
599            num_di_b_bank_per_port = tagbits;
600            num_si_b_bank_per_port = tagbits;
601            num_do_b_bank_per_port = tagbits;
602            num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));
603        }
604    }
605
606    if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) {
607        number_way_select_signals_mat = g_ip->data_assoc;
608    }
609
610    // add ECC adjustment to all data signals that traverse on H-trees.
611    if (g_ip->add_ecc_b_ == true) {
612        num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_));
613        num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_));
614        num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_));
615        num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_));
616        num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_));
617        num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_));
618
619        num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_));
620        num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_));
621        num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_));
622        num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_));
623        num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_));
624        num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_));
625    }
626
627    is_valid = true;
628}
629
630