parameter.cc revision 10152:52c552138ba1
1/*****************************************************************************
2 *                                McPAT/CACTI
3 *                      SOFTWARE LICENSE AGREEMENT
4 *            Copyright 2012 Hewlett-Packard Development Company, L.P.
5 *                          All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution;
14 * neither the name of the copyright holders nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 ***************************************************************************/
31
32
33
34#include <iomanip>
35#include <iostream>
36#include <string>
37
38#include "area.h"
39#include "parameter.h"
40
41using namespace std;
42
43
44InputParameter * g_ip;
45TechnologyParameter g_tp;
46
47
48
49void TechnologyParameter::DeviceType::display(uint32_t indent)
50{
51  string indent_str(indent, ' ');
52
53  cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl;
54  cout << indent_str << "C_fringe  = " << setw(12) << C_fringe  << " F/um" << endl;
55  cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl;
56  cout << indent_str << "C_junc    = " << setw(12) << C_junc    << " F/um^2" << endl;
57  cout << indent_str << "l_phy     = " << setw(12) << l_phy     << " um" << endl;
58  cout << indent_str << "l_elec    = " << setw(12) << l_elec    << " um" << endl;
59  cout << indent_str << "R_nch_on  = " << setw(12) << R_nch_on  << " ohm-um" << endl;
60  cout << indent_str << "R_pch_on  = " << setw(12) << R_pch_on  << " ohm-um" << endl;
61  cout << indent_str << "Vdd       = " << setw(12) << Vdd       << " V" << endl;
62  cout << indent_str << "Vth       = " << setw(12) << Vth       << " V" << endl;
63  cout << indent_str << "I_on_n    = " << setw(12) << I_on_n    << " A/um" << endl;
64  cout << indent_str << "I_on_p    = " << setw(12) << I_on_p    << " A/um" << endl;
65  cout << indent_str << "I_off_n   = " << setw(12) << I_off_n   << " A/um" << endl;
66  cout << indent_str << "I_off_p   = " << setw(12) << I_off_p   << " A/um" << endl;
67  cout << indent_str << "C_ox      = " << setw(12) << C_ox      << " F/um^2" << endl;
68  cout << indent_str << "t_ox      = " << setw(12) << t_ox      << " um" << endl;
69  cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl;
70}
71
72
73
74void TechnologyParameter::InterconnectType::display(uint32_t indent)
75{
76  string indent_str(indent, ' ');
77
78  cout << indent_str << "pitch    = " << setw(12) << pitch    << " um" << endl;
79  cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl;
80  cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl;
81}
82
83void TechnologyParameter::ScalingFactor::display(uint32_t indent)
84{
85  string indent_str(indent, ' ');
86
87  cout << indent_str << "logic_scaling_co_eff    = " << setw(12) << logic_scaling_co_eff << endl;
88  cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl;
89}
90
91void TechnologyParameter::MemoryType::display(uint32_t indent)
92{
93  string indent_str(indent, ' ');
94
95  cout << indent_str << "b_w         = " << setw(12) << b_w << " um" << endl;
96  cout << indent_str << "b_h         = " << setw(12) << b_h << " um" << endl;
97  cout << indent_str << "cell_a_w    = " << setw(12) << cell_a_w << " um" << endl;
98  cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl;
99  cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl;
100  cout << indent_str << "Vbitpre     = " << setw(12) << Vbitpre << " V" << endl;
101}
102
103
104
105void TechnologyParameter::display(uint32_t indent)
106{
107  string indent_str(indent, ' ');
108
109  cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl;
110  cout << indent_str << "min_w_nmos_                = " << setw(12) << min_w_nmos_                << " um" << endl;
111  cout << indent_str << "max_w_nmos_                = " << setw(12) << max_w_nmos_                << " um" << endl;
112  cout << indent_str << "unit_len_wire_del          = " << setw(12) << unit_len_wire_del          << " s/um^2" << endl;
113  cout << indent_str << "FO4                        = " << setw(12) << FO4                        << " s" << endl;
114  cout << indent_str << "kinv                       = " << setw(12) << kinv                       << " s" << endl;
115  cout << indent_str << "vpp                        = " << setw(12) << vpp                        << " V" << endl;
116  cout << indent_str << "w_sense_en                 = " << setw(12) << w_sense_en                 << " um" << endl;
117  cout << indent_str << "w_sense_n                  = " << setw(12) << w_sense_n                  << " um" << endl;
118  cout << indent_str << "w_sense_p                  = " << setw(12) << w_sense_p                  << " um" << endl;
119  cout << indent_str << "w_iso                      = " << setw(12) << w_iso                      << " um" << endl;
120  cout << indent_str << "w_poly_contact             = " << setw(12) << w_poly_contact             << " um" << endl;
121  cout << indent_str << "spacing_poly_to_poly       = " << setw(12) << spacing_poly_to_poly       << " um" << endl;
122  cout << indent_str << "spacing_poly_to_contact    = " << setw(12) << spacing_poly_to_contact    << " um" << endl;
123  cout << endl;
124  cout << indent_str << "w_comp_inv_p1              = " << setw(12) << w_comp_inv_p1 << " um" << endl;
125  cout << indent_str << "w_comp_inv_p2              = " << setw(12) << w_comp_inv_p2 << " um" << endl;
126  cout << indent_str << "w_comp_inv_p3              = " << setw(12) << w_comp_inv_p3 << " um" << endl;
127  cout << indent_str << "w_comp_inv_n1              = " << setw(12) << w_comp_inv_n1 << " um" << endl;
128  cout << indent_str << "w_comp_inv_n2              = " << setw(12) << w_comp_inv_n2 << " um" << endl;
129  cout << indent_str << "w_comp_inv_n3              = " << setw(12) << w_comp_inv_n3 << " um" << endl;
130  cout << indent_str << "w_eval_inv_p               = " << setw(12) << w_eval_inv_p  << " um" << endl;
131  cout << indent_str << "w_eval_inv_n               = " << setw(12) << w_eval_inv_n  << " um" << endl;
132  cout << indent_str << "w_comp_n                   = " << setw(12) << w_comp_n      << " um" << endl;
133  cout << indent_str << "w_comp_p                   = " << setw(12) << w_comp_p      << " um" << endl;
134  cout << endl;
135  cout << indent_str << "dram_cell_I_on             = " << setw(12) << dram_cell_I_on << " A/um" << endl;
136  cout << indent_str << "dram_cell_Vdd              = " << setw(12) << dram_cell_Vdd  << " V" << endl;
137  cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl;
138  cout << indent_str << "dram_cell_C                = " << setw(12) << dram_cell_C               << " F" << endl;
139  cout << indent_str << "gm_sense_amp_latch         = " << setw(12) << gm_sense_amp_latch        << " F/s" << endl;
140  cout << endl;
141  cout << indent_str << "w_nmos_b_mux               = " << setw(12) << w_nmos_b_mux              << " um" << endl;
142  cout << indent_str << "w_nmos_sa_mux              = " << setw(12) << w_nmos_sa_mux             << " um" << endl;
143  cout << indent_str << "w_pmos_bl_precharge        = " << setw(12) << w_pmos_bl_precharge       << " um" << endl;
144  cout << indent_str << "w_pmos_bl_eq               = " << setw(12) << w_pmos_bl_eq              << " um" << endl;
145  cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS  = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl;
146  cout << indent_str << "HPOWERRAIL                 = " << setw(12) << HPOWERRAIL                << " um" << endl;
147  cout << indent_str << "cell_h_def                 = " << setw(12) << cell_h_def                << " um" << endl;
148
149  cout << endl;
150  cout << indent_str << "SRAM cell transistor: " << endl;
151  sram_cell.display(indent + 2);
152
153  cout << endl;
154  cout << indent_str << "DRAM access transistor: " << endl;
155  dram_acc.display(indent + 2);
156
157  cout << endl;
158  cout << indent_str << "DRAM wordline transistor: " << endl;
159  dram_wl.display(indent + 2);
160
161  cout << endl;
162  cout << indent_str << "peripheral global transistor: " << endl;
163  peri_global.display(indent + 2);
164
165  cout << endl;
166  cout << indent_str << "wire local" << endl;
167  wire_local.display(indent + 2);
168
169  cout << endl;
170  cout << indent_str << "wire inside mat" << endl;
171  wire_inside_mat.display(indent + 2);
172
173  cout << endl;
174  cout << indent_str << "wire outside mat" << endl;
175  wire_outside_mat.display(indent + 2);
176
177  cout << endl;
178  cout << indent_str << "SRAM" << endl;
179  sram.display(indent + 2);
180
181  cout << endl;
182  cout << indent_str << "DRAM" << endl;
183  dram.display(indent + 2);
184}
185
186
187DynamicParameter::DynamicParameter():
188  use_inp_params(0), cell(), is_valid(true)
189{
190}
191
192
193
194DynamicParameter::DynamicParameter(
195    bool is_tag_,
196    int pure_ram_,
197    int pure_cam_,
198    double Nspd_,
199    unsigned int Ndwl_,
200    unsigned int Ndbl_,
201    unsigned int Ndcm_,
202    unsigned int Ndsam_lev_1_,
203    unsigned int Ndsam_lev_2_,
204    bool is_main_mem_):
205  is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_),Ndcm(Ndcm_),
206  Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_),
207  number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0),
208  is_main_mem(is_main_mem_), cell(), is_valid(false)
209{
210  ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
211  is_dram            = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
212
213  unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS;  // capacity per stacked die layer
214  const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local;
215  fully_assoc = (g_ip->fully_assoc) ? true : false;
216
217  if (fully_assoc || pure_cam)
218  { // fully-assocative cache -- ref: CACTi 2.0 report
219          if (Ndwl != 1 ||            //Ndwl is fixed to 1 for FA
220                          Ndcm != 1 ||            //Ndcm is fixed to 1 for FA
221                          Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA
222                          Ndsam_lev_1 != 1 ||     //Ndsam_lev_1 is fixed to one
223                          Ndsam_lev_2 != 1 ||     //Ndsam_lev_2 is fixed to one
224                          Ndbl < 2)
225          {
226          return;
227          }
228  }
229
230  if ((is_dram) && (!is_tag) && (Ndcm > 1))
231  {
232          return;  // For a DRAM array, each bitline has its own sense-amp
233  }
234
235  // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be
236  // at least two because an array is assumed to have at least one mat. And a mat
237  // is formed out of two horizontal subarrays and two vertical subarrays
238  if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1))
239  {
240          return;
241  }
242
243  //***********compute row, col of an subarray
244  if (!(fully_assoc || pure_cam))//Not fully_asso nor cam
245  {
246          // if data array, let tagbits = 0
247          if (is_tag)
248          {
249                  if (g_ip->specific_tag)
250                  {
251                          tagbits = g_ip->tag_w;
252                  }
253                  else
254                  {
255                          tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) +
256                          _log2(g_ip->tag_assoc*2 - 1) - _log2(g_ip->nbanks);
257
258                  }
259                  tagbits = (((tagbits + 3) >> 2) << 2);
260
261                  num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
262                                  g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON);
263                  num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON);
264                  //burst_length = 1;
265          }
266          else
267          {
268                  num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
269                                  g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON);
270                  num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON);
271                  // burst_length = g_ip->block_sz * 8 / g_ip->out_w;
272          }
273
274          if (num_r_subarray < MINSUBARRAYROWS) return;
275          if (num_r_subarray == 0) return;
276          if (num_r_subarray > MAXSUBARRAYROWS) return;
277          if (num_c_subarray < MINSUBARRAYCOLS) return;
278          if (num_c_subarray > MAXSUBARRAYCOLS) return;
279
280  }
281
282  else
283  {//either fully-asso or cam
284          if (pure_cam)
285          {
286                  if (g_ip->specific_tag)
287                  {
288                          tagbits = int(ceil(g_ip->tag_w/8.0)*8);
289                  }
290                  else
291                  {
292                          tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS)/8.0)*8);
293//			  cout<<"Pure CAM needs tag width to be specified"<<endl;
294//			  exit(0);
295                  }
296                  //tagbits = (((tagbits + 3) >> 2) << 2);
297
298                  tag_num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks*tagbits/8.0 * Ndbl));//TODO: error check input of tagbits and blocksize //TODO: for pure CAM, g_ip->block should be number of entries.
299                  //tag_num_c_subarray = (int)(tagbits  + EPSILON);
300                  tag_num_c_subarray = tagbits;
301                  if (tag_num_r_subarray == 0) return;
302                  if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
303                  if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
304                  if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
305                  num_r_subarray = tag_num_r_subarray;
306          }
307          else //fully associative
308          {
309                  if (g_ip->specific_tag)
310                  {
311                          tagbits = g_ip->tag_w;
312                  }
313                  else
314                  {
315                          tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem.
316                  }
317                  tagbits = (((tagbits + 3) >> 2) << 2);
318
319                  tag_num_r_subarray = (int)(capacity_per_die / (g_ip->nbanks*g_ip->block_sz * Ndbl));
320                  tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON);
321                  if (tag_num_r_subarray == 0) return;
322                  if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
323                  if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
324                  if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
325
326                  data_num_r_subarray = tag_num_r_subarray;
327                  data_num_c_subarray = 8 * g_ip->block_sz;
328                  if (data_num_r_subarray == 0) return;
329                  if (data_num_r_subarray > MAXSUBARRAYROWS) return;
330                  if (data_num_c_subarray < MINSUBARRAYCOLS) return;
331                  if (data_num_c_subarray > MAXSUBARRAYCOLS) return;
332                  num_r_subarray = tag_num_r_subarray;
333          }
334  }
335
336  num_subarrays = Ndwl * Ndbl;
337  //****************end of computation of row, col of an subarray
338
339  // calculate wire parameters
340  if (fully_assoc || pure_cam)
341  {
342          cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
343          + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
344          cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
345          + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
346
347          cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports)
348          + 2 * wire_local.pitch*(g_ip->num_search_ports-1);
349          cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports)
350                          + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1);
351  }
352  else
353  {
354          if(is_tag)
355          {
356                  cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports +
357                                  g_ip->num_wr_ports);
358                  cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports +
359                                  (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) +
360                                  wire_local.pitch * g_ip->num_se_rd_ports;
361          }
362          else
363          {
364                  if (is_dram)
365                  {
366                          cell.h = g_tp.dram.b_h;
367                          cell.w = g_tp.dram.b_w;
368                  }
369                  else
370                  {
371                          cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +
372                                          g_ip->num_rw_ports - 1 + g_ip->num_rd_ports);
373                          cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 +
374                                          (g_ip->num_rd_ports - g_ip->num_se_rd_ports) +
375                                          g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports;
376                  }
377          }
378  }
379
380  double c_b_metal = cell.h * wire_local.C_per_um;
381  double C_bl;
382
383  if (!(fully_assoc || pure_cam))
384  {
385          if (is_dram)
386          {
387                  deg_bl_muxing = 1;
388                  if (ram_cell_tech_type == comm_dram)
389                  {
390                          C_bl  = num_r_subarray * c_b_metal;
391                          V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl);
392                          if (V_b_sense < VBITSENSEMIN)
393                          {
394                                  return;
395                          }
396                          V_b_sense = VBITSENSEMIN;  // in any case, we fix sense amp input signal to a constant value
397                          dram_refresh_period = 64e-3;
398                  }
399                  else
400                  {
401                          double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0;
402                          C_bl  = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
403                          V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl);
404
405                          if (V_b_sense < VBITSENSEMIN)
406                          {
407                                  return; //Sense amp input signal is smaller that minimum allowable sense amp input signal
408                          }
409                          V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
410                          //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C;
411                          //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp;
412                          dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp;
413                  }
414          }
415          else
416          { //SRAM
417                  V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
418                  deg_bl_muxing = Ndcm;
419                  // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
420                  // contacts in a physical layout
421                  double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0;
422                  C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
423                  dram_refresh_period = 0;
424          }
425  }
426  else
427  {
428          c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM
429          V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
430          deg_bl_muxing = 1;//FA fix as 1
431          // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
432          // contacts in a physical layout
433          double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines
434          C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
435          dram_refresh_period = 0;
436  }
437
438
439  // do/di: data in/out, for fully associative they are the data width for normal read and write
440  // so/si: search data in/out, for fully associative they are the data width for the search ops
441  // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write)
442  // so/si needs broadcase while do/di do not
443
444  if (fully_assoc || pure_cam)
445  {
446            switch (Ndbl) {
447              case (0):
448                cout <<  "   Invalid Ndbl \n"<<endl;
449                exit(0);
450                break;
451              case (1):
452                  num_mats_h_dir = 1;//one subarray per mat
453                  num_mats_v_dir = 1;
454                break;
455              case (2):
456                  num_mats_h_dir = 1;//two subarrays per mat
457                  num_mats_v_dir = 1;
458                  break;
459              default:
460                  num_mats_h_dir = int(floor(sqrt(Ndbl/4.0)));//4 subbarrys per mat
461                  num_mats_v_dir = int(Ndbl/4.0 / num_mats_h_dir);
462            }
463            num_mats = num_mats_h_dir * num_mats_v_dir;
464
465            if (fully_assoc)
466            {
467                num_so_b_mat   = data_num_c_subarray;
468                num_do_b_mat   = data_num_c_subarray + tagbits;
469            }
470            else
471            {
472                num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
473                num_do_b_mat = tagbits;
474            }
475  }
476  else
477  {
478          num_mats_h_dir = MAX(Ndwl / 2, 1);
479          num_mats_v_dir = MAX(Ndbl / 2, 1);
480          num_mats       = num_mats_h_dir * num_mats_v_dir;
481          num_do_b_mat   = MAX((num_subarrays/num_mats) * num_c_subarray / (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1);
482  }
483
484  if (!(fully_assoc|| pure_cam) && (num_do_b_mat < (num_subarrays/num_mats)))
485  {
486          return;
487  }
488
489
490  int deg_sa_mux_l1_non_assoc;
491  //TODO:the i/o for subbank is not necessary and should be removed.
492  if (!(fully_assoc || pure_cam))
493  {
494          if (!is_tag)
495          {
496                  if (is_main_mem == true)
497                  {
498                          num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w;
499                          deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
500                  }
501                  else
502                  {
503                          if (g_ip->fast_access == true)
504                          {
505                                  num_do_b_subbank = g_ip->out_w * g_ip->data_assoc;
506                                  deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
507                          }
508                          else
509                          {
510
511                                  num_do_b_subbank = g_ip->out_w;
512                                  deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc;
513                                  if (deg_sa_mux_l1_non_assoc < 1)
514                                  {
515                                          return;
516                                  }
517
518                          }
519                  }
520          }
521          else
522          {
523                  num_do_b_subbank = tagbits * g_ip->tag_assoc;
524                  if (num_do_b_mat < tagbits)
525                  {
526                          return;
527                  }
528                  deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
529                  //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir;
530          }
531  }
532  else
533  {
534          if (fully_assoc)
535          {
536                  num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa
537                  num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray;
538          }
539          else
540          {
541                  num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
542                  num_do_b_subbank = tag_num_c_subarray;
543          }
544
545          deg_sa_mux_l1_non_assoc = 1;
546  }
547
548  deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc;
549
550  if (fully_assoc || pure_cam)
551  {
552          num_act_mats_hor_dir = 1;
553          num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used
554  }
555  else
556  {
557          num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat;
558          if (num_act_mats_hor_dir == 0)
559          {
560                  return;
561          }
562  }
563
564  //compute num_do_mat for tag
565  if (is_tag)
566  {
567          if (!(fully_assoc || pure_cam))
568          {
569                  num_do_b_mat     = g_ip->tag_assoc / num_act_mats_hor_dir;
570                  num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat;
571          }
572  }
573
574  if ((g_ip->is_cache == false && is_main_mem == true) || (PAGE_MODE == 1 && is_dram))
575  {
576          if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != (int)g_ip->page_sz_bits)
577          {
578                  return;
579          }
580  }
581
582//  if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays
583  if (is_tag == false && g_ip->is_main_mem == true &&
584                  num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc))
585  {
586          return;
587  }
588
589  if (num_act_mats_hor_dir > num_mats_h_dir)
590  {
591          return;
592  }
593
594
595  //compute di for mat subbank and bank
596  if (!(fully_assoc ||pure_cam))
597  {
598          if(!is_tag)
599          {
600                  if(g_ip->fast_access == true)
601                  {
602                          num_di_b_mat = num_do_b_mat / g_ip->data_assoc;
603                  }
604                  else
605                  {
606                          num_di_b_mat = num_do_b_mat;
607                  }
608          }
609          else
610          {
611                  num_di_b_mat = tagbits;
612          }
613  }
614  else
615  {
616          if (fully_assoc)
617          {
618                  num_di_b_mat = num_do_b_mat;
619                  //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache,
620                  //but inside the mat wire tracks need to be reserved for search data bus
621                  num_si_b_mat = tagbits;
622          }
623          else
624          {
625                  num_di_b_mat = tagbits;
626                  num_si_b_mat = tagbits;//*num_subarrays/num_mats;
627          }
628
629  }
630
631  num_di_b_subbank       = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA
632  num_si_b_subbank       = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast
633
634  int num_addr_b_row_dec     = _log2(num_r_subarray);
635  if  ((fully_assoc ||pure_cam))
636          num_addr_b_row_dec     +=_log2(num_subarrays/num_mats);
637  int number_subbanks        = num_mats / num_act_mats_hor_dir;
638  number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM
639
640  num_rw_ports = g_ip->num_rw_ports;
641  num_rd_ports = g_ip->num_rd_ports;
642  num_wr_ports = g_ip->num_wr_ports;
643  num_se_rd_ports = g_ip->num_se_rd_ports;
644  num_search_ports = g_ip->num_search_ports;
645
646  if (is_dram && is_main_mem)
647  {
648          number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec,
649                          _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2));
650  }
651  else
652  {
653          number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) +
654          _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2);
655  }
656
657  if (!(fully_assoc ||pure_cam))
658  {
659          if (is_tag)
660          {
661                  num_di_b_bank_per_port = tagbits;
662                  num_do_b_bank_per_port = g_ip->data_assoc;
663          }
664          else
665          {
666                  num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc;
667                  num_do_b_bank_per_port = g_ip->out_w;
668          }
669  }
670  else
671  {
672          if (fully_assoc)
673          {
674                  num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz?
675                  num_si_b_bank_per_port = tagbits;
676                  num_do_b_bank_per_port = g_ip->out_w + tagbits;
677                  num_so_b_bank_per_port = g_ip->out_w;
678          }
679          else
680          {
681                  num_di_b_bank_per_port = tagbits;
682                  num_si_b_bank_per_port = tagbits;
683                  num_do_b_bank_per_port = tagbits;
684                  num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));
685          }
686  }
687
688  if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access))
689  {
690          number_way_select_signals_mat = g_ip->data_assoc;
691  }
692
693  // add ECC adjustment to all data signals that traverse on H-trees.
694  if (g_ip->add_ecc_b_ == true)
695  {
696          num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_));
697          num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_));
698          num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_));
699          num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_));
700          num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_));
701          num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_));
702
703          num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_));
704          num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_));
705          num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_));
706          num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_));
707          num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_));
708          num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_));
709  }
710
711  is_valid = true;
712}
713
714