parameter.cc (10152:52c552138ba1) parameter.cc (10234:5cb711fa6176)
1/*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
1/*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
5 * All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
6 * All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 ***************************************************************************/
31
32
33
34#include <iomanip>
35#include <iostream>
36#include <string>

--- 4 unchanged lines hidden (view full) ---

41using namespace std;
42
43
44InputParameter * g_ip;
45TechnologyParameter g_tp;
46
47
48
30 *
31 ***************************************************************************/
32
33
34
35#include <iomanip>
36#include <iostream>
37#include <string>

--- 4 unchanged lines hidden (view full) ---

42using namespace std;
43
44
// Global pointer to the input configuration. The DynamicParameter constructor
// in this file dereferences it for cache size, bank count, associativity,
// block size, tag width and port counts.
45InputParameter * g_ip;
// Global technology parameter set. This file reads it for the local wire
// pitch/capacitance and for the SRAM, DRAM and CAM cell dimensions.
46TechnologyParameter g_tp;
47
48
49
49void TechnologyParameter::DeviceType::display(uint32_t indent)
50{
51 string indent_str(indent, ' ');
50void TechnologyParameter::DeviceType::display(uint32_t indent) {
51 string indent_str(indent, ' ');
52
52
53 cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl;
54 cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl;
55 cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl;
56 cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl;
57 cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl;
58 cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl;
59 cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl;
60 cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl;
61 cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl;
62 cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl;
63 cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl;
64 cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl;
65 cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl;
66 cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl;
67 cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl;
68 cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl;
69 cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl;
53 cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl;
54 cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl;
55 cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl;
56 cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl;
57 cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl;
58 cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl;
59 cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl;
60 cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl;
61 cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl;
62 cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl;
63 cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl;
64 cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl;
65 cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl;
66 cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl;
67 cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl;
68 cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl;
69 cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl;
70}
71
72
73
70}
71
72
73
74void TechnologyParameter::InterconnectType::display(uint32_t indent)
75{
76 string indent_str(indent, ' ');
74void TechnologyParameter::InterconnectType::display(uint32_t indent) {
75 string indent_str(indent, ' ');
77
76
78 cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl;
79 cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl;
80 cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl;
77 cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl;
78 cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl;
79 cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl;
81}
82
80}
81
83void TechnologyParameter::ScalingFactor::display(uint32_t indent)
84{
85 string indent_str(indent, ' ');
82void TechnologyParameter::ScalingFactor::display(uint32_t indent) {
83 string indent_str(indent, ' ');
86
84
87 cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl;
88 cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl;
85 cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl;
86 cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl;
89}
90
87}
88
91void TechnologyParameter::MemoryType::display(uint32_t indent)
92{
93 string indent_str(indent, ' ');
89void TechnologyParameter::MemoryType::display(uint32_t indent) {
90 string indent_str(indent, ' ');
94
91
95 cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl;
96 cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl;
97 cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl;
98 cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl;
99 cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl;
100 cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl;
92 cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl;
93 cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl;
94 cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl;
95 cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl;
96 cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl;
97 cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl;
101}
102
103
104
98}
99
100
101
105void TechnologyParameter::display(uint32_t indent)
106{
107 string indent_str(indent, ' ');
102void TechnologyParameter::display(uint32_t indent) {
103 string indent_str(indent, ' ');
108
104
109 cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl;
110 cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl;
111 cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl;
112 cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl;
113 cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl;
114 cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl;
115 cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl;
116 cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl;
117 cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl;
118 cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl;
119 cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl;
120 cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl;
121 cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl;
122 cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl;
123 cout << endl;
124 cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl;
125 cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl;
126 cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl;
127 cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl;
128 cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl;
129 cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl;
130 cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl;
131 cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << endl;
132 cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl;
133 cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl;
134 cout << endl;
135 cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl;
136 cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl;
137 cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl;
138 cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl;
139 cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl;
140 cout << endl;
141 cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl;
142 cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl;
143 cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl;
144 cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl;
145 cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl;
146 cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl;
147 cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl;
105 cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl;
106 cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl;
107 cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl;
108 cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl;
109 cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl;
110 cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl;
111 cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl;
112 cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl;
113 cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl;
114 cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl;
115 cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl;
116 cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl;
117 cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl;
118 cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl;
119 cout << endl;
120 cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl;
121 cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl;
122 cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl;
123 cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl;
124 cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl;
125 cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl;
126 cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl;
127 cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << endl;
128 cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl;
129 cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl;
130 cout << endl;
131 cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl;
132 cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl;
133 cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl;
134 cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl;
135 cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl;
136 cout << endl;
137 cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl;
138 cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl;
139 cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl;
140 cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl;
141 cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl;
142 cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl;
143 cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl;
148
144
149 cout << endl;
150 cout << indent_str << "SRAM cell transistor: " << endl;
151 sram_cell.display(indent + 2);
145 cout << endl;
146 cout << indent_str << "SRAM cell transistor: " << endl;
147 sram_cell.display(indent + 2);
152
148
153 cout << endl;
154 cout << indent_str << "DRAM access transistor: " << endl;
155 dram_acc.display(indent + 2);
149 cout << endl;
150 cout << indent_str << "DRAM access transistor: " << endl;
151 dram_acc.display(indent + 2);
156
152
157 cout << endl;
158 cout << indent_str << "DRAM wordline transistor: " << endl;
159 dram_wl.display(indent + 2);
153 cout << endl;
154 cout << indent_str << "DRAM wordline transistor: " << endl;
155 dram_wl.display(indent + 2);
160
156
161 cout << endl;
162 cout << indent_str << "peripheral global transistor: " << endl;
163 peri_global.display(indent + 2);
157 cout << endl;
158 cout << indent_str << "peripheral global transistor: " << endl;
159 peri_global.display(indent + 2);
164
160
165 cout << endl;
166 cout << indent_str << "wire local" << endl;
167 wire_local.display(indent + 2);
161 cout << endl;
162 cout << indent_str << "wire local" << endl;
163 wire_local.display(indent + 2);
168
164
169 cout << endl;
170 cout << indent_str << "wire inside mat" << endl;
171 wire_inside_mat.display(indent + 2);
165 cout << endl;
166 cout << indent_str << "wire inside mat" << endl;
167 wire_inside_mat.display(indent + 2);
172
168
173 cout << endl;
174 cout << indent_str << "wire outside mat" << endl;
175 wire_outside_mat.display(indent + 2);
169 cout << endl;
170 cout << indent_str << "wire outside mat" << endl;
171 wire_outside_mat.display(indent + 2);
176
172
177 cout << endl;
178 cout << indent_str << "SRAM" << endl;
179 sram.display(indent + 2);
173 cout << endl;
174 cout << indent_str << "SRAM" << endl;
175 sram.display(indent + 2);
180
176
181 cout << endl;
182 cout << indent_str << "DRAM" << endl;
183 dram.display(indent + 2);
177 cout << endl;
178 cout << indent_str << "DRAM" << endl;
179 dram.display(indent + 2);
184}
185
186
187DynamicParameter::DynamicParameter():
180}
181
182
183DynamicParameter::DynamicParameter():
188 use_inp_params(0), cell(), is_valid(true)
189{
184 use_inp_params(0), cell(), is_valid(true) {
190}
191
192
193
194DynamicParameter::DynamicParameter(
195 bool is_tag_,
196 int pure_ram_,
197 int pure_cam_,
198 double Nspd_,
199 unsigned int Ndwl_,
200 unsigned int Ndbl_,
201 unsigned int Ndcm_,
202 unsigned int Ndsam_lev_1_,
203 unsigned int Ndsam_lev_2_,
204 bool is_main_mem_):
185}
186
187
188
189DynamicParameter::DynamicParameter(
190 bool is_tag_,
191 int pure_ram_,
192 int pure_cam_,
193 double Nspd_,
194 unsigned int Ndwl_,
195 unsigned int Ndbl_,
196 unsigned int Ndcm_,
197 unsigned int Ndsam_lev_1_,
198 unsigned int Ndsam_lev_2_,
199 bool is_main_mem_):
205 is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_),Ndcm(Ndcm_),
206 Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_),
207 number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0),
208 is_main_mem(is_main_mem_), cell(), is_valid(false)
209{
210 ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
211 is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
200 is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0),
201 Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_), Ndcm(Ndcm_),
202 Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_),
203 number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0),
204 is_main_mem(is_main_mem_), cell(), is_valid(false) {
205 ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
206 is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
212
207
213 unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer
214 const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local;
215 fully_assoc = (g_ip->fully_assoc) ? true : false;
208 unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer
209 const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local;
210 fully_assoc = (g_ip->fully_assoc) ? true : false;
216
211
217 if (fully_assoc || pure_cam)
218 { // fully-assocative cache -- ref: CACTi 2.0 report
219 if (Ndwl != 1 || //Ndwl is fixed to 1 for FA
220 Ndcm != 1 || //Ndcm is fixed to 1 for FA
221 Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA
222 Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one
223 Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one
224 Ndbl < 2)
225 {
226 return;
227 }
228 }
212 // fully-assocative cache -- ref: CACTi 2.0 report
213 if (fully_assoc || pure_cam) {
214 if (Ndwl != 1 || //Ndwl is fixed to 1 for FA
215 Ndcm != 1 || //Ndcm is fixed to 1 for FA
216 Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA
217 Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one
218 Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one
219 Ndbl < 2) {
220 return;
221 }
222 }
229
223
230 if ((is_dram) && (!is_tag) && (Ndcm > 1))
231 {
232 return; // For a DRAM array, each bitline has its own sense-amp
233 }
224 if ((is_dram) && (!is_tag) && (Ndcm > 1)) {
225 return; // For a DRAM array, each bitline has its own sense-amp
226 }
234
227
235 // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be
236 // at least two because an array is assumed to have at least one mat. And a mat
237 // is formed out of two horizontal subarrays and two vertical subarrays
238 if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1))
239 {
240 return;
241 }
228 // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be
229 // at least two because an array is assumed to have at least one mat. And a mat
230 // is formed out of two horizontal subarrays and two vertical subarrays
231 if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) {
232 return;
233 }
242
234
243 //***********compute row, col of an subarray
244 if (!(fully_assoc || pure_cam))//Not fully_asso nor cam
245 {
246 // if data array, let tagbits = 0
247 if (is_tag)
248 {
249 if (g_ip->specific_tag)
250 {
251 tagbits = g_ip->tag_w;
252 }
253 else
254 {
255 tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) +
256 _log2(g_ip->tag_assoc*2 - 1) - _log2(g_ip->nbanks);
235 //***********compute row, col of an subarray
236 if (!(fully_assoc || pure_cam)) {
237 //Not fully_asso nor cam
238 // if data array, let tagbits = 0
239 if (is_tag) {
240 if (g_ip->specific_tag) {
241 tagbits = g_ip->tag_w;
242 } else {
243 tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) +
244 _log2(g_ip->tag_assoc * 2 - 1) - _log2(g_ip->nbanks);
257
245
258 }
259 tagbits = (((tagbits + 3) >> 2) << 2);
246 }
247 tagbits = (((tagbits + 3) >> 2) << 2);
260
248
261 num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
262 g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON);
263 num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON);
264 //burst_length = 1;
265 }
266 else
267 {
268 num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
269 g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON);
270 num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON);
271 // burst_length = g_ip->block_sz * 8 / g_ip->out_w;
272 }
249 num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
250 g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON);
251 num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON);
252 //burst_length = 1;
253 } else {
254 num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
255 g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON);
256 num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON);
257 // burst_length = g_ip->block_sz * 8 / g_ip->out_w;
258 }
273
259
274 if (num_r_subarray < MINSUBARRAYROWS) return;
275 if (num_r_subarray == 0) return;
276 if (num_r_subarray > MAXSUBARRAYROWS) return;
277 if (num_c_subarray < MINSUBARRAYCOLS) return;
278 if (num_c_subarray > MAXSUBARRAYCOLS) return;
260 if (num_r_subarray < MINSUBARRAYROWS) return;
261 if (num_r_subarray == 0) return;
262 if (num_r_subarray > MAXSUBARRAYROWS) return;
263 if (num_c_subarray < MINSUBARRAYCOLS) return;
264 if (num_c_subarray > MAXSUBARRAYCOLS) return;
279
265
280 }
266 }
281
267
282 else
283 {//either fully-asso or cam
284 if (pure_cam)
285 {
286 if (g_ip->specific_tag)
287 {
288 tagbits = int(ceil(g_ip->tag_w/8.0)*8);
289 }
290 else
291 {
292 tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS)/8.0)*8);
268 else {//either fully-asso or cam
269 if (pure_cam) {
270 if (g_ip->specific_tag) {
271 tagbits = int(ceil(g_ip->tag_w / 8.0) * 8);
272 } else {
273 tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS) / 8.0) * 8);
293// cout<<"Pure CAM needs tag width to be specified"<<endl;
294// exit(0);
274// cout<<"Pure CAM needs tag width to be specified"<<endl;
275// exit(0);
295 }
296 //tagbits = (((tagbits + 3) >> 2) << 2);
276 }
277 //tagbits = (((tagbits + 3) >> 2) << 2);
297
278
298 tag_num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks*tagbits/8.0 * Ndbl));//TODO: error check input of tagbits and blocksize //TODO: for pure CAM, g_ip->block should be number of entries.
299 //tag_num_c_subarray = (int)(tagbits + EPSILON);
300 tag_num_c_subarray = tagbits;
301 if (tag_num_r_subarray == 0) return;
302 if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
303 if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
304 if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
305 num_r_subarray = tag_num_r_subarray;
306 }
307 else //fully associative
308 {
309 if (g_ip->specific_tag)
310 {
311 tagbits = g_ip->tag_w;
312 }
313 else
314 {
315 tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem.
316 }
317 tagbits = (((tagbits + 3) >> 2) << 2);
279 //TODO: error check input of tagbits and blocksize
280 //TODO: for pure CAM, g_ip->block should be number of entries.
281 tag_num_r_subarray = (int)ceil(capacity_per_die /
282 (g_ip->nbanks * tagbits / 8.0 * Ndbl));
283 //tag_num_c_subarray = (int)(tagbits + EPSILON);
284 tag_num_c_subarray = tagbits;
285 if (tag_num_r_subarray == 0) return;
286 if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
287 if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
288 if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
289 num_r_subarray = tag_num_r_subarray;
290 } else { //fully associative
291 if (g_ip->specific_tag) {
292 tagbits = g_ip->tag_w;
293 } else {
294 tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem.
295 }
296 tagbits = (((tagbits + 3) >> 2) << 2);
318
297
319 tag_num_r_subarray = (int)(capacity_per_die / (g_ip->nbanks*g_ip->block_sz * Ndbl));
320 tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON);
321 if (tag_num_r_subarray == 0) return;
322 if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
323 if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
324 if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
298 tag_num_r_subarray = (int)(capacity_per_die /
299 (g_ip->nbanks * g_ip->block_sz * Ndbl));
300 tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON);
301 if (tag_num_r_subarray == 0) return;
302 if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
303 if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
304 if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
325
305
326 data_num_r_subarray = tag_num_r_subarray;
327 data_num_c_subarray = 8 * g_ip->block_sz;
328 if (data_num_r_subarray == 0) return;
329 if (data_num_r_subarray > MAXSUBARRAYROWS) return;
330 if (data_num_c_subarray < MINSUBARRAYCOLS) return;
331 if (data_num_c_subarray > MAXSUBARRAYCOLS) return;
332 num_r_subarray = tag_num_r_subarray;
333 }
334 }
306 data_num_r_subarray = tag_num_r_subarray;
307 data_num_c_subarray = 8 * g_ip->block_sz;
308 if (data_num_r_subarray == 0) return;
309 if (data_num_r_subarray > MAXSUBARRAYROWS) return;
310 if (data_num_c_subarray < MINSUBARRAYCOLS) return;
311 if (data_num_c_subarray > MAXSUBARRAYCOLS) return;
312 num_r_subarray = tag_num_r_subarray;
313 }
314 }
335
315
336 num_subarrays = Ndwl * Ndbl;
337 //****************end of computation of row, col of an subarray
316 num_subarrays = Ndwl * Ndbl;
317 //****************end of computation of row, col of an subarray
338
318
339 // calculate wire parameters
340 if (fully_assoc || pure_cam)
341 {
342 cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
343 + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
344 cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
345 + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
319 // calculate wire parameters
320 if (fully_assoc || pure_cam) {
321 cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch *
322 (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
323 + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) +
324 wire_local.pitch * g_ip->num_se_rd_ports;
325 cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch *
326 (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
327 + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) +
328 wire_local.pitch * g_ip->num_se_rd_ports;
346
329
347 cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports)
348 + 2 * wire_local.pitch*(g_ip->num_search_ports-1);
349 cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports)
350 + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1);
351 }
352 else
353 {
354 if(is_tag)
355 {
356 cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports +
357 g_ip->num_wr_ports);
358 cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports +
359 (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) +
360 wire_local.pitch * g_ip->num_se_rd_ports;
361 }
362 else
363 {
364 if (is_dram)
365 {
366 cell.h = g_tp.dram.b_h;
367 cell.w = g_tp.dram.b_w;
368 }
369 else
370 {
371 cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +
372 g_ip->num_rw_ports - 1 + g_ip->num_rd_ports);
373 cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 +
374 (g_ip->num_rd_ports - g_ip->num_se_rd_ports) +
375 g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports;
376 }
377 }
378 }
330 cell.h = g_tp.sram.b_h + 2 * wire_local.pitch *
331 (g_ip->num_wr_ports + g_ip->num_rw_ports - 1 + g_ip->num_rd_ports)
332 + 2 * wire_local.pitch * (g_ip->num_search_ports - 1);
333 cell.w = g_tp.sram.b_w + 2 * wire_local.pitch *
334 (g_ip->num_rw_ports - 1 + (g_ip->num_rd_ports -
335 g_ip->num_se_rd_ports)
336 + g_ip->num_wr_ports) + g_tp.wire_local.pitch *
337 g_ip->num_se_rd_ports + 2 * wire_local.pitch *
338 (g_ip->num_search_ports - 1);
339 } else {
340 if (is_tag) {
341 cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports +
342 g_ip->num_wr_ports);
343 cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports +
344 (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) +
345 wire_local.pitch * g_ip->num_se_rd_ports;
346 } else {
347 if (is_dram) {
348 cell.h = g_tp.dram.b_h;
349 cell.w = g_tp.dram.b_w;
350 } else {
351 cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +
352 g_ip->num_rw_ports - 1 + g_ip->num_rd_ports);
353 cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 +
354 (g_ip->num_rd_ports - g_ip->num_se_rd_ports) +
355 g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports;
356 }
357 }
358 }
379
359
380 double c_b_metal = cell.h * wire_local.C_per_um;
381 double C_bl;
360 double c_b_metal = cell.h * wire_local.C_per_um;
361 double C_bl;
382
362
383 if (!(fully_assoc || pure_cam))
384 {
385 if (is_dram)
386 {
387 deg_bl_muxing = 1;
388 if (ram_cell_tech_type == comm_dram)
389 {
390 C_bl = num_r_subarray * c_b_metal;
391 V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl);
392 if (V_b_sense < VBITSENSEMIN)
393 {
394 return;
395 }
396 V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
397 dram_refresh_period = 64e-3;
398 }
399 else
400 {
401 double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0;
402 C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
403 V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl);
363 if (!(fully_assoc || pure_cam)) {
364 if (is_dram) {
365 deg_bl_muxing = 1;
366 if (ram_cell_tech_type == comm_dram) {
367 C_bl = num_r_subarray * c_b_metal;
368 V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C /
369 (g_tp.dram_cell_C + C_bl);
370 if (V_b_sense < VBITSENSEMIN) {
371 return;
372 }
373 V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
374 dram_refresh_period = 64e-3;
375 } else {
376 double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0;
377 C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
378 V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C /
379 (g_tp.dram_cell_C + C_bl);
404
380
405 if (V_b_sense < VBITSENSEMIN)
406 {
407 return; //Sense amp input signal is smaller that minimum allowable sense amp input signal
408 }
409 V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
410 //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C;
411 //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp;
412 dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp;
413 }
414 }
415 else
416 { //SRAM
417 V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
418 deg_bl_muxing = Ndcm;
419 // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
420 // contacts in a physical layout
421 double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0;
422 C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
423 dram_refresh_period = 0;
424 }
425 }
426 else
427 {
428 c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM
429 V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
430 deg_bl_muxing = 1;//FA fix as 1
431 // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
432 // contacts in a physical layout
433 double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines
434 C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
435 dram_refresh_period = 0;
436 }
381 if (V_b_sense < VBITSENSEMIN) {
382 return; //Sense amp input signal is smaller that minimum allowable sense amp input signal
383 }
384 V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
385 //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C;
386 //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp;
387 dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp;
388 }
389 } else { //SRAM
390 V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
391 deg_bl_muxing = Ndcm;
392 // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
393 // contacts in a physical layout
394 double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0;
395 C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
396 dram_refresh_period = 0;
397 }
398 } else {
399 c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM
400 V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
401 deg_bl_muxing = 1;//FA fix as 1
402 // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
403 // contacts in a physical layout
404 double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines
405 C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
406 dram_refresh_period = 0;
407 }
437
438
408
409
439 // do/di: data in/out, for fully associative they are the data width for normal read and write
440 // so/si: search data in/out, for fully associative they are the data width for the search ops
441 // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write)
442 // so/si need broadcast while do/di do not
410 // do/di: data in/out, for fully associative they are the data width for normal read and write
411 // so/si: search data in/out, for fully associative they are the data width for the search ops
412 // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write)
413 // so/si need broadcast while do/di do not
443
414
444 if (fully_assoc || pure_cam)
445 {
446 switch (Ndbl) {
447 case (0):
448 cout << " Invalid Ndbl \n"<<endl;
449 exit(0);
450 break;
451 case (1):
452 num_mats_h_dir = 1;//one subarray per mat
453 num_mats_v_dir = 1;
454 break;
455 case (2):
456 num_mats_h_dir = 1;//two subarrays per mat
457 num_mats_v_dir = 1;
458 break;
459 default:
460 num_mats_h_dir = int(floor(sqrt(Ndbl/4.0)));//4 subbarrys per mat
461 num_mats_v_dir = int(Ndbl/4.0 / num_mats_h_dir);
462 }
463 num_mats = num_mats_h_dir * num_mats_v_dir;
415 if (fully_assoc || pure_cam) {
416 switch (Ndbl) {
417 case (0):
418 cout << " Invalid Ndbl \n" << endl;
419 exit(0);
420 break;
421 case (1):
422 num_mats_h_dir = 1;//one subarray per mat
423 num_mats_v_dir = 1;
424 break;
425 case (2):
426 num_mats_h_dir = 1;//two subarrays per mat
427 num_mats_v_dir = 1;
428 break;
429 default:
430 num_mats_h_dir = int(floor(sqrt(Ndbl / 4.0)));//4 subbarrys per mat
431 num_mats_v_dir = int(Ndbl / 4.0 / num_mats_h_dir);
432 }
433 num_mats = num_mats_h_dir * num_mats_v_dir;
464
434
465 if (fully_assoc)
466 {
467 num_so_b_mat = data_num_c_subarray;
468 num_do_b_mat = data_num_c_subarray + tagbits;
469 }
470 else
471 {
472 num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
473 num_do_b_mat = tagbits;
474 }
475 }
476 else
477 {
478 num_mats_h_dir = MAX(Ndwl / 2, 1);
479 num_mats_v_dir = MAX(Ndbl / 2, 1);
480 num_mats = num_mats_h_dir * num_mats_v_dir;
481 num_do_b_mat = MAX((num_subarrays/num_mats) * num_c_subarray / (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1);
482 }
435 if (fully_assoc) {
436 num_so_b_mat = data_num_c_subarray;
437 num_do_b_mat = data_num_c_subarray + tagbits;
438 } else {
439 num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
440 num_do_b_mat = tagbits;
441 }
442 } else {
443 num_mats_h_dir = MAX(Ndwl / 2, 1);
444 num_mats_v_dir = MAX(Ndbl / 2, 1);
445 num_mats = num_mats_h_dir * num_mats_v_dir;
446 num_do_b_mat = MAX((num_subarrays / num_mats) * num_c_subarray /
447 (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1);
448 }
483
449
484 if (!(fully_assoc|| pure_cam) && (num_do_b_mat < (num_subarrays/num_mats)))
485 {
486 return;
487 }
450 if (!(fully_assoc || pure_cam) && (num_do_b_mat <
451 (num_subarrays / num_mats))) {
452 return;
453 }
488
489
454
455
490 int deg_sa_mux_l1_non_assoc;
491 //TODO:the i/o for subbank is not necessary and should be removed.
492 if (!(fully_assoc || pure_cam))
493 {
494 if (!is_tag)
495 {
496 if (is_main_mem == true)
497 {
498 num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w;
499 deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
500 }
501 else
502 {
503 if (g_ip->fast_access == true)
504 {
505 num_do_b_subbank = g_ip->out_w * g_ip->data_assoc;
506 deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
507 }
508 else
509 {
456 int deg_sa_mux_l1_non_assoc;
457 //TODO:the i/o for subbank is not necessary and should be removed.
458 if (!(fully_assoc || pure_cam)) {
459 if (!is_tag) {
460 if (is_main_mem == true) {
461 num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w;
462 deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
463 } else {
464 if (g_ip->fast_access == true) {
465 num_do_b_subbank = g_ip->out_w * g_ip->data_assoc;
466 deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
467 } else {
510
468
511 num_do_b_subbank = g_ip->out_w;
512 deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc;
513 if (deg_sa_mux_l1_non_assoc < 1)
514 {
515 return;
516 }
469 num_do_b_subbank = g_ip->out_w;
470 deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc;
471 if (deg_sa_mux_l1_non_assoc < 1) {
472 return;
473 }
517
474
518 }
519 }
520 }
521 else
522 {
523 num_do_b_subbank = tagbits * g_ip->tag_assoc;
524 if (num_do_b_mat < tagbits)
525 {
526 return;
527 }
528 deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
529 //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir;
530 }
531 }
532 else
533 {
534 if (fully_assoc)
535 {
536 num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa
537 num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray;
538 }
539 else
540 {
541 num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
542 num_do_b_subbank = tag_num_c_subarray;
543 }
475 }
476 }
477 } else {
478 num_do_b_subbank = tagbits * g_ip->tag_assoc;
479 if (num_do_b_mat < tagbits) {
480 return;
481 }
482 deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
483 //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir;
484 }
485 } else {
486 if (fully_assoc) {
487 num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa
488 num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray;
489 } else {
490 num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
491 num_do_b_subbank = tag_num_c_subarray;
492 }
544
493
545 deg_sa_mux_l1_non_assoc = 1;
546 }
494 deg_sa_mux_l1_non_assoc = 1;
495 }
547
496
548 deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc;
497 deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc;
549
498
550 if (fully_assoc || pure_cam)
551 {
552 num_act_mats_hor_dir = 1;
553 num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used
554 }
555 else
556 {
557 num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat;
558 if (num_act_mats_hor_dir == 0)
559 {
560 return;
561 }
562 }
499 if (fully_assoc || pure_cam) {
500 num_act_mats_hor_dir = 1;
501 num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used
502 } else {
503 num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat;
504 if (num_act_mats_hor_dir == 0) {
505 return;
506 }
507 }
563
508
564 //compute num_do_mat for tag
565 if (is_tag)
566 {
567 if (!(fully_assoc || pure_cam))
568 {
569 num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir;
570 num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat;
571 }
572 }
509 //compute num_do_mat for tag
510 if (is_tag) {
511 if (!(fully_assoc || pure_cam)) {
512 num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir;
513 num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat;
514 }
515 }
573
516
574 if ((g_ip->is_cache == false && is_main_mem == true) || (PAGE_MODE == 1 && is_dram))
575 {
576 if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != (int)g_ip->page_sz_bits)
577 {
578 return;
579 }
580 }
517 if ((g_ip->is_cache == false && is_main_mem == true) ||
518 (PAGE_MODE == 1 && is_dram)) {
519 if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 !=
520 (int)g_ip->page_sz_bits) {
521 return;
522 }
523 }
581
582// if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays
524
525// if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays
583 if (is_tag == false && g_ip->is_main_mem == true &&
584 num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc))
585 {
586 return;
587 }
526 if (is_tag == false && g_ip->is_main_mem == true &&
527 num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 <
528 ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc)) {
529 return;
530 }
588
531
589 if (num_act_mats_hor_dir > num_mats_h_dir)
590 {
591 return;
592 }
532 if (num_act_mats_hor_dir > num_mats_h_dir) {
533 return;
534 }
593
594
535
536
595 //compute di for mat subbank and bank
596 if (!(fully_assoc ||pure_cam))
597 {
598 if(!is_tag)
599 {
600 if(g_ip->fast_access == true)
601 {
602 num_di_b_mat = num_do_b_mat / g_ip->data_assoc;
603 }
604 else
605 {
606 num_di_b_mat = num_do_b_mat;
607 }
608 }
609 else
610 {
611 num_di_b_mat = tagbits;
612 }
613 }
614 else
615 {
616 if (fully_assoc)
617 {
618 num_di_b_mat = num_do_b_mat;
619 //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache,
620 //but inside the mat wire tracks need to be reserved for search data bus
621 num_si_b_mat = tagbits;
622 }
623 else
624 {
625 num_di_b_mat = tagbits;
626 num_si_b_mat = tagbits;//*num_subarrays/num_mats;
627 }
537 //compute di for mat subbank and bank
538 if (!(fully_assoc || pure_cam)) {
539 if (!is_tag) {
540 if (g_ip->fast_access == true) {
541 num_di_b_mat = num_do_b_mat / g_ip->data_assoc;
542 } else {
543 num_di_b_mat = num_do_b_mat;
544 }
545 } else {
546 num_di_b_mat = tagbits;
547 }
548 } else {
549 if (fully_assoc) {
550 num_di_b_mat = num_do_b_mat;
551 //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache,
552 //but inside the mat wire tracks need to be reserved for search data bus
553 num_si_b_mat = tagbits;
554 } else {
555 num_di_b_mat = tagbits;
556 num_si_b_mat = tagbits;//*num_subarrays/num_mats;
557 }
628
558
629 }
559 }
630
560
631 num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA
632 num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast
561 num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA
562 num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast
633
563
634 int num_addr_b_row_dec = _log2(num_r_subarray);
635 if ((fully_assoc ||pure_cam))
636 num_addr_b_row_dec +=_log2(num_subarrays/num_mats);
637 int number_subbanks = num_mats / num_act_mats_hor_dir;
638 number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM
564 int num_addr_b_row_dec = _log2(num_r_subarray);
565 if ((fully_assoc || pure_cam))
566 num_addr_b_row_dec += _log2(num_subarrays / num_mats);
567 int number_subbanks = num_mats / num_act_mats_hor_dir;
568 number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM
639
569
640 num_rw_ports = g_ip->num_rw_ports;
641 num_rd_ports = g_ip->num_rd_ports;
642 num_wr_ports = g_ip->num_wr_ports;
643 num_se_rd_ports = g_ip->num_se_rd_ports;
644 num_search_ports = g_ip->num_search_ports;
570 num_rw_ports = g_ip->num_rw_ports;
571 num_rd_ports = g_ip->num_rd_ports;
572 num_wr_ports = g_ip->num_wr_ports;
573 num_se_rd_ports = g_ip->num_se_rd_ports;
574 num_search_ports = g_ip->num_search_ports;
645
575
646 if (is_dram && is_main_mem)
647 {
648 number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec,
649 _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2));
650 }
651 else
652 {
653 number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) +
654 _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2);
655 }
576 if (is_dram && is_main_mem) {
577 number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec,
578 _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2));
579 } else {
580 number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) +
581 _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2);
582 }
656
583
657 if (!(fully_assoc ||pure_cam))
658 {
659 if (is_tag)
660 {
661 num_di_b_bank_per_port = tagbits;
662 num_do_b_bank_per_port = g_ip->data_assoc;
663 }
664 else
665 {
666 num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc;
667 num_do_b_bank_per_port = g_ip->out_w;
668 }
669 }
670 else
671 {
672 if (fully_assoc)
673 {
674 num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz?
675 num_si_b_bank_per_port = tagbits;
676 num_do_b_bank_per_port = g_ip->out_w + tagbits;
677 num_so_b_bank_per_port = g_ip->out_w;
678 }
679 else
680 {
681 num_di_b_bank_per_port = tagbits;
682 num_si_b_bank_per_port = tagbits;
683 num_do_b_bank_per_port = tagbits;
684 num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));
685 }
686 }
584 if (!(fully_assoc || pure_cam)) {
585 if (is_tag) {
586 num_di_b_bank_per_port = tagbits;
587 num_do_b_bank_per_port = g_ip->data_assoc;
588 } else {
589 num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc;
590 num_do_b_bank_per_port = g_ip->out_w;
591 }
592 } else {
593 if (fully_assoc) {
594 num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz?
595 num_si_b_bank_per_port = tagbits;
596 num_do_b_bank_per_port = g_ip->out_w + tagbits;
597 num_so_b_bank_per_port = g_ip->out_w;
598 } else {
599 num_di_b_bank_per_port = tagbits;
600 num_si_b_bank_per_port = tagbits;
601 num_do_b_bank_per_port = tagbits;
602 num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));
603 }
604 }
687
605
688 if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access))
689 {
690 number_way_select_signals_mat = g_ip->data_assoc;
691 }
606 if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) {
607 number_way_select_signals_mat = g_ip->data_assoc;
608 }
692
609
693 // add ECC adjustment to all data signals that traverse on H-trees.
694 if (g_ip->add_ecc_b_ == true)
695 {
696 num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_));
697 num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_));
698 num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_));
699 num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_));
700 num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_));
701 num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_));
610 // add ECC adjustment to all data signals that traverse on H-trees.
611 if (g_ip->add_ecc_b_ == true) {
612 num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_));
613 num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_));
614 num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_));
615 num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_));
616 num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_));
617 num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_));
702
618
703 num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_));
704 num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_));
705 num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_));
706 num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_));
707 num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_));
708 num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_));
709 }
619 num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_));
620 num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_));
621 num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_));
622 num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_));
623 num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_));
624 num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_));
625 }
710
626
711 is_valid = true;
627 is_valid = true;
712}
713
628}
629