parameter.cc (10152:52c552138ba1) | parameter.cc (10234:5cb711fa6176) |
---|---|
1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. | 1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. |
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. |
|
5 * All Rights Reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer; 11 * redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the --- 7 unchanged lines hidden (view full) --- 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 6 * All Rights Reserved 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are 10 * met: redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer; 12 * redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the --- 7 unchanged lines hidden (view full) --- 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” | 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 * 30 ***************************************************************************/ 31 32 33 34#include <iomanip> 35#include <iostream> 36#include <string> --- 4 unchanged lines hidden (view full) --- 41using namespace std; 42 43 44InputParameter * g_ip; 45TechnologyParameter g_tp; 46 47 48 | 30 * 31 ***************************************************************************/ 32 33 34 35#include <iomanip> 36#include <iostream> 37#include <string> --- 4 unchanged lines hidden (view full) --- 42using namespace std; 43 44 45InputParameter * g_ip; 46TechnologyParameter g_tp; 47 48 49 |
49void TechnologyParameter::DeviceType::display(uint32_t indent) 50{ 51 string indent_str(indent, ' '); | 50void TechnologyParameter::DeviceType::display(uint32_t indent) { 51 string indent_str(indent, ' '); |
52 | 52 |
53 cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl; 54 cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl; 55 cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl; 56 cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl; 57 cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl; 58 cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl; 59 cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl; 60 cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl; 61 cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl; 62 cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl; 63 cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl; 64 cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl; 65 cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl; 66 cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl; 67 cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl; 68 cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl; 69 cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl; | 53 cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl; 54 cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl; 55 cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl; 56 cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl; 57 cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl; 58 cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl; 59 cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl; 60 cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << 
" ohm-um" << endl; 61 cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl; 62 cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl; 63 cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl; 64 cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl; 65 cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl; 66 cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl; 67 cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl; 68 cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl; 69 cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl; |
70} 71 72 73 | 70} 71 72 73 |
74void TechnologyParameter::InterconnectType::display(uint32_t indent) 75{ 76 string indent_str(indent, ' '); | 74void TechnologyParameter::InterconnectType::display(uint32_t indent) { 75 string indent_str(indent, ' '); |
77 | 76 |
78 cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl; 79 cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl; 80 cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl; | 77 cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl; 78 cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl; 79 cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl; |
81} 82 | 80} 81 |
83void TechnologyParameter::ScalingFactor::display(uint32_t indent) 84{ 85 string indent_str(indent, ' '); | 82void TechnologyParameter::ScalingFactor::display(uint32_t indent) { 83 string indent_str(indent, ' '); |
86 | 84 |
87 cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl; 88 cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl; | 85 cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl; 86 cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl; |
89} 90 | 87} 88 |
91void TechnologyParameter::MemoryType::display(uint32_t indent) 92{ 93 string indent_str(indent, ' '); | 89void TechnologyParameter::MemoryType::display(uint32_t indent) { 90 string indent_str(indent, ' '); |
94 | 91 |
95 cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl; 96 cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl; 97 cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl; 98 cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl; 99 cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl; 100 cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl; | 92 cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl; 93 cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl; 94 cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl; 95 cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl; 96 cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl; 97 cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl; |
101} 102 103 104 | 98} 99 100 101 |
105void TechnologyParameter::display(uint32_t indent) 106{ 107 string indent_str(indent, ' '); | 102void TechnologyParameter::display(uint32_t indent) { 103 string indent_str(indent, ' '); |
108 | 104 |
109 cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl; 110 cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl; 111 cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl; 112 cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl; 113 cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl; 114 cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl; 115 cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl; 116 cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl; 117 cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl; 118 cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl; 119 cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl; 120 cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl; 121 cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl; 122 cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl; 123 cout << endl; 124 cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl; 125 cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl; 126 cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl; 127 cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl; 128 cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl; 129 cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl; 130 cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl; 131 cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << 
endl; 132 cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl; 133 cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl; 134 cout << endl; 135 cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl; 136 cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl; 137 cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl; 138 cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl; 139 cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl; 140 cout << endl; 141 cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl; 142 cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl; 143 cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl; 144 cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl; 145 cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl; 146 cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl; 147 cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl; | 105 cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl; 106 cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl; 107 cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl; 108 cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl; 109 cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl; 110 cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl; 111 cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl; 
112 cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl; 113 cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl; 114 cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl; 115 cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl; 116 cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl; 117 cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl; 118 cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl; 119 cout << endl; 120 cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl; 121 cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl; 122 cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl; 123 cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl; 124 cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl; 125 cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl; 126 cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl; 127 cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << endl; 128 cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl; 129 cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl; 130 cout << endl; 131 cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl; 132 cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl; 133 cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl; 134 cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl; 135 cout << 
indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl; 136 cout << endl; 137 cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl; 138 cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl; 139 cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl; 140 cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl; 141 cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl; 142 cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl; 143 cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl; |
148 | 144 |
149 cout << endl; 150 cout << indent_str << "SRAM cell transistor: " << endl; 151 sram_cell.display(indent + 2); | 145 cout << endl; 146 cout << indent_str << "SRAM cell transistor: " << endl; 147 sram_cell.display(indent + 2); |
152 | 148 |
153 cout << endl; 154 cout << indent_str << "DRAM access transistor: " << endl; 155 dram_acc.display(indent + 2); | 149 cout << endl; 150 cout << indent_str << "DRAM access transistor: " << endl; 151 dram_acc.display(indent + 2); |
156 | 152 |
157 cout << endl; 158 cout << indent_str << "DRAM wordline transistor: " << endl; 159 dram_wl.display(indent + 2); | 153 cout << endl; 154 cout << indent_str << "DRAM wordline transistor: " << endl; 155 dram_wl.display(indent + 2); |
160 | 156 |
161 cout << endl; 162 cout << indent_str << "peripheral global transistor: " << endl; 163 peri_global.display(indent + 2); | 157 cout << endl; 158 cout << indent_str << "peripheral global transistor: " << endl; 159 peri_global.display(indent + 2); |
164 | 160 |
165 cout << endl; 166 cout << indent_str << "wire local" << endl; 167 wire_local.display(indent + 2); | 161 cout << endl; 162 cout << indent_str << "wire local" << endl; 163 wire_local.display(indent + 2); |
168 | 164 |
169 cout << endl; 170 cout << indent_str << "wire inside mat" << endl; 171 wire_inside_mat.display(indent + 2); | 165 cout << endl; 166 cout << indent_str << "wire inside mat" << endl; 167 wire_inside_mat.display(indent + 2); |
172 | 168 |
173 cout << endl; 174 cout << indent_str << "wire outside mat" << endl; 175 wire_outside_mat.display(indent + 2); | 169 cout << endl; 170 cout << indent_str << "wire outside mat" << endl; 171 wire_outside_mat.display(indent + 2); |
176 | 172 |
177 cout << endl; 178 cout << indent_str << "SRAM" << endl; 179 sram.display(indent + 2); | 173 cout << endl; 174 cout << indent_str << "SRAM" << endl; 175 sram.display(indent + 2); |
180 | 176 |
181 cout << endl; 182 cout << indent_str << "DRAM" << endl; 183 dram.display(indent + 2); | 177 cout << endl; 178 cout << indent_str << "DRAM" << endl; 179 dram.display(indent + 2); |
184} 185 186 187DynamicParameter::DynamicParameter(): | 180} 181 182 183DynamicParameter::DynamicParameter(): |
188 use_inp_params(0), cell(), is_valid(true) 189{ | 184 use_inp_params(0), cell(), is_valid(true) { |
190} 191 192 193 194DynamicParameter::DynamicParameter( 195 bool is_tag_, 196 int pure_ram_, 197 int pure_cam_, 198 double Nspd_, 199 unsigned int Ndwl_, 200 unsigned int Ndbl_, 201 unsigned int Ndcm_, 202 unsigned int Ndsam_lev_1_, 203 unsigned int Ndsam_lev_2_, 204 bool is_main_mem_): | 185} 186 187 188 189DynamicParameter::DynamicParameter( 190 bool is_tag_, 191 int pure_ram_, 192 int pure_cam_, 193 double Nspd_, 194 unsigned int Ndwl_, 195 unsigned int Ndbl_, 196 unsigned int Ndcm_, 197 unsigned int Ndsam_lev_1_, 198 unsigned int Ndsam_lev_2_, 199 bool is_main_mem_): |
205 is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_),Ndcm(Ndcm_), 206 Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_), 207 number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0), 208 is_main_mem(is_main_mem_), cell(), is_valid(false) 209{ 210 ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; 211 is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); | 200 is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), 201 Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_), Ndcm(Ndcm_), 202 Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_), 203 number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0), 204 is_main_mem(is_main_mem_), cell(), is_valid(false) { 205 ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; 206 is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); |
212 | 207 |
213 unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer 214 const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local; 215 fully_assoc = (g_ip->fully_assoc) ? true : false; | 208 unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer 209 const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local; 210 fully_assoc = (g_ip->fully_assoc) ? true : false; |
216 | 211 |
217 if (fully_assoc || pure_cam) 218 { // fully-assocative cache -- ref: CACTi 2.0 report 219 if (Ndwl != 1 || //Ndwl is fixed to 1 for FA 220 Ndcm != 1 || //Ndcm is fixed to 1 for FA 221 Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA 222 Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one 223 Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one 224 Ndbl < 2) 225 { 226 return; 227 } 228 } | 212 // fully-assocative cache -- ref: CACTi 2.0 report 213 if (fully_assoc || pure_cam) { 214 if (Ndwl != 1 || //Ndwl is fixed to 1 for FA 215 Ndcm != 1 || //Ndcm is fixed to 1 for FA 216 Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA 217 Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one 218 Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one 219 Ndbl < 2) { 220 return; 221 } 222 } |
229 | 223 |
230 if ((is_dram) && (!is_tag) && (Ndcm > 1)) 231 { 232 return; // For a DRAM array, each bitline has its own sense-amp 233 } | 224 if ((is_dram) && (!is_tag) && (Ndcm > 1)) { 225 return; // For a DRAM array, each bitline has its own sense-amp 226 } |
234 | 227 |
235 // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be 236 // at least two because an array is assumed to have at least one mat. And a mat 237 // is formed out of two horizontal subarrays and two vertical subarrays 238 if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) 239 { 240 return; 241 } | 228 // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be 229 // at least two because an array is assumed to have at least one mat. And a mat 230 // is formed out of two horizontal subarrays and two vertical subarrays 231 if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) { 232 return; 233 } |
242 | 234 |
243 //***********compute row, col of an subarray 244 if (!(fully_assoc || pure_cam))//Not fully_asso nor cam 245 { 246 // if data array, let tagbits = 0 247 if (is_tag) 248 { 249 if (g_ip->specific_tag) 250 { 251 tagbits = g_ip->tag_w; 252 } 253 else 254 { 255 tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) + 256 _log2(g_ip->tag_assoc*2 - 1) - _log2(g_ip->nbanks); | 235 //***********compute row, col of an subarray 236 if (!(fully_assoc || pure_cam)) { 237 //Not fully_asso nor cam 238 // if data array, let tagbits = 0 239 if (is_tag) { 240 if (g_ip->specific_tag) { 241 tagbits = g_ip->tag_w; 242 } else { 243 tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) + 244 _log2(g_ip->tag_assoc * 2 - 1) - _log2(g_ip->nbanks); |
257 | 245 |
258 } 259 tagbits = (((tagbits + 3) >> 2) << 2); | 246 } 247 tagbits = (((tagbits + 3) >> 2) << 2); |
260 | 248 |
261 num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * 262 g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON); 263 num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON); 264 //burst_length = 1; 265 } 266 else 267 { 268 num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * 269 g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON); 270 num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON); 271 // burst_length = g_ip->block_sz * 8 / g_ip->out_w; 272 } | 249 num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * 250 g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON); 251 num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON); 252 //burst_length = 1; 253 } else { 254 num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * 255 g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON); 256 num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON); 257 // burst_length = g_ip->block_sz * 8 / g_ip->out_w; 258 } |
273 | 259 |
274 if (num_r_subarray < MINSUBARRAYROWS) return; 275 if (num_r_subarray == 0) return; 276 if (num_r_subarray > MAXSUBARRAYROWS) return; 277 if (num_c_subarray < MINSUBARRAYCOLS) return; 278 if (num_c_subarray > MAXSUBARRAYCOLS) return; | 260 if (num_r_subarray < MINSUBARRAYROWS) return; 261 if (num_r_subarray == 0) return; 262 if (num_r_subarray > MAXSUBARRAYROWS) return; 263 if (num_c_subarray < MINSUBARRAYCOLS) return; 264 if (num_c_subarray > MAXSUBARRAYCOLS) return; |
279 | 265 |
280 } | 266 } |
281 | 267 |
282 else 283 {//either fully-asso or cam 284 if (pure_cam) 285 { 286 if (g_ip->specific_tag) 287 { 288 tagbits = int(ceil(g_ip->tag_w/8.0)*8); 289 } 290 else 291 { 292 tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS)/8.0)*8); | 268 else {//either fully-asso or cam 269 if (pure_cam) { 270 if (g_ip->specific_tag) { 271 tagbits = int(ceil(g_ip->tag_w / 8.0) * 8); 272 } else { 273 tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS) / 8.0) * 8); |
293// cout<<"Pure CAM needs tag width to be specified"<<endl; 294// exit(0); | 274// cout<<"Pure CAM needs tag width to be specified"<<endl; 275// exit(0); |
295 } 296 //tagbits = (((tagbits + 3) >> 2) << 2); | 276 } 277 //tagbits = (((tagbits + 3) >> 2) << 2); |
297 | 278 |
298 tag_num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks*tagbits/8.0 * Ndbl));//TODO: error check input of tagbits and blocksize //TODO: for pure CAM, g_ip->block should be number of entries. 299 //tag_num_c_subarray = (int)(tagbits + EPSILON); 300 tag_num_c_subarray = tagbits; 301 if (tag_num_r_subarray == 0) return; 302 if (tag_num_r_subarray > MAXSUBARRAYROWS) return; 303 if (tag_num_c_subarray < MINSUBARRAYCOLS) return; 304 if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; 305 num_r_subarray = tag_num_r_subarray; 306 } 307 else //fully associative 308 { 309 if (g_ip->specific_tag) 310 { 311 tagbits = g_ip->tag_w; 312 } 313 else 314 { 315 tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem. 316 } 317 tagbits = (((tagbits + 3) >> 2) << 2); | 279 //TODO: error check input of tagbits and blocksize 280 //TODO: for pure CAM, g_ip->block should be number of entries. 281 tag_num_r_subarray = (int)ceil(capacity_per_die / 282 (g_ip->nbanks * tagbits / 8.0 * Ndbl)); 283 //tag_num_c_subarray = (int)(tagbits + EPSILON); 284 tag_num_c_subarray = tagbits; 285 if (tag_num_r_subarray == 0) return; 286 if (tag_num_r_subarray > MAXSUBARRAYROWS) return; 287 if (tag_num_c_subarray < MINSUBARRAYCOLS) return; 288 if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; 289 num_r_subarray = tag_num_r_subarray; 290 } else { //fully associative 291 if (g_ip->specific_tag) { 292 tagbits = g_ip->tag_w; 293 } else { 294 tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem. 295 } 296 tagbits = (((tagbits + 3) >> 2) << 2); |
318 | 297 |
319 tag_num_r_subarray = (int)(capacity_per_die / (g_ip->nbanks*g_ip->block_sz * Ndbl)); 320 tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON); 321 if (tag_num_r_subarray == 0) return; 322 if (tag_num_r_subarray > MAXSUBARRAYROWS) return; 323 if (tag_num_c_subarray < MINSUBARRAYCOLS) return; 324 if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; | 298 tag_num_r_subarray = (int)(capacity_per_die / 299 (g_ip->nbanks * g_ip->block_sz * Ndbl)); 300 tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON); 301 if (tag_num_r_subarray == 0) return; 302 if (tag_num_r_subarray > MAXSUBARRAYROWS) return; 303 if (tag_num_c_subarray < MINSUBARRAYCOLS) return; 304 if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; |
325 | 305 |
326 data_num_r_subarray = tag_num_r_subarray; 327 data_num_c_subarray = 8 * g_ip->block_sz; 328 if (data_num_r_subarray == 0) return; 329 if (data_num_r_subarray > MAXSUBARRAYROWS) return; 330 if (data_num_c_subarray < MINSUBARRAYCOLS) return; 331 if (data_num_c_subarray > MAXSUBARRAYCOLS) return; 332 num_r_subarray = tag_num_r_subarray; 333 } 334 } | 306 data_num_r_subarray = tag_num_r_subarray; 307 data_num_c_subarray = 8 * g_ip->block_sz; 308 if (data_num_r_subarray == 0) return; 309 if (data_num_r_subarray > MAXSUBARRAYROWS) return; 310 if (data_num_c_subarray < MINSUBARRAYCOLS) return; 311 if (data_num_c_subarray > MAXSUBARRAYCOLS) return; 312 num_r_subarray = tag_num_r_subarray; 313 } 314 } |
335 | 315 |
336 num_subarrays = Ndwl * Ndbl; 337 //****************end of computation of row, col of an subarray | 316 num_subarrays = Ndwl * Ndbl; 317 //****************end of computation of row, col of an subarray |
338 | 318 |
339 // calculate wire parameters 340 if (fully_assoc || pure_cam) 341 { 342 cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports) 343 + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; 344 cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports) 345 + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; | 319 // calculate wire parameters 320 if (fully_assoc || pure_cam) { 321 cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * 322 (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports) 323 + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) + 324 wire_local.pitch * g_ip->num_se_rd_ports; 325 cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * 326 (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports) 327 + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) + 328 wire_local.pitch * g_ip->num_se_rd_ports; |
346 | 329 |
347 cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports) 348 + 2 * wire_local.pitch*(g_ip->num_search_ports-1); 349 cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports) 350 + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1); 351 } 352 else 353 { 354 if(is_tag) 355 { 356 cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + 357 g_ip->num_wr_ports); 358 cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports + 359 (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) + 360 wire_local.pitch * g_ip->num_se_rd_ports; 361 } 362 else 363 { 364 if (is_dram) 365 { 366 cell.h = g_tp.dram.b_h; 367 cell.w = g_tp.dram.b_w; 368 } 369 else 370 { 371 cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports + 372 g_ip->num_rw_ports - 1 + g_ip->num_rd_ports); 373 cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + 374 (g_ip->num_rd_ports - g_ip->num_se_rd_ports) + 375 g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports; 376 } 377 } 378 } | 330 cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * 331 (g_ip->num_wr_ports + g_ip->num_rw_ports - 1 + g_ip->num_rd_ports) 332 + 2 * wire_local.pitch * (g_ip->num_search_ports - 1); 333 cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * 334 (g_ip->num_rw_ports - 1 + (g_ip->num_rd_ports - 335 g_ip->num_se_rd_ports) 336 + g_ip->num_wr_ports) + g_tp.wire_local.pitch * 337 g_ip->num_se_rd_ports + 2 * wire_local.pitch * 338 (g_ip->num_search_ports - 1); 339 } else { 340 if (is_tag) { 341 cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + 342 g_ip->num_wr_ports); 343 cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports + 344 (g_ip->num_rd_ports - 
g_ip->num_se_rd_ports)) + 345 wire_local.pitch * g_ip->num_se_rd_ports; 346 } else { 347 if (is_dram) { 348 cell.h = g_tp.dram.b_h; 349 cell.w = g_tp.dram.b_w; 350 } else { 351 cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports + 352 g_ip->num_rw_ports - 1 + g_ip->num_rd_ports); 353 cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + 354 (g_ip->num_rd_ports - g_ip->num_se_rd_ports) + 355 g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports; 356 } 357 } 358 } |
379 | 359 |
380 double c_b_metal = cell.h * wire_local.C_per_um; 381 double C_bl; | 360 double c_b_metal = cell.h * wire_local.C_per_um; 361 double C_bl; |
382 | 362 |
383 if (!(fully_assoc || pure_cam)) 384 { 385 if (is_dram) 386 { 387 deg_bl_muxing = 1; 388 if (ram_cell_tech_type == comm_dram) 389 { 390 C_bl = num_r_subarray * c_b_metal; 391 V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl); 392 if (V_b_sense < VBITSENSEMIN) 393 { 394 return; 395 } 396 V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value 397 dram_refresh_period = 64e-3; 398 } 399 else 400 { 401 double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; 402 C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); 403 V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl); | 363 if (!(fully_assoc || pure_cam)) { 364 if (is_dram) { 365 deg_bl_muxing = 1; 366 if (ram_cell_tech_type == comm_dram) { 367 C_bl = num_r_subarray * c_b_metal; 368 V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C / 369 (g_tp.dram_cell_C + C_bl); 370 if (V_b_sense < VBITSENSEMIN) { 371 return; 372 } 373 V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value 374 dram_refresh_period = 64e-3; 375 } else { 376 double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; 377 C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); 378 V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C / 379 (g_tp.dram_cell_C + C_bl); |
404 | 380 |
405 if (V_b_sense < VBITSENSEMIN) 406 { 407 return; //Sense amp input signal is smaller that minimum allowable sense amp input signal 408 } 409 V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value 410 //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C; 411 //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp; 412 dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp; 413 } 414 } 415 else 416 { //SRAM 417 V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; 418 deg_bl_muxing = Ndcm; 419 // "/ 2.0" below is due to the fact that two adjacent access transistors share drain 420 // contacts in a physical layout 421 double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; 422 C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); 423 dram_refresh_period = 0; 424 } 425 } 426 else 427 { 428 c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM 429 V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 
0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; 430 deg_bl_muxing = 1;//FA fix as 1 431 // "/ 2.0" below is due to the fact that two adjacent access transistors share drain 432 // contacts in a physical layout 433 double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines 434 C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); 435 dram_refresh_period = 0; 436 } | 381 if (V_b_sense < VBITSENSEMIN) { 382 return; //Sense amp input signal is smaller that minimum allowable sense amp input signal 383 } 384 V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value 385 //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C; 386 //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp; 387 dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp; 388 } 389 } else { //SRAM 390 V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; 391 deg_bl_muxing = Ndcm; 392 // "/ 2.0" below is due to the fact that two adjacent access transistors share drain 393 // contacts in a physical layout 394 double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; 395 C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); 396 dram_refresh_period = 0; 397 } 398 } else { 399 c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM 400 V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 
0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; 401 deg_bl_muxing = 1;//FA fix as 1 402 // "/ 2.0" below is due to the fact that two adjacent access transistors share drain 403 // contacts in a physical layout 404 double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines 405 C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); 406 dram_refresh_period = 0; 407 } |
437 438 | 408 409 |
439 // do/di: data in/out, for fully associative they are the data width for normal read and write 440 // so/si: search data in/out, for fully associative they are the data width for the search ops 441 // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write) 442 // so/si need broadcast while do/di do not | 410 // do/di: data in/out, for fully associative they are the data width for normal read and write 411 // so/si: search data in/out, for fully associative they are the data width for the search ops 412 // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write) 413 // so/si need broadcast while do/di do not |
443 | 414 |
444 if (fully_assoc || pure_cam) 445 { 446 switch (Ndbl) { 447 case (0): 448 cout << " Invalid Ndbl \n"<<endl; 449 exit(0); 450 break; 451 case (1): 452 num_mats_h_dir = 1;//one subarray per mat 453 num_mats_v_dir = 1; 454 break; 455 case (2): 456 num_mats_h_dir = 1;//two subarrays per mat 457 num_mats_v_dir = 1; 458 break; 459 default: 460 num_mats_h_dir = int(floor(sqrt(Ndbl/4.0)));//4 subbarrys per mat 461 num_mats_v_dir = int(Ndbl/4.0 / num_mats_h_dir); 462 } 463 num_mats = num_mats_h_dir * num_mats_v_dir; | 415 if (fully_assoc || pure_cam) { 416 switch (Ndbl) { 417 case (0): 418 cout << " Invalid Ndbl \n" << endl; 419 exit(0); 420 break; 421 case (1): 422 num_mats_h_dir = 1;//one subarray per mat 423 num_mats_v_dir = 1; 424 break; 425 case (2): 426 num_mats_h_dir = 1;//two subarrays per mat 427 num_mats_v_dir = 1; 428 break; 429 default: 430 num_mats_h_dir = int(floor(sqrt(Ndbl / 4.0)));//4 subbarrys per mat 431 num_mats_v_dir = int(Ndbl / 4.0 / num_mats_h_dir); 432 } 433 num_mats = num_mats_h_dir * num_mats_v_dir; |
464 | 434 |
465 if (fully_assoc) 466 { 467 num_so_b_mat = data_num_c_subarray; 468 num_do_b_mat = data_num_c_subarray + tagbits; 469 } 470 else 471 { 472 num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data 473 num_do_b_mat = tagbits; 474 } 475 } 476 else 477 { 478 num_mats_h_dir = MAX(Ndwl / 2, 1); 479 num_mats_v_dir = MAX(Ndbl / 2, 1); 480 num_mats = num_mats_h_dir * num_mats_v_dir; 481 num_do_b_mat = MAX((num_subarrays/num_mats) * num_c_subarray / (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1); 482 } | 435 if (fully_assoc) { 436 num_so_b_mat = data_num_c_subarray; 437 num_do_b_mat = data_num_c_subarray + tagbits; 438 } else { 439 num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data 440 num_do_b_mat = tagbits; 441 } 442 } else { 443 num_mats_h_dir = MAX(Ndwl / 2, 1); 444 num_mats_v_dir = MAX(Ndbl / 2, 1); 445 num_mats = num_mats_h_dir * num_mats_v_dir; 446 num_do_b_mat = MAX((num_subarrays / num_mats) * num_c_subarray / 447 (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1); 448 } |
483 | 449 |
484 if (!(fully_assoc|| pure_cam) && (num_do_b_mat < (num_subarrays/num_mats))) 485 { 486 return; 487 } | 450 if (!(fully_assoc || pure_cam) && (num_do_b_mat < 451 (num_subarrays / num_mats))) { 452 return; 453 } |
488 489 | 454 455 |
490 int deg_sa_mux_l1_non_assoc; 491 //TODO:the i/o for subbank is not necessary and should be removed. 492 if (!(fully_assoc || pure_cam)) 493 { 494 if (!is_tag) 495 { 496 if (is_main_mem == true) 497 { 498 num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w; 499 deg_sa_mux_l1_non_assoc = Ndsam_lev_1; 500 } 501 else 502 { 503 if (g_ip->fast_access == true) 504 { 505 num_do_b_subbank = g_ip->out_w * g_ip->data_assoc; 506 deg_sa_mux_l1_non_assoc = Ndsam_lev_1; 507 } 508 else 509 { | 456 int deg_sa_mux_l1_non_assoc; 457 //TODO:the i/o for subbank is not necessary and should be removed. 458 if (!(fully_assoc || pure_cam)) { 459 if (!is_tag) { 460 if (is_main_mem == true) { 461 num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w; 462 deg_sa_mux_l1_non_assoc = Ndsam_lev_1; 463 } else { 464 if (g_ip->fast_access == true) { 465 num_do_b_subbank = g_ip->out_w * g_ip->data_assoc; 466 deg_sa_mux_l1_non_assoc = Ndsam_lev_1; 467 } else { |
510 | 468 |
511 num_do_b_subbank = g_ip->out_w; 512 deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc; 513 if (deg_sa_mux_l1_non_assoc < 1) 514 { 515 return; 516 } | 469 num_do_b_subbank = g_ip->out_w; 470 deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc; 471 if (deg_sa_mux_l1_non_assoc < 1) { 472 return; 473 } |
517 | 474 |
518 } 519 } 520 } 521 else 522 { 523 num_do_b_subbank = tagbits * g_ip->tag_assoc; 524 if (num_do_b_mat < tagbits) 525 { 526 return; 527 } 528 deg_sa_mux_l1_non_assoc = Ndsam_lev_1; 529 //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir; 530 } 531 } 532 else 533 { 534 if (fully_assoc) 535 { 536 num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa 537 num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray; 538 } 539 else 540 { 541 num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data 542 num_do_b_subbank = tag_num_c_subarray; 543 } | 475 } 476 } 477 } else { 478 num_do_b_subbank = tagbits * g_ip->tag_assoc; 479 if (num_do_b_mat < tagbits) { 480 return; 481 } 482 deg_sa_mux_l1_non_assoc = Ndsam_lev_1; 483 //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir; 484 } 485 } else { 486 if (fully_assoc) { 487 num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa 488 num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray; 489 } else { 490 num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data 491 num_do_b_subbank = tag_num_c_subarray; 492 } |
544 | 493 |
545 deg_sa_mux_l1_non_assoc = 1; 546 } | 494 deg_sa_mux_l1_non_assoc = 1; 495 } |
547 | 496 |
548 deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc; | 497 deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc; |
549 | 498 |
550 if (fully_assoc || pure_cam) 551 { 552 num_act_mats_hor_dir = 1; 553 num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used 554 } 555 else 556 { 557 num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat; 558 if (num_act_mats_hor_dir == 0) 559 { 560 return; 561 } 562 } | 499 if (fully_assoc || pure_cam) { 500 num_act_mats_hor_dir = 1; 501 num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used 502 } else { 503 num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat; 504 if (num_act_mats_hor_dir == 0) { 505 return; 506 } 507 } |
563 | 508 |
564 //compute num_do_mat for tag 565 if (is_tag) 566 { 567 if (!(fully_assoc || pure_cam)) 568 { 569 num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir; 570 num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat; 571 } 572 } | 509 //compute num_do_mat for tag 510 if (is_tag) { 511 if (!(fully_assoc || pure_cam)) { 512 num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir; 513 num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat; 514 } 515 } |
573 | 516 |
574 if ((g_ip->is_cache == false && is_main_mem == true) || (PAGE_MODE == 1 && is_dram)) 575 { 576 if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != (int)g_ip->page_sz_bits) 577 { 578 return; 579 } 580 } | 517 if ((g_ip->is_cache == false && is_main_mem == true) || 518 (PAGE_MODE == 1 && is_dram)) { 519 if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != 520 (int)g_ip->page_sz_bits) { 521 return; 522 } 523 } |
581 582// if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays | 524 525// if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays |
583 if (is_tag == false && g_ip->is_main_mem == true && 584 num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc)) 585 { 586 return; 587 } | 526 if (is_tag == false && g_ip->is_main_mem == true && 527 num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < 528 ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc)) { 529 return; 530 } |
588 | 531 |
589 if (num_act_mats_hor_dir > num_mats_h_dir) 590 { 591 return; 592 } | 532 if (num_act_mats_hor_dir > num_mats_h_dir) { 533 return; 534 } |
593 594 | 535 536 |
595 //compute di for mat subbank and bank 596 if (!(fully_assoc ||pure_cam)) 597 { 598 if(!is_tag) 599 { 600 if(g_ip->fast_access == true) 601 { 602 num_di_b_mat = num_do_b_mat / g_ip->data_assoc; 603 } 604 else 605 { 606 num_di_b_mat = num_do_b_mat; 607 } 608 } 609 else 610 { 611 num_di_b_mat = tagbits; 612 } 613 } 614 else 615 { 616 if (fully_assoc) 617 { 618 num_di_b_mat = num_do_b_mat; 619 //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache, 620 //but inside the mat wire tracks need to be reserved for search data bus 621 num_si_b_mat = tagbits; 622 } 623 else 624 { 625 num_di_b_mat = tagbits; 626 num_si_b_mat = tagbits;//*num_subarrays/num_mats; 627 } | 537 //compute di for mat subbank and bank 538 if (!(fully_assoc || pure_cam)) { 539 if (!is_tag) { 540 if (g_ip->fast_access == true) { 541 num_di_b_mat = num_do_b_mat / g_ip->data_assoc; 542 } else { 543 num_di_b_mat = num_do_b_mat; 544 } 545 } else { 546 num_di_b_mat = tagbits; 547 } 548 } else { 549 if (fully_assoc) { 550 num_di_b_mat = num_do_b_mat; 551 //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache, 552 //but inside the mat wire tracks need to be reserved for search data bus 553 num_si_b_mat = tagbits; 554 } else { 555 num_di_b_mat = tagbits; 556 num_si_b_mat = tagbits;//*num_subarrays/num_mats; 557 } |
628 | 558 |
629 } | 559 } |
630 | 560 |
631 num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA 632 num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast | 561 num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA 562 num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast |
633 | 563 |
634 int num_addr_b_row_dec = _log2(num_r_subarray); 635 if ((fully_assoc ||pure_cam)) 636 num_addr_b_row_dec +=_log2(num_subarrays/num_mats); 637 int number_subbanks = num_mats / num_act_mats_hor_dir; 638 number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM | 564 int num_addr_b_row_dec = _log2(num_r_subarray); 565 if ((fully_assoc || pure_cam)) 566 num_addr_b_row_dec += _log2(num_subarrays / num_mats); 567 int number_subbanks = num_mats / num_act_mats_hor_dir; 568 number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM |
639 | 569 |
640 num_rw_ports = g_ip->num_rw_ports; 641 num_rd_ports = g_ip->num_rd_ports; 642 num_wr_ports = g_ip->num_wr_ports; 643 num_se_rd_ports = g_ip->num_se_rd_ports; 644 num_search_ports = g_ip->num_search_ports; | 570 num_rw_ports = g_ip->num_rw_ports; 571 num_rd_ports = g_ip->num_rd_ports; 572 num_wr_ports = g_ip->num_wr_ports; 573 num_se_rd_ports = g_ip->num_se_rd_ports; 574 num_search_ports = g_ip->num_search_ports; |
645 | 575 |
646 if (is_dram && is_main_mem) 647 { 648 number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec, 649 _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2)); 650 } 651 else 652 { 653 number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) + 654 _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2); 655 } | 576 if (is_dram && is_main_mem) { 577 number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec, 578 _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2)); 579 } else { 580 number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) + 581 _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2); 582 } |
656 | 583 |
657 if (!(fully_assoc ||pure_cam)) 658 { 659 if (is_tag) 660 { 661 num_di_b_bank_per_port = tagbits; 662 num_do_b_bank_per_port = g_ip->data_assoc; 663 } 664 else 665 { 666 num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc; 667 num_do_b_bank_per_port = g_ip->out_w; 668 } 669 } 670 else 671 { 672 if (fully_assoc) 673 { 674 num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz? 675 num_si_b_bank_per_port = tagbits; 676 num_do_b_bank_per_port = g_ip->out_w + tagbits; 677 num_so_b_bank_per_port = g_ip->out_w; 678 } 679 else 680 { 681 num_di_b_bank_per_port = tagbits; 682 num_si_b_bank_per_port = tagbits; 683 num_do_b_bank_per_port = tagbits; 684 num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays))); 685 } 686 } | 584 if (!(fully_assoc || pure_cam)) { 585 if (is_tag) { 586 num_di_b_bank_per_port = tagbits; 587 num_do_b_bank_per_port = g_ip->data_assoc; 588 } else { 589 num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc; 590 num_do_b_bank_per_port = g_ip->out_w; 591 } 592 } else { 593 if (fully_assoc) { 594 num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz? 595 num_si_b_bank_per_port = tagbits; 596 num_do_b_bank_per_port = g_ip->out_w + tagbits; 597 num_so_b_bank_per_port = g_ip->out_w; 598 } else { 599 num_di_b_bank_per_port = tagbits; 600 num_si_b_bank_per_port = tagbits; 601 num_do_b_bank_per_port = tagbits; 602 num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays))); 603 } 604 } |
687 | 605 |
688 if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) 689 { 690 number_way_select_signals_mat = g_ip->data_assoc; 691 } | 606 if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) { 607 number_way_select_signals_mat = g_ip->data_assoc; 608 } |
692 | 609 |
693 // add ECC adjustment to all data signals that traverse on H-trees. 694 if (g_ip->add_ecc_b_ == true) 695 { 696 num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_)); 697 num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_)); 698 num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_)); 699 num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_)); 700 num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_)); 701 num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_)); | 610 // add ECC adjustment to all data signals that traverse on H-trees. 611 if (g_ip->add_ecc_b_ == true) { 612 num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_)); 613 num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_)); 614 num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_)); 615 num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_)); 616 num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_)); 617 num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_)); |
702 | 618 |
703 num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_)); 704 num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_)); 705 num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_)); 706 num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_)); 707 num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_)); 708 num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_)); 709 } | 619 num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_)); 620 num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_)); 621 num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_)); 622 num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_)); 623 num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_)); 624 num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_)); 625 } |
710 | 626 |
711 is_valid = true; | 627 is_valid = true; |
712} 713 | 628} 629 |