subarray.cc (10152:52c552138ba1) | subarray.cc (10234:5cb711fa6176) |
---|---|
1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. | 1/***************************************************************************** 2 * McPAT/CACTI 3 * SOFTWARE LICENSE AGREEMENT 4 * Copyright 2012 Hewlett-Packard Development Company, L.P. |
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. |
|
5 * All Rights Reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer; 11 * redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the --- 7 unchanged lines hidden (view full) --- 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 6 * All Rights Reserved 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are 10 * met: redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer; 12 * redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the --- 7 unchanged lines hidden (view full) --- 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” | 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 * 30 ***************************************************************************/ 31 32 33 34 35#include <cassert> 36#include <cmath> 37#include <iostream> 38 39#include "subarray.h" 40 41Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_): | 30 * 31 ***************************************************************************/ 32 33 34 35 36#include <cassert> 37#include <cmath> 38#include <iostream> 39 40#include "subarray.h" 41 42Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_): |
42 dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray), 43 num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray), 44 cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_) 45{ 46 //num_cols=7; 47 //cout<<"num_cols ="<< num_cols <<endl; 48 if (!(is_fa || dp.pure_cam)) 49 { 50 num_cols +=(g_ip->add_ecc_b_ ? (int)ceil(num_cols / num_bits_per_ecc_b_) : 0); // ECC overhead 51 uint32_t ram_num_cells_wl_stitching = 52 (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ : 53 (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_; | 43 dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray), 44 num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray), 45 cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_) { 46 //num_cols=7; 47 //cout<<"num_cols ="<< num_cols <<endl; 48 if (!(is_fa || dp.pure_cam)) { 49 // ECC overhead 50 num_cols += (g_ip->add_ecc_b_ ? (int)ceil(num_cols / 51 num_bits_per_ecc_b_) : 0); 52 uint32_t ram_num_cells_wl_stitching = 53 (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ : 54 (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_; |
54 | 55 |
55 area.h = cell.h * num_rows; | 56 area.h = cell.h * num_rows; |
56 | 57 |
57 area.w = cell.w * num_cols + 58 ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead 59 } 60 else //cam fa 61 { | 58 area.w = cell.w * num_cols + 59 ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead 60 } else { //cam fa |
62 | 61 |
63 //should not add dummy row here since the dummy row do not need decoder 64 if (is_fa)// fully associative cache 65 { 66 num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0; 67 num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0); 68 num_cols = num_cols_fa_cam + num_cols_fa_ram; 69 } 70 else 71 { 72 num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0; 73 num_cols_fa_ram = 0; 74 num_cols = num_cols_fa_cam; 75 } | 62 //should not add dummy row here since the dummy row do not need decoder 63 if (is_fa) { // fully associative cache 64 num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0; 65 num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0); 66 num_cols = num_cols_fa_cam + num_cols_fa_ram; 67 } else { 68 num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0; 69 num_cols_fa_ram = 0; 70 num_cols = num_cols_fa_cam; 71 } |
76 | 72 |
77 area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells 78 area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram 79 + ceil((num_cols_fa_cam + num_cols_fa_ram) / sram_num_cells_wl_stitching_)*g_tp.ram_wl_stitching_overhead_ 80 + 16*g_tp.wire_local.pitch //the overhead for the NAND gate to connect the two halves 81 + 128*g_tp.wire_local.pitch;//the overhead for the drivers from matchline to wordline of RAM 82 } | 73 area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells 74 area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram 75 + ceil((num_cols_fa_cam + num_cols_fa_ram) / 76 sram_num_cells_wl_stitching_) * 77 g_tp.ram_wl_stitching_overhead_ 78 //the overhead for the NAND gate to connect the two halves 79 + 16 * g_tp.wire_local.pitch 80 //the overhead for the drivers from matchline to wordline of RAM 81 + 128 * g_tp.wire_local.pitch; 82 } |
83 | 83 |
84 assert(area.h>0); 85 assert(area.w>0); 86 compute_C(); | 84 assert(area.h > 0); 85 assert(area.w > 0); 86 compute_C(); |
87} 88 89 90 | 87} 88 89 90 |
91Subarray::~Subarray() 92{ | 91Subarray::~Subarray() { |
93} 94 95 96 | 92} 93 94 95 |
97double Subarray::get_total_cell_area() 98{ | 96double Subarray::get_total_cell_area() { |
99// return (is_fa==false? cell.get_area() * num_rows * num_cols 100// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram)); 101// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)); 102// //: cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam + sram_cell.get_area()*(num_rows+1)*num_cols_fa_ram);//for FA, this area does not include the dummy cells in SRAM arrays. 103 104 if (!(is_fa || dp.pure_cam)) | 97// return (is_fa==false? cell.get_area() * num_rows * num_cols 98// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram)); 99// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)); 100// //: cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam + sram_cell.get_area()*(num_rows+1)*num_cols_fa_ram);//for FA, this area does not include the dummy cells in SRAM arrays. 101 102 if (!(is_fa || dp.pure_cam)) |
105 return (cell.get_area() * num_rows * num_cols); 106 else if (is_fa) 107 { //for FA, this area includes the dummy cells in SRAM arrays. 108 //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)); 109 //cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl; 110 return (cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)); | 103 return (cell.get_area() * num_rows * num_cols); 104 else if (is_fa) { 105 //for FA, this area includes the dummy cells in SRAM arrays. 106 //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)); 107 //cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl; 108 return (cam_cell.h * (num_rows + 1) * 109 (cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)); 110 } else { 111 return (cam_cell.get_area() * (num_rows + 1) * num_cols_fa_cam ); |
111 } | 112 } |
112 else 113 return (cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam ); | |
114 115 116} 117 118 119 | 113 114 115} 116 117 118 |
120void Subarray::compute_C() 121{ 122 double c_w_metal = cell.w * g_tp.wire_local.C_per_um; 123 double r_w_metal = cell.w * g_tp.wire_local.R_per_um; 124 double C_b_metal = cell.h * g_tp.wire_local.C_per_um; 125 double C_b_row_drain_C; | 119void Subarray::compute_C() { 120 double c_w_metal = cell.w * g_tp.wire_local.C_per_um; 121 double r_w_metal = cell.w * g_tp.wire_local.R_per_um; 122 double C_b_metal = cell.h * g_tp.wire_local.C_per_um; 123 double C_b_row_drain_C; |
126 | 124 |
127 if (dp.is_dram) 128 { 129 C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols; | 125 if (dp.is_dram) { 126 C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols; |
130 | 127 |
131 if (dp.ram_cell_tech_type == comm_dram) 132 { 133 C_bl = num_rows * C_b_metal; 134 } 135 else 136 { 137 C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact 138 C_bl = num_rows * (C_b_row_drain_C + C_b_metal); 139 } 140 } 141 else 142 { 143 if (!(is_fa ||dp.pure_cam)) 144 { 145 C_wl = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 + 146 c_w_metal) * num_cols; 147 C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact 148 C_bl = num_rows * (C_b_row_drain_C + C_b_metal); 149 } 150 else 151 { 152 //Following is wordline not matchline 153 //CAM portion 154 c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um; 155 r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um; 156 C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w, (g_tp.cam.b_w-2*g_tp.cam.cell_a_w)/2.0, false, true)*2 + 157 c_w_metal) * num_cols_fa_cam; 158 R_wl_cam = (r_w_metal) * num_cols_fa_cam; | 128 if (dp.ram_cell_tech_type == comm_dram) { 129 C_bl = num_rows * C_b_metal; 130 } else { 131 C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact 132 C_bl = num_rows * (C_b_row_drain_C + C_b_metal); 133 } 134 } else { 135 if (!(is_fa || dp.pure_cam)) { 136 C_wl = (gate_C_pass(g_tp.sram.cell_a_w, 137 (g_tp.sram.b_w - 2 * g_tp.sram.cell_a_w) / 2.0, 138 false, true) * 2 + 139 c_w_metal) * num_cols; 140 C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact 141 C_bl = num_rows * (C_b_row_drain_C + C_b_metal); 142 } else { 143 //Following is wordline not matchline 144 //CAM portion 145 c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um; 146 r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um; 147 C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w, 148 (g_tp.cam.b_w - 2 * g_tp.cam.cell_a_w) / 149 2.0, false, true) * 2 + 150 c_w_metal) * num_cols_fa_cam; 151 R_wl_cam = (r_w_metal) * num_cols_fa_cam; |
159 | 152 |
160 if (!dp.pure_cam) 161 { 162 //RAM portion 163 c_w_metal = cell.w * g_tp.wire_local.C_per_um; 164 r_w_metal = cell.w * g_tp.wire_local.R_per_um; 165 C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 + 166 c_w_metal) * num_cols_fa_ram; 167 R_wl_ram = (r_w_metal) * num_cols_fa_ram; 168 } 169 else 170 { 171 C_wl_ram = R_wl_ram =0; 172 } 173 C_wl = C_wl_cam + C_wl_ram; 174 C_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.C_per_um; | 153 if (!dp.pure_cam) { 154 //RAM portion 155 c_w_metal = cell.w * g_tp.wire_local.C_per_um; 156 r_w_metal = cell.w * g_tp.wire_local.R_per_um; 157 C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w, 158 (g_tp.sram.b_w - 2 * 159 g_tp.sram.cell_a_w) / 2.0, false, 160 true) * 2 + 161 c_w_metal) * num_cols_fa_ram; 162 R_wl_ram = (r_w_metal) * num_cols_fa_ram; 163 } else { 164 C_wl_ram = R_wl_ram = 0; 165 } 166 C_wl = C_wl_cam + C_wl_ram; 167 C_wl += (16 + 128) * g_tp.wire_local.pitch * 168 g_tp.wire_local.C_per_um; |
175 | 169 |
176 R_wl = R_wl_cam + R_wl_ram; 177 R_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.R_per_um; | 170 R_wl = R_wl_cam + R_wl_ram; 171 R_wl += (16 + 128) * g_tp.wire_local.pitch * 172 g_tp.wire_local.R_per_um; |
178 | 173 |
179 //there are two ways to write to a FA, 180 //1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM; 181 //2) using separate wordline for read/write and search in RAM. 182 //We are using the second approach. | 174 //there are two ways to write to a FA, 175 //1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM; 176 //2) using separate wordline for read/write and search in RAM. 177 //We are using the second approach. |
183 | 178 |
184 //Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations. 185 C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um; 186 C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact 187 C_bl_cam = (num_rows+1) * (C_b_row_drain_C + C_b_metal); 188 //height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells 189 C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact 190 C_bl = (num_rows +1) * (C_b_row_drain_C + C_b_metal); | 179 //Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations. 180 C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um; 181 C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact 182 C_bl_cam = (num_rows + 1) * (C_b_row_drain_C + C_b_metal); 183 //height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells 184 C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact 185 C_bl = (num_rows + 1) * (C_b_row_drain_C + C_b_metal); |
191 | 186 |
192 } 193 } | 187 } 188 } |
194} 195 196 | 189} 190 191 |