4a5
> * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
28c29
< * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
---
> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42,53c43,54
< dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray),
< num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray),
< cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_)
< {
< //num_cols=7;
< //cout<<"num_cols ="<< num_cols <<endl;
< if (!(is_fa || dp.pure_cam))
< {
< num_cols +=(g_ip->add_ecc_b_ ? (int)ceil(num_cols / num_bits_per_ecc_b_) : 0); // ECC overhead
< uint32_t ram_num_cells_wl_stitching =
< (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ :
< (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_;
---
> dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray),
> num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray),
> cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_) {
> //num_cols=7;
> //cout<<"num_cols ="<< num_cols <<endl;
> if (!(is_fa || dp.pure_cam)) {
> // ECC overhead
> num_cols += (g_ip->add_ecc_b_ ? (int)ceil(num_cols /
> num_bits_per_ecc_b_) : 0);
> uint32_t ram_num_cells_wl_stitching =
> (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ :
> (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_;
55c56
< area.h = cell.h * num_rows;
---
> area.h = cell.h * num_rows;
57,61c58,60
< area.w = cell.w * num_cols +
< ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead
< }
< else //cam fa
< {
---
> area.w = cell.w * num_cols +
> ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead
> } else { //cam fa
63,75c62,71
< //should not add dummy row here since the dummy row do not need decoder
< if (is_fa)// fully associative cache
< {
< num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
< num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0);
< num_cols = num_cols_fa_cam + num_cols_fa_ram;
< }
< else
< {
< num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
< num_cols_fa_ram = 0;
< num_cols = num_cols_fa_cam;
< }
---
> //should not add dummy row here since the dummy row do not need decoder
> if (is_fa) { // fully associative cache
> num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
> num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0);
> num_cols = num_cols_fa_cam + num_cols_fa_ram;
> } else {
> num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
> num_cols_fa_ram = 0;
> num_cols = num_cols_fa_cam;
> }
77,82c73,82
< area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
< area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
< + ceil((num_cols_fa_cam + num_cols_fa_ram) / sram_num_cells_wl_stitching_)*g_tp.ram_wl_stitching_overhead_
< + 16*g_tp.wire_local.pitch //the overhead for the NAND gate to connect the two halves
< + 128*g_tp.wire_local.pitch;//the overhead for the drivers from matchline to wordline of RAM
< }
---
> area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
> area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
> + ceil((num_cols_fa_cam + num_cols_fa_ram) /
> sram_num_cells_wl_stitching_) *
> g_tp.ram_wl_stitching_overhead_
> //the overhead for the NAND gate to connect the two halves
> + 16 * g_tp.wire_local.pitch
> //the overhead for the drivers from matchline to wordline of RAM
> + 128 * g_tp.wire_local.pitch;
> }
84,86c84,86
< assert(area.h>0);
< assert(area.w>0);
< compute_C();
---
> assert(area.h > 0);
> assert(area.w > 0);
> compute_C();
91,92c91
< Subarray::~Subarray()
< {
---
> Subarray::~Subarray() {
97,98c96
< double Subarray::get_total_cell_area()
< {
---
> double Subarray::get_total_cell_area() {
105,110c103,111
< return (cell.get_area() * num_rows * num_cols);
< else if (is_fa)
< { //for FA, this area includes the dummy cells in SRAM arrays.
< //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
< //cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl;
< return (cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
---
> return (cell.get_area() * num_rows * num_cols);
> else if (is_fa) {
> //for FA, this area includes the dummy cells in SRAM arrays.
> //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
> //cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl;
> return (cam_cell.h * (num_rows + 1) *
> (cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
> } else {
> return (cam_cell.get_area() * (num_rows + 1) * num_cols_fa_cam );
112,113d112
< else
< return (cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam );
120,125c119,123
< void Subarray::compute_C()
< {
< double c_w_metal = cell.w * g_tp.wire_local.C_per_um;
< double r_w_metal = cell.w * g_tp.wire_local.R_per_um;
< double C_b_metal = cell.h * g_tp.wire_local.C_per_um;
< double C_b_row_drain_C;
---
> void Subarray::compute_C() {
> double c_w_metal = cell.w * g_tp.wire_local.C_per_um;
> double r_w_metal = cell.w * g_tp.wire_local.R_per_um;
> double C_b_metal = cell.h * g_tp.wire_local.C_per_um;
> double C_b_row_drain_C;
127,129c125,126
< if (dp.is_dram)
< {
< C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
---
> if (dp.is_dram) {
> C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
131,158c128,151
< if (dp.ram_cell_tech_type == comm_dram)
< {
< C_bl = num_rows * C_b_metal;
< }
< else
< {
< C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
< C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
< }
< }
< else
< {
< if (!(is_fa ||dp.pure_cam))
< {
< C_wl = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
< c_w_metal) * num_cols;
< C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
< C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
< }
< else
< {
< //Following is wordline not matchline
< //CAM portion
< c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
< r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
< C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w, (g_tp.cam.b_w-2*g_tp.cam.cell_a_w)/2.0, false, true)*2 +
< c_w_metal) * num_cols_fa_cam;
< R_wl_cam = (r_w_metal) * num_cols_fa_cam;
---
> if (dp.ram_cell_tech_type == comm_dram) {
> C_bl = num_rows * C_b_metal;
> } else {
> C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
> C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
> }
> } else {
> if (!(is_fa || dp.pure_cam)) {
> C_wl = (gate_C_pass(g_tp.sram.cell_a_w,
> (g_tp.sram.b_w - 2 * g_tp.sram.cell_a_w) / 2.0,
> false, true) * 2 +
> c_w_metal) * num_cols;
> C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
> C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
> } else {
> //Following is wordline not matchline
> //CAM portion
> c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
> r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
> C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w,
> (g_tp.cam.b_w - 2 * g_tp.cam.cell_a_w) /
> 2.0, false, true) * 2 +
> c_w_metal) * num_cols_fa_cam;
> R_wl_cam = (r_w_metal) * num_cols_fa_cam;
160,174c153,168
< if (!dp.pure_cam)
< {
< //RAM portion
< c_w_metal = cell.w * g_tp.wire_local.C_per_um;
< r_w_metal = cell.w * g_tp.wire_local.R_per_um;
< C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
< c_w_metal) * num_cols_fa_ram;
< R_wl_ram = (r_w_metal) * num_cols_fa_ram;
< }
< else
< {
< C_wl_ram = R_wl_ram =0;
< }
< C_wl = C_wl_cam + C_wl_ram;
< C_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.C_per_um;
---
> if (!dp.pure_cam) {
> //RAM portion
> c_w_metal = cell.w * g_tp.wire_local.C_per_um;
> r_w_metal = cell.w * g_tp.wire_local.R_per_um;
> C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w,
> (g_tp.sram.b_w - 2 *
> g_tp.sram.cell_a_w) / 2.0, false,
> true) * 2 +
> c_w_metal) * num_cols_fa_ram;
> R_wl_ram = (r_w_metal) * num_cols_fa_ram;
> } else {
> C_wl_ram = R_wl_ram = 0;
> }
> C_wl = C_wl_cam + C_wl_ram;
> C_wl += (16 + 128) * g_tp.wire_local.pitch *
> g_tp.wire_local.C_per_um;
176,177c170,172
< R_wl = R_wl_cam + R_wl_ram;
< R_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.R_per_um;
---
> R_wl = R_wl_cam + R_wl_ram;
> R_wl += (16 + 128) * g_tp.wire_local.pitch *
> g_tp.wire_local.R_per_um;
179,182c174,177
< //there are two ways to write to a FA,
< //1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM;
< //2) using separate wordline for read/write and search in RAM.
< //We are using the second approach.
---
> //there are two ways to write to a FA,
> //1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM;
> //2) using separate wordline for read/write and search in RAM.
> //We are using the second approach.
184,190c179,185
< //Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations.
< C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um;
< C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact
< C_bl_cam = (num_rows+1) * (C_b_row_drain_C + C_b_metal);
< //height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
< C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
< C_bl = (num_rows +1) * (C_b_row_drain_C + C_b_metal);
---
> //Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations.
> C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um;
> C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact
> C_bl_cam = (num_rows + 1) * (C_b_row_drain_C + C_b_metal);
> //height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
> C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
> C_bl = (num_rows + 1) * (C_b_row_drain_C + C_b_metal);
192,193c187,188
< }
< }
---
> }
> }