subarray.cc revision 12429:beefb9f5f551
/*****************************************************************************
 * McPAT/CACTI
 * SOFTWARE LICENSE AGREEMENT
 * Copyright 2012 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
 * All Rights Reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
302SN/A * 312SN/A ***************************************************************************/ 322SN/A 332SN/A 342SN/A 3511264Sandreas.sandberg@arm.com 3611264Sandreas.sandberg@arm.com#include <cassert> 3711264Sandreas.sandberg@arm.com#include <cmath> 382SN/A#include <iostream> 392SN/A 402SN/A#include "subarray.h" 412SN/A 42146SN/ASubarray::Subarray(const DynamicParameter & dp_, bool is_fa_): 43146SN/A dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray), 44146SN/A num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray), 45146SN/A cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_) { 46146SN/A //num_cols=7; 478232SN/A //cout<<"num_cols ="<< num_cols <<endl; 488232SN/A if (!(is_fa || dp.pure_cam)) { 4911264Sandreas.sandberg@arm.com // ECC overhead 508706SN/A num_cols += (g_ip->add_ecc_b_ ? (int)ceil(num_cols / 512522SN/A num_bits_per_ecc_b_) : 0); 522SN/A uint32_t ram_num_cells_wl_stitching = 532SN/A (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ : 542SN/A (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_; 555034SN/A 565034SN/A area.h = cell.h * num_rows; 572SN/A 582SN/A area.w = cell.w * num_cols + 592SN/A ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead 602SN/A } else { //cam fa 612SN/A 622SN/A //should not add dummy row here since the dummy row do not need decoder 632SN/A if (is_fa) { // fully associative cache 642SN/A num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0; 652SN/A num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0); 662522SN/A num_cols = num_cols_fa_cam + num_cols_fa_ram; 672SN/A } else { 682SN/A num_cols_fa_cam += g_ip->add_ecc_b_ ? 
(int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0; 692SN/A num_cols_fa_ram = 0; 702SN/A num_cols = num_cols_fa_cam; 712SN/A } 722SN/A 732SN/A area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells 748852SN/A area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram 752522SN/A + ceil((num_cols_fa_cam + num_cols_fa_ram) / 762SN/A sram_num_cells_wl_stitching_) * 772SN/A g_tp.ram_wl_stitching_overhead_ 782522SN/A //the overhead for the NAND gate to connect the two halves 798994SN/A + 16 * g_tp.wire_local.pitch 802SN/A //the overhead for the drivers from matchline to wordline of RAM 812SN/A + 128 * g_tp.wire_local.pitch; 822SN/A } 832SN/A 842SN/A assert(area.h > 0); 852SN/A assert(area.w > 0); 862SN/A compute_C(); 872SN/A} 882SN/A 892SN/A 902SN/A 912SN/ASubarray::~Subarray() { 922SN/A} 932SN/A 942SN/A 952SN/A 964762SN/Adouble Subarray::get_total_cell_area() { 974762SN/A// return (is_fa==false? cell.get_area() * num_rows * num_cols 982SN/A// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram)); 995034SN/A// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)); 1002SN/A// //: cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam + sram_cell.get_area()*(num_rows+1)*num_cols_fa_ram);//for FA, this area does not include the dummy cells in SRAM arrays. 101 102 if (!(is_fa || dp.pure_cam)) 103 return (cell.get_area() * num_rows * num_cols); 104 else if (is_fa) { 105 //for FA, this area includes the dummy cells in SRAM arrays. 
106 //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)); 107 //cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl; 108 return (cam_cell.h * (num_rows + 1) * 109 (cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)); 110 } else { 111 return (cam_cell.get_area() * (num_rows + 1) * num_cols_fa_cam ); 112 } 113 114 115} 116 117 118 119void Subarray::compute_C() { 120 double c_w_metal = cell.w * g_tp.wire_local.C_per_um; 121 double r_w_metal = cell.w * g_tp.wire_local.R_per_um; 122 double C_b_metal = cell.h * g_tp.wire_local.C_per_um; 123 double C_b_row_drain_C; 124 125 if (dp.is_dram) { 126 C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols; 127 128 if (dp.ram_cell_tech_type == comm_dram) { 129 C_bl = num_rows * C_b_metal; 130 } else { 131 C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact 132 C_bl = num_rows * (C_b_row_drain_C + C_b_metal); 133 } 134 } else { 135 if (!(is_fa || dp.pure_cam)) { 136 C_wl = (gate_C_pass(g_tp.sram.cell_a_w, 137 (g_tp.sram.b_w - 2 * g_tp.sram.cell_a_w) / 2.0, 138 false, true) * 2 + 139 c_w_metal) * num_cols; 140 C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact 141 C_bl = num_rows * (C_b_row_drain_C + C_b_metal); 142 } else { 143 //Following is wordline not matchline 144 //CAM portion 145 c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um; 146 r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um; 147 C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w, 148 (g_tp.cam.b_w - 2 * g_tp.cam.cell_a_w) / 149 2.0, false, true) * 2 + 150 c_w_metal) * num_cols_fa_cam; 151 R_wl_cam = (r_w_metal) * num_cols_fa_cam; 152 153 if (!dp.pure_cam) { 154 //RAM portion 155 c_w_metal = cell.w * g_tp.wire_local.C_per_um; 156 r_w_metal = cell.w * g_tp.wire_local.R_per_um; 157 
C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w, 158 (g_tp.sram.b_w - 2 * 159 g_tp.sram.cell_a_w) / 2.0, false, 160 true) * 2 + 161 c_w_metal) * num_cols_fa_ram; 162 R_wl_ram = (r_w_metal) * num_cols_fa_ram; 163 } else { 164 C_wl_ram = R_wl_ram = 0; 165 } 166 C_wl = C_wl_cam + C_wl_ram; 167 C_wl += (16 + 128) * g_tp.wire_local.pitch * 168 g_tp.wire_local.C_per_um; 169 170 R_wl = R_wl_cam + R_wl_ram; 171 R_wl += (16 + 128) * g_tp.wire_local.pitch * 172 g_tp.wire_local.R_per_um; 173 174 //there are two ways to write to a FA, 175 //1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM; 176 //2) using separate wordline for read/write and search in RAM. 177 //We are using the second approach. 178 179 //Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations. 180 C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um; 181 C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact 182 C_bl_cam = (num_rows + 1) * (C_b_row_drain_C + C_b_metal); 183 //height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells 184 C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact 185 C_bl = (num_rows + 1) * (C_b_row_drain_C + C_b_metal); 186 187 } 188 } 189} 190 191 192