subarray.cc (10152:52c552138ba1) subarray.cc (10234:5cb711fa6176)
1/*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
1/*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
5 * All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
6 * All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the

--- 7 unchanged lines hidden (view full) ---

21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 ***************************************************************************/
31
32
33
34
35#include <cassert>
36#include <cmath>
37#include <iostream>
38
39#include "subarray.h"
40
41Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_):
30 *
31 ***************************************************************************/
32
33
34
35
36#include <cassert>
37#include <cmath>
38#include <iostream>
39
40#include "subarray.h"
41
42Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_):
42 dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray),
43 num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray),
44 cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_)
45{
46 //num_cols=7;
47 //cout<<"num_cols ="<< num_cols <<endl;
48 if (!(is_fa || dp.pure_cam))
49 {
50 num_cols +=(g_ip->add_ecc_b_ ? (int)ceil(num_cols / num_bits_per_ecc_b_) : 0); // ECC overhead
51 uint32_t ram_num_cells_wl_stitching =
52 (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ :
53 (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_;
43 dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray),
44 num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray),
45 cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_) {
46 //num_cols=7;
47 //cout<<"num_cols ="<< num_cols <<endl;
48 if (!(is_fa || dp.pure_cam)) {
49 // ECC overhead
50 num_cols += (g_ip->add_ecc_b_ ? (int)ceil(num_cols /
51 num_bits_per_ecc_b_) : 0);
52 uint32_t ram_num_cells_wl_stitching =
53 (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ :
54 (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_;
54
55
55 area.h = cell.h * num_rows;
56 area.h = cell.h * num_rows;
56
57
57 area.w = cell.w * num_cols +
58 ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead
59 }
60 else //cam fa
61 {
58 area.w = cell.w * num_cols +
59 ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead
60 } else { //cam fa
62
61
63 //should not add dummy row here since the dummy row do not need decoder
64 if (is_fa)// fully associative cache
65 {
66 num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
67 num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0);
68 num_cols = num_cols_fa_cam + num_cols_fa_ram;
69 }
70 else
71 {
72 num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
73 num_cols_fa_ram = 0;
74 num_cols = num_cols_fa_cam;
75 }
62 //should not add dummy row here since the dummy row do not need decoder
63 if (is_fa) { // fully associative cache
64 num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
65 num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0);
66 num_cols = num_cols_fa_cam + num_cols_fa_ram;
67 } else {
68 num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
69 num_cols_fa_ram = 0;
70 num_cols = num_cols_fa_cam;
71 }
76
72
77 area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
78 area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
79 + ceil((num_cols_fa_cam + num_cols_fa_ram) / sram_num_cells_wl_stitching_)*g_tp.ram_wl_stitching_overhead_
80 + 16*g_tp.wire_local.pitch //the overhead for the NAND gate to connect the two halves
81 + 128*g_tp.wire_local.pitch;//the overhead for the drivers from matchline to wordline of RAM
82 }
73 area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
74 area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
75 + ceil((num_cols_fa_cam + num_cols_fa_ram) /
76 sram_num_cells_wl_stitching_) *
77 g_tp.ram_wl_stitching_overhead_
78 //the overhead for the NAND gate to connect the two halves
79 + 16 * g_tp.wire_local.pitch
80 //the overhead for the drivers from matchline to wordline of RAM
81 + 128 * g_tp.wire_local.pitch;
82 }
83
83
84 assert(area.h>0);
85 assert(area.w>0);
86 compute_C();
84 assert(area.h > 0);
85 assert(area.w > 0);
86 compute_C();
87}
88
89
90
87}
88
89
90
91Subarray::~Subarray()
92{
91Subarray::~Subarray() {
93}
94
95
96
92}
93
94
95
97double Subarray::get_total_cell_area()
98{
96double Subarray::get_total_cell_area() {
99// return (is_fa==false? cell.get_area() * num_rows * num_cols
100// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram));
101// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
102// //: cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam + sram_cell.get_area()*(num_rows+1)*num_cols_fa_ram);//for FA, this area does not include the dummy cells in SRAM arrays.
103
104 if (!(is_fa || dp.pure_cam))
97// return (is_fa==false? cell.get_area() * num_rows * num_cols
98// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram));
99// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
100// //: cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam + sram_cell.get_area()*(num_rows+1)*num_cols_fa_ram);//for FA, this area does not include the dummy cells in SRAM arrays.
101
102 if (!(is_fa || dp.pure_cam))
105 return (cell.get_area() * num_rows * num_cols);
106 else if (is_fa)
107 { //for FA, this area includes the dummy cells in SRAM arrays.
108 //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
109 //cout<<"diff" <110 return (cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
103 return (cell.get_area() * num_rows * num_cols);
104 else if (is_fa) {
105 //for FA, this area includes the dummy cells in SRAM arrays.
106 //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
107 //cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl;
108 return (cam_cell.h * (num_rows + 1) *
109 (cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
110 } else {
111 return (cam_cell.get_area() * (num_rows + 1) * num_cols_fa_cam );
111 }
112 }
112 else
113 return (cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam );
114
115
116}
117
118
119
113
114
115}
116
117
118
120void Subarray::compute_C()
121{
122 double c_w_metal = cell.w * g_tp.wire_local.C_per_um;
123 double r_w_metal = cell.w * g_tp.wire_local.R_per_um;
124 double C_b_metal = cell.h * g_tp.wire_local.C_per_um;
125 double C_b_row_drain_C;
119void Subarray::compute_C() {
120 double c_w_metal = cell.w * g_tp.wire_local.C_per_um;
121 double r_w_metal = cell.w * g_tp.wire_local.R_per_um;
122 double C_b_metal = cell.h * g_tp.wire_local.C_per_um;
123 double C_b_row_drain_C;
126
124
127 if (dp.is_dram)
128 {
129 C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
125 if (dp.is_dram) {
126 C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
130
127
131 if (dp.ram_cell_tech_type == comm_dram)
132 {
133 C_bl = num_rows * C_b_metal;
134 }
135 else
136 {
137 C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
138 C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
139 }
140 }
141 else
142 {
143 if (!(is_fa ||dp.pure_cam))
144 {
145 C_wl = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
146 c_w_metal) * num_cols;
147 C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
148 C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
149 }
150 else
151 {
152 //Following is wordline not matchline
153 //CAM portion
154 c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
155 r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
156 C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w, (g_tp.cam.b_w-2*g_tp.cam.cell_a_w)/2.0, false, true)*2 +
157 c_w_metal) * num_cols_fa_cam;
158 R_wl_cam = (r_w_metal) * num_cols_fa_cam;
128 if (dp.ram_cell_tech_type == comm_dram) {
129 C_bl = num_rows * C_b_metal;
130 } else {
131 C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
132 C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
133 }
134 } else {
135 if (!(is_fa || dp.pure_cam)) {
136 C_wl = (gate_C_pass(g_tp.sram.cell_a_w,
137 (g_tp.sram.b_w - 2 * g_tp.sram.cell_a_w) / 2.0,
138 false, true) * 2 +
139 c_w_metal) * num_cols;
140 C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
141 C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
142 } else {
143 //Following is wordline not matchline
144 //CAM portion
145 c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
146 r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
147 C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w,
148 (g_tp.cam.b_w - 2 * g_tp.cam.cell_a_w) /
149 2.0, false, true) * 2 +
150 c_w_metal) * num_cols_fa_cam;
151 R_wl_cam = (r_w_metal) * num_cols_fa_cam;
159
152
160 if (!dp.pure_cam)
161 {
162 //RAM portion
163 c_w_metal = cell.w * g_tp.wire_local.C_per_um;
164 r_w_metal = cell.w * g_tp.wire_local.R_per_um;
165 C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
166 c_w_metal) * num_cols_fa_ram;
167 R_wl_ram = (r_w_metal) * num_cols_fa_ram;
168 }
169 else
170 {
171 C_wl_ram = R_wl_ram =0;
172 }
173 C_wl = C_wl_cam + C_wl_ram;
174 C_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.C_per_um;
153 if (!dp.pure_cam) {
154 //RAM portion
155 c_w_metal = cell.w * g_tp.wire_local.C_per_um;
156 r_w_metal = cell.w * g_tp.wire_local.R_per_um;
157 C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w,
158 (g_tp.sram.b_w - 2 *
159 g_tp.sram.cell_a_w) / 2.0, false,
160 true) * 2 +
161 c_w_metal) * num_cols_fa_ram;
162 R_wl_ram = (r_w_metal) * num_cols_fa_ram;
163 } else {
164 C_wl_ram = R_wl_ram = 0;
165 }
166 C_wl = C_wl_cam + C_wl_ram;
167 C_wl += (16 + 128) * g_tp.wire_local.pitch *
168 g_tp.wire_local.C_per_um;
175
169
176 R_wl = R_wl_cam + R_wl_ram;
177 R_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.R_per_um;
170 R_wl = R_wl_cam + R_wl_ram;
171 R_wl += (16 + 128) * g_tp.wire_local.pitch *
172 g_tp.wire_local.R_per_um;
178
173
179 //there are two ways to write to a FA,
180 //1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM;
181 //2) using separate wordline for read/write and search in RAM.
182 //We are using the second approach.
174 //there are two ways to write to a FA,
175 //1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM;
176 //2) using separate wordline for read/write and search in RAM.
177 //We are using the second approach.
183
178
184 //Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations.
185 C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um;
186 C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact
187 C_bl_cam = (num_rows+1) * (C_b_row_drain_C + C_b_metal);
188 //height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
189 C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
190 C_bl = (num_rows +1) * (C_b_row_drain_C + C_b_metal);
179 //Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations.
180 C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um;
181 C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact
182 C_bl_cam = (num_rows + 1) * (C_b_row_drain_C + C_b_metal);
183 //height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
184 C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
185 C_bl = (num_rows + 1) * (C_b_row_drain_C + C_b_metal);
191
186
192 }
193 }
187 }
188 }
194}
195
196
189}
190
191