FaultModel.cc (8946:fb6c89334b86) FaultModel.cc (11320:42ecb523c64a)
1/*
2 * Copyright (c) 2011 Massachusetts Institute of Technology
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;

--- 17 unchanged lines hidden (view full) ---

26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Konstantinos Aisopos
29 */
30
31/*
32 * Official Tool Website: www.mit.edu/~kaisopos/FaultModel
33 *
1/*
2 * Copyright (c) 2011 Massachusetts Institute of Technology
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;

--- 17 unchanged lines hidden (view full) ---

26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Konstantinos Aisopos
29 */
30
31/*
32 * Official Tool Website: www.mit.edu/~kaisopos/FaultModel
33 *
34 * If you use our tool for academic research, we request that you cite:
34 * If you use our tool for academic research, we request that you cite:
35 * Konstantinos Aisopos, Chia-Hsin Owen Chen, and Li-Shiuan Peh. Enabling
36 * System-Level Modeling of Variation-Induced Faults in Networks-on-Chip.
37 * Proceedings of the 48th Design Automation Conference (DAC'11)
38 */
39
40// C++ includes
41#include <cassert>
42#include <fstream>
43#include <iostream>
44#include <vector>
45
46// GEM5 includes
47#include "FaultModel.hh"
35 * Konstantinos Aisopos, Chia-Hsin Owen Chen, and Li-Shiuan Peh. Enabling
36 * System-Level Modeling of Variation-Induced Faults in Networks-on-Chip.
37 * Proceedings of the 48th Design Automation Conference (DAC'11)
38 */
39
40// C++ includes
41#include <cassert>
42#include <fstream>
43#include <iostream>
44#include <vector>
45
46// GEM5 includes
47#include "FaultModel.hh"
48#include "base/misc.hh"
48#include "base/misc.hh"
49
50using namespace std;
51
52#define MAX(a,b) ((a > b) ? (a) : (b))
53
54
55FaultModel::FaultModel(const Params *p) : SimObject(p)
56{
57 // read configurations into "configurations" vector
49
50using namespace std;
51
52#define MAX(a,b) ((a > b) ? (a) : (b))
53
54
55FaultModel::FaultModel(const Params *p) : SimObject(p)
56{
57 // read configurations into "configurations" vector
58 // format: <buff/vc> <vcs> <10 fault types>
58 // format: <10 fault types>
59 bool more_records = true;
60 for (int i = 0; more_records; i += (fields_per_conf_record)){
61 system_conf configuration;
59 bool more_records = true;
60 for (int i = 0; more_records; i += (fields_per_conf_record)){
61 system_conf configuration;
62 configuration.buff_per_vc =
62 configuration.buff_per_vc =
63 p->baseline_fault_vector_database[i + conf_record_buff_per_vc];
63 p->baseline_fault_vector_database[i + conf_record_buff_per_vc];
64 configuration.vcs =
64 configuration.vcs =
65 p->baseline_fault_vector_database[i + conf_record_vcs];
65 p->baseline_fault_vector_database[i + conf_record_vcs];
66 for (int fault_index = 0; fault_index < number_of_fault_types;
66 for (int fault_index = 0; fault_index < number_of_fault_types;
67 fault_index++){
67 fault_index++){
68 configuration.fault_type[fault_index] =
69 p->baseline_fault_vector_database[i +
68 configuration.fault_type[fault_index] =
69 p->baseline_fault_vector_database[i +
70 conf_record_first_fault_type + fault_index] / 100;
71 }
72 configurations.push_back(configuration);
73 if (p->baseline_fault_vector_database[i+fields_per_conf_record] < 0){
74 more_records = false;
75 }
76 }
77
78 // read temperature weights into "temperature_weights" vector
79 // format: <temperature> <weight>
80 more_records = true;
81 for (int i = 0; more_records; i += (fields_per_temperature_record)){
70 conf_record_first_fault_type + fault_index] / 100;
71 }
72 configurations.push_back(configuration);
73 if (p->baseline_fault_vector_database[i+fields_per_conf_record] < 0){
74 more_records = false;
75 }
76 }
77
78 // read temperature weights into "temperature_weights" vector
79 // format: <temperature> <weight>
80 more_records = true;
81 for (int i = 0; more_records; i += (fields_per_temperature_record)){
82 int record_temperature =
82 int record_temperature =
83 p->temperature_weights_database[i + temperature_record_temp];
84 int record_weight =
85 p->temperature_weights_database[i + temperature_record_weight];
86 static int first_record = true;
87 if (first_record){
83 p->temperature_weights_database[i + temperature_record_temp];
84 int record_weight =
85 p->temperature_weights_database[i + temperature_record_weight];
86 static int first_record = true;
87 if (first_record){
88 for (int temperature = 0; temperature < record_temperature;
88 for (int temperature = 0; temperature < record_temperature;
89 temperature++){
90 temperature_weights.push_back(0);
91 }
92 first_record = false;
93 }
94 assert(record_temperature == temperature_weights.size());
95 temperature_weights.push_back(record_weight);
89 temperature++){
90 temperature_weights.push_back(0);
91 }
92 first_record = false;
93 }
94 assert(record_temperature == temperature_weights.size());
95 temperature_weights.push_back(record_weight);
96 if (p->temperature_weights_database[i +
96 if (p->temperature_weights_database[i +
97 fields_per_temperature_record] < 0){
98 more_records = false;
99 }
100 }
101}
102
103string
104FaultModel::fault_type_to_string(int ft)

--- 21 unchanged lines hidden (view full) ---

126 } else if (ft == number_of_fault_types){
127 return "none";
128 } else {
129 return "none";
130 }
131}
132
133
97 fields_per_temperature_record] < 0){
98 more_records = false;
99 }
100 }
101}
102
103string
104FaultModel::fault_type_to_string(int ft)

--- 21 unchanged lines hidden (view full) ---

126 } else if (ft == number_of_fault_types){
127 return "none";
128 } else {
129 return "none";
130 }
131}
132
133
134int
135FaultModel::declare_router(int number_of_inputs,
136 int number_of_outputs,
137 int number_of_vcs_per_input,
138 int number_of_buff_per_data_vc,
134int
135FaultModel::declare_router(int number_of_inputs,
136 int number_of_outputs,
137 int number_of_vcs_per_input,
138 int number_of_buff_per_data_vc,
139 int number_of_buff_per_ctrl_vc)
140{
141 // check inputs (are they legal?)
142 if (number_of_inputs <= 0 || number_of_outputs <= 0 ||
139 int number_of_buff_per_ctrl_vc)
140{
141 // check inputs (are they legal?)
142 if (number_of_inputs <= 0 || number_of_outputs <= 0 ||
143 number_of_vcs_per_input <= 0 || number_of_buff_per_data_vc <= 0 ||
143 number_of_vcs_per_input <= 0 || number_of_buff_per_data_vc <= 0 ||
144 number_of_buff_per_ctrl_vc <= 0){
145 fatal("Fault Model: ERROR in argument of FaultModel_declare_router!");
146 }
147 int number_of_buffers_per_vc = MAX(number_of_buff_per_data_vc,
148 number_of_buff_per_ctrl_vc);
149 int total_vcs = number_of_inputs * number_of_vcs_per_input;
150 if (total_vcs > MAX_VCs){
151 fatal("Fault Model: ERROR! Number inputs*VCs (MAX_VCs) unsupported");
152 }
153 if (number_of_buffers_per_vc > MAX_BUFFERS_per_VC){
154 fatal("Fault Model: ERROR! buffers/VC (MAX_BUFFERS_per_VC) too high");
155 }
156
157 // link the router to a DB record
158 int record_hit = -1;
144 number_of_buff_per_ctrl_vc <= 0){
145 fatal("Fault Model: ERROR in argument of FaultModel_declare_router!");
146 }
147 int number_of_buffers_per_vc = MAX(number_of_buff_per_data_vc,
148 number_of_buff_per_ctrl_vc);
149 int total_vcs = number_of_inputs * number_of_vcs_per_input;
150 if (total_vcs > MAX_VCs){
151 fatal("Fault Model: ERROR! Number inputs*VCs (MAX_VCs) unsupported");
152 }
153 if (number_of_buffers_per_vc > MAX_BUFFERS_per_VC){
154 fatal("Fault Model: ERROR! buffers/VC (MAX_BUFFERS_per_VC) too high");
155 }
156
157 // link the router to a DB record
158 int record_hit = -1;
159 for (int record = 0; record < configurations.size(); record++){
159 for (int record = 0; record < configurations.size(); record++){
160 if ((configurations[record].buff_per_vc == number_of_buffers_per_vc)&&
161 (configurations[record].vcs == total_vcs)){
162 record_hit = record;
163 }
164 }
165 if (record_hit == -1){
166 panic("Fault Model: ERROR! configuration not found in DB. BUG?");
167 }
168
169 // remember the router and return its ID
170 routers.push_back(configurations[record_hit]);
171 static int router_index = 0;
172 return router_index++;
173}
174
160 if ((configurations[record].buff_per_vc == number_of_buffers_per_vc)&&
161 (configurations[record].vcs == total_vcs)){
162 record_hit = record;
163 }
164 }
165 if (record_hit == -1){
166 panic("Fault Model: ERROR! configuration not found in DB. BUG?");
167 }
168
169 // remember the router and return its ID
170 routers.push_back(configurations[record_hit]);
171 static int router_index = 0;
172 return router_index++;
173}
174
175bool
176FaultModel::fault_vector(int routerID,
175bool
176FaultModel::fault_vector(int routerID,
177 int temperature_input,
178 float fault_vector[])
179{
180 bool ok = true;
181
182 // is the routerID recorded?
183 if (routerID < 0 || routerID >= ((int) routers.size())){
184 warn("Fault Model: ERROR! unknown router ID argument.");
177 int temperature_input,
178 float fault_vector[])
179{
180 bool ok = true;
181
182 // is the routerID recorded?
183 if (routerID < 0 || routerID >= ((int) routers.size())){
184 warn("Fault Model: ERROR! unknown router ID argument.");
185 fatal("Fault Model: Did you enable the fault model flag)?");
185 fatal("Fault Model: Did you enable the fault model flag)?");
186 }
187
188 // is the temperature too high/too low?
189 int temperature = temperature_input;
190 if (temperature_input >= ((int) temperature_weights.size())){
191 ok = false;
192 warn_once("Fault Model: Temperature exceeded simulated upper bound.");
193 warn_once("Fault Model: The fault model is not accurate any more.");
194 temperature = (temperature_weights.size() - 1);
195 } else if (temperature_input < 0){
196 ok = false;
197 warn_once("Fault Model: Temperature exceeded simulated lower bound.");
198 warn_once("Fault Model: The fault model is not accurate any more.");
199 temperature = 0;
200 }
201
202 // recover the router record and return its fault vector
203 for (int i = 0; i < number_of_fault_types; i++){
186 }
187
188 // is the temperature too high/too low?
189 int temperature = temperature_input;
190 if (temperature_input >= ((int) temperature_weights.size())){
191 ok = false;
192 warn_once("Fault Model: Temperature exceeded simulated upper bound.");
193 warn_once("Fault Model: The fault model is not accurate any more.");
194 temperature = (temperature_weights.size() - 1);
195 } else if (temperature_input < 0){
196 ok = false;
197 warn_once("Fault Model: Temperature exceeded simulated lower bound.");
198 warn_once("Fault Model: The fault model is not accurate any more.");
199 temperature = 0;
200 }
201
202 // recover the router record and return its fault vector
203 for (int i = 0; i < number_of_fault_types; i++){
204 fault_vector[i] = routers[routerID].fault_type[i] *
204 fault_vector[i] = routers[routerID].fault_type[i] *
205 ((float)temperature_weights[temperature]);
206 }
207 return ok;
208}
209
205 ((float)temperature_weights[temperature]);
206 }
207 return ok;
208}
209
210bool
211FaultModel::fault_prob(int routerID,
210bool
211FaultModel::fault_prob(int routerID,
212 int temperature_input,
213 float *aggregate_fault_prob)
214{
215 *aggregate_fault_prob = 1.0;
216 bool ok = true;
217
218 // is the routerID recorded?
219 if (routerID < 0 || routerID >= ((int) routers.size())){
220 warn("Fault Model: ERROR! unknown router ID argument.");
212 int temperature_input,
213 float *aggregate_fault_prob)
214{
215 *aggregate_fault_prob = 1.0;
216 bool ok = true;
217
218 // is the routerID recorded?
219 if (routerID < 0 || routerID >= ((int) routers.size())){
220 warn("Fault Model: ERROR! unknown router ID argument.");
221 fatal("Fault Model: Did you enable the fault model flag)?");
221 fatal("Fault Model: Did you enable the fault model flag)?");
222 }
223
224 // is the temperature too high/too low?
225 int temperature = temperature_input;
226 if (temperature_input >= ((int) temperature_weights.size()) ){
227 ok = false;
228 warn_once("Fault Model: Temperature exceeded simulated upper bound.");
229 warn_once("Fault Model: The fault model is not accurate any more.");
230 temperature = (temperature_weights.size()-1);
231 } else if (temperature_input < 0){
232 ok = false;
233 warn_once("Fault Model: Temperature exceeded simulated lower bound.");
234 warn_once("Fault Model: The fault model is not accurate any more.");
235 temperature = 0;
236 }
237
238 // recover the router record and return its aggregate fault probability
239 for (int i = 0; i < number_of_fault_types; i++){
222 }
223
224 // is the temperature too high/too low?
225 int temperature = temperature_input;
226 if (temperature_input >= ((int) temperature_weights.size()) ){
227 ok = false;
228 warn_once("Fault Model: Temperature exceeded simulated upper bound.");
229 warn_once("Fault Model: The fault model is not accurate any more.");
230 temperature = (temperature_weights.size()-1);
231 } else if (temperature_input < 0){
232 ok = false;
233 warn_once("Fault Model: Temperature exceeded simulated lower bound.");
234 warn_once("Fault Model: The fault model is not accurate any more.");
235 temperature = 0;
236 }
237
238 // recover the router record and return its aggregate fault probability
239 for (int i = 0; i < number_of_fault_types; i++){
240 *aggregate_fault_prob= *aggregate_fault_prob *
241 ( 1.0 - (routers[routerID].fault_type[i] *
240 *aggregate_fault_prob= *aggregate_fault_prob *
241 ( 1.0 - (routers[routerID].fault_type[i] *
242 ((float)temperature_weights[temperature])) );
243 }
244 *aggregate_fault_prob = 1.0 - *aggregate_fault_prob;
245 return ok;
246}
247
248// this function is used only for debugging purposes
242 ((float)temperature_weights[temperature])) );
243 }
244 *aggregate_fault_prob = 1.0 - *aggregate_fault_prob;
245 return ok;
246}
247
248// this function is used only for debugging purposes
249void
249void
250FaultModel::print(void)
251{
252 cout << "--- PRINTING configurations ---\n";
253 for (int record = 0; record < configurations.size(); record++){
254 cout << "(" << record << ") ";
255 cout << "VCs=" << configurations[record].vcs << " ";
256 cout << "Buff/VC=" << configurations[record].buff_per_vc << " [";
250FaultModel::print(void)
251{
252 cout << "--- PRINTING configurations ---\n";
253 for (int record = 0; record < configurations.size(); record++){
254 cout << "(" << record << ") ";
255 cout << "VCs=" << configurations[record].vcs << " ";
256 cout << "Buff/VC=" << configurations[record].buff_per_vc << " [";
257 for (int fault_type_num = 0;
258 fault_type_num < number_of_fault_types;
257 for (int fault_type_num = 0;
258 fault_type_num < number_of_fault_types;
259 fault_type_num++){
259 fault_type_num++){
260 cout << (100 * configurations[record].fault_type[fault_type_num]);
260 cout << (100 * configurations[record].fault_type[fault_type_num]);
261 cout << "% ";
262 }
263 cout << "]\n";
264 }
265 cout << "--- PRINTING temperature weights ---\n";
266 for (int record = 0; record < temperature_weights.size(); record++){
267 cout << "temperature=" << record << " => ";
268 cout << "weight=" << temperature_weights[record];
269 cout << "\n";
270 }
271}
272
273FaultModel *
274FaultModelParams::create()
275{
276 return new FaultModel(this);
277}
261 cout << "% ";
262 }
263 cout << "]\n";
264 }
265 cout << "--- PRINTING temperature weights ---\n";
266 for (int record = 0; record < temperature_weights.size(); record++){
267 cout << "temperature=" << record << " => ";
268 cout << "weight=" << temperature_weights[record];
269 cout << "\n";
270 }
271}
272
273FaultModel *
274FaultModelParams::create()
275{
276 return new FaultModel(this);
277}