// FaultModel.cc revision 8946:fb6c89334b86
1/* 2 * Copyright (c) 2011 Massachusetts Institute of Technology 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Authors: Konstantinos Aisopos 29 */ 30 31/* 32 * Official Tool Website: www.mit.edu/~kaisopos/FaultModel 33 * 34 * If you use our tool for academic research, we request that you cite: 35 * Konstantinos Aisopos, Chia-Hsin Owen Chen, and Li-Shiuan Peh. Enabling 36 * System-Level Modeling of Variation-Induced Faults in Networks-on-Chip. 
37 * Proceedings of the 48th Design Automation Conference (DAC'11) 38 */ 39 40// C++ includes 41#include <cassert> 42#include <fstream> 43#include <iostream> 44#include <vector> 45 46// GEM5 includes 47#include "FaultModel.hh" 48#include "base/misc.hh" 49 50using namespace std; 51 52#define MAX(a,b) ((a > b) ? (a) : (b)) 53 54 55FaultModel::FaultModel(const Params *p) : SimObject(p) 56{ 57 // read configurations into "configurations" vector 58 // format: <buff/vc> <vcs> <10 fault types> 59 bool more_records = true; 60 for (int i = 0; more_records; i += (fields_per_conf_record)){ 61 system_conf configuration; 62 configuration.buff_per_vc = 63 p->baseline_fault_vector_database[i + conf_record_buff_per_vc]; 64 configuration.vcs = 65 p->baseline_fault_vector_database[i + conf_record_vcs]; 66 for (int fault_index = 0; fault_index < number_of_fault_types; 67 fault_index++){ 68 configuration.fault_type[fault_index] = 69 p->baseline_fault_vector_database[i + 70 conf_record_first_fault_type + fault_index] / 100; 71 } 72 configurations.push_back(configuration); 73 if (p->baseline_fault_vector_database[i+fields_per_conf_record] < 0){ 74 more_records = false; 75 } 76 } 77 78 // read temperature weights into "temperature_weights" vector 79 // format: <temperature> <weight> 80 more_records = true; 81 for (int i = 0; more_records; i += (fields_per_temperature_record)){ 82 int record_temperature = 83 p->temperature_weights_database[i + temperature_record_temp]; 84 int record_weight = 85 p->temperature_weights_database[i + temperature_record_weight]; 86 static int first_record = true; 87 if (first_record){ 88 for (int temperature = 0; temperature < record_temperature; 89 temperature++){ 90 temperature_weights.push_back(0); 91 } 92 first_record = false; 93 } 94 assert(record_temperature == temperature_weights.size()); 95 temperature_weights.push_back(record_weight); 96 if (p->temperature_weights_database[i + 97 fields_per_temperature_record] < 0){ 98 more_records = false; 99 } 100 } 
101} 102 103string 104FaultModel::fault_type_to_string(int ft) 105{ 106 if (ft == data_corruption__few_bits){ 107 return "data_corruption__few_bits"; 108 } else if (ft == data_corruption__all_bits){ 109 return "data_corruption__all_bits"; 110 } else if (ft == flit_conservation__flit_duplication){ 111 return "flit_conservation__flit_duplication"; 112 } else if (ft == flit_conservation__flit_loss_or_split){ 113 return "flit_conservation__flit_loss_or_split"; 114 } else if (ft == misrouting){ 115 return "misrouting"; 116 } else if (ft == credit_conservation__credit_generation){ 117 return "credit_conservation__credit_generation"; 118 } else if (ft == credit_conservation__credit_loss){ 119 return "credit_conservation__credit_loss"; 120 } else if (ft == erroneous_allocation__VC){ 121 return "erroneous_allocation__VC"; 122 } else if (ft == erroneous_allocation__switch){ 123 return "erroneous_allocation__switch"; 124 } else if (ft == unfair_arbitration){ 125 return "unfair_arbitration"; 126 } else if (ft == number_of_fault_types){ 127 return "none"; 128 } else { 129 return "none"; 130 } 131} 132 133 134int 135FaultModel::declare_router(int number_of_inputs, 136 int number_of_outputs, 137 int number_of_vcs_per_input, 138 int number_of_buff_per_data_vc, 139 int number_of_buff_per_ctrl_vc) 140{ 141 // check inputs (are they legal?) 142 if (number_of_inputs <= 0 || number_of_outputs <= 0 || 143 number_of_vcs_per_input <= 0 || number_of_buff_per_data_vc <= 0 || 144 number_of_buff_per_ctrl_vc <= 0){ 145 fatal("Fault Model: ERROR in argument of FaultModel_declare_router!"); 146 } 147 int number_of_buffers_per_vc = MAX(number_of_buff_per_data_vc, 148 number_of_buff_per_ctrl_vc); 149 int total_vcs = number_of_inputs * number_of_vcs_per_input; 150 if (total_vcs > MAX_VCs){ 151 fatal("Fault Model: ERROR! Number inputs*VCs (MAX_VCs) unsupported"); 152 } 153 if (number_of_buffers_per_vc > MAX_BUFFERS_per_VC){ 154 fatal("Fault Model: ERROR! 
buffers/VC (MAX_BUFFERS_per_VC) too high"); 155 } 156 157 // link the router to a DB record 158 int record_hit = -1; 159 for (int record = 0; record < configurations.size(); record++){ 160 if ((configurations[record].buff_per_vc == number_of_buffers_per_vc)&& 161 (configurations[record].vcs == total_vcs)){ 162 record_hit = record; 163 } 164 } 165 if (record_hit == -1){ 166 panic("Fault Model: ERROR! configuration not found in DB. BUG?"); 167 } 168 169 // remember the router and return its ID 170 routers.push_back(configurations[record_hit]); 171 static int router_index = 0; 172 return router_index++; 173} 174 175bool 176FaultModel::fault_vector(int routerID, 177 int temperature_input, 178 float fault_vector[]) 179{ 180 bool ok = true; 181 182 // is the routerID recorded? 183 if (routerID < 0 || routerID >= ((int) routers.size())){ 184 warn("Fault Model: ERROR! unknown router ID argument."); 185 fatal("Fault Model: Did you enable the fault model flag)?"); 186 } 187 188 // is the temperature too high/too low? 
189 int temperature = temperature_input; 190 if (temperature_input >= ((int) temperature_weights.size())){ 191 ok = false; 192 warn_once("Fault Model: Temperature exceeded simulated upper bound."); 193 warn_once("Fault Model: The fault model is not accurate any more."); 194 temperature = (temperature_weights.size() - 1); 195 } else if (temperature_input < 0){ 196 ok = false; 197 warn_once("Fault Model: Temperature exceeded simulated lower bound."); 198 warn_once("Fault Model: The fault model is not accurate any more."); 199 temperature = 0; 200 } 201 202 // recover the router record and return its fault vector 203 for (int i = 0; i < number_of_fault_types; i++){ 204 fault_vector[i] = routers[routerID].fault_type[i] * 205 ((float)temperature_weights[temperature]); 206 } 207 return ok; 208} 209 210bool 211FaultModel::fault_prob(int routerID, 212 int temperature_input, 213 float *aggregate_fault_prob) 214{ 215 *aggregate_fault_prob = 1.0; 216 bool ok = true; 217 218 // is the routerID recorded? 219 if (routerID < 0 || routerID >= ((int) routers.size())){ 220 warn("Fault Model: ERROR! unknown router ID argument."); 221 fatal("Fault Model: Did you enable the fault model flag)?"); 222 } 223 224 // is the temperature too high/too low? 
225 int temperature = temperature_input; 226 if (temperature_input >= ((int) temperature_weights.size()) ){ 227 ok = false; 228 warn_once("Fault Model: Temperature exceeded simulated upper bound."); 229 warn_once("Fault Model: The fault model is not accurate any more."); 230 temperature = (temperature_weights.size()-1); 231 } else if (temperature_input < 0){ 232 ok = false; 233 warn_once("Fault Model: Temperature exceeded simulated lower bound."); 234 warn_once("Fault Model: The fault model is not accurate any more."); 235 temperature = 0; 236 } 237 238 // recover the router record and return its aggregate fault probability 239 for (int i = 0; i < number_of_fault_types; i++){ 240 *aggregate_fault_prob= *aggregate_fault_prob * 241 ( 1.0 - (routers[routerID].fault_type[i] * 242 ((float)temperature_weights[temperature])) ); 243 } 244 *aggregate_fault_prob = 1.0 - *aggregate_fault_prob; 245 return ok; 246} 247 248// this function is used only for debugging purposes 249void 250FaultModel::print(void) 251{ 252 cout << "--- PRINTING configurations ---\n"; 253 for (int record = 0; record < configurations.size(); record++){ 254 cout << "(" << record << ") "; 255 cout << "VCs=" << configurations[record].vcs << " "; 256 cout << "Buff/VC=" << configurations[record].buff_per_vc << " ["; 257 for (int fault_type_num = 0; 258 fault_type_num < number_of_fault_types; 259 fault_type_num++){ 260 cout << (100 * configurations[record].fault_type[fault_type_num]); 261 cout << "% "; 262 } 263 cout << "]\n"; 264 } 265 cout << "--- PRINTING temperature weights ---\n"; 266 for (int record = 0; record < temperature_weights.size(); record++){ 267 cout << "temperature=" << record << " => "; 268 cout << "weight=" << temperature_weights[record]; 269 cout << "\n"; 270 } 271} 272 273FaultModel * 274FaultModelParams::create() 275{ 276 return new FaultModel(this); 277} 278