// FaultModel.cc revision 8612:df3b7a1e883f
1/* 2 * Copyright (c) 2011 Massachusetts Institute of Technology 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Authors: Konstantinos Aisopos 29 */ 30 31/* 32 * Official Tool Website: www.mit.edu/~kaisopos/FaultModel 33 * 34 * If you use our tool for academic research, we request that you cite: 35 * Konstantinos Aisopos, Chia-Hsin Owen Chen, and Li-Shiuan Peh. Enabling 36 * System-Level Modeling of Variation-Induced Faults in Networks-on-Chip. 
37 * Proceedings of the 48th Design Automation Conference (DAC'11) 38 */ 39 40// C includes 41#include <assert.h> 42#include <stdio.h> 43 44// C++ includes 45#include <fstream> 46#include <iostream> 47#include <vector> 48 49// GEM5 includes 50#include "FaultModel.hh" 51#include "base/misc.hh" 52 53#define MAX(a,b) ((a > b) ? (a) : (b)) 54 55 56FaultModel::FaultModel(const Params *p) : SimObject(p) 57{ 58 // read configurations into "configurations" vector 59 // format: <buff/vc> <vcs> <10 fault types> 60 bool more_records = true; 61 for (int i = 0; more_records; i += (fields_per_conf_record)){ 62 system_conf configuration; 63 configuration.buff_per_vc = 64 p->baseline_fault_vector_database[i + conf_record_buff_per_vc]; 65 configuration.vcs = 66 p->baseline_fault_vector_database[i + conf_record_vcs]; 67 for (int fault_index = 0; fault_index < number_of_fault_types; 68 fault_index++){ 69 configuration.fault_type[fault_index] = 70 p->baseline_fault_vector_database[i + 71 conf_record_first_fault_type + fault_index] / 100; 72 } 73 configurations.push_back(configuration); 74 if (p->baseline_fault_vector_database[i+fields_per_conf_record] < 0){ 75 more_records = false; 76 } 77 } 78 79 // read temperature weights into "temperature_weights" vector 80 // format: <temperature> <weight> 81 more_records = true; 82 for (int i = 0; more_records; i += (fields_per_temperature_record)){ 83 int record_temperature = 84 p->temperature_weights_database[i + temperature_record_temp]; 85 int record_weight = 86 p->temperature_weights_database[i + temperature_record_weight]; 87 static int first_record = true; 88 if (first_record){ 89 for (int temperature = 0; temperature < record_temperature; 90 temperature++){ 91 temperature_weights.push_back(0); 92 } 93 first_record = false; 94 } 95 assert(record_temperature == temperature_weights.size()); 96 temperature_weights.push_back(record_weight); 97 if (p->temperature_weights_database[i + 98 fields_per_temperature_record] < 0){ 99 more_records = 
false; 100 } 101 } 102} 103 104string 105FaultModel::fault_type_to_string(int ft) 106{ 107 if (ft == data_corruption__few_bits){ 108 return "data_corruption__few_bits"; 109 } else if (ft == data_corruption__all_bits){ 110 return "data_corruption__all_bits"; 111 } else if (ft == flit_conservation__flit_duplication){ 112 return "flit_conservation__flit_duplication"; 113 } else if (ft == flit_conservation__flit_loss_or_split){ 114 return "flit_conservation__flit_loss_or_split"; 115 } else if (ft == misrouting){ 116 return "misrouting"; 117 } else if (ft == credit_conservation__credit_generation){ 118 return "credit_conservation__credit_generation"; 119 } else if (ft == credit_conservation__credit_loss){ 120 return "credit_conservation__credit_loss"; 121 } else if (ft == erroneous_allocation__VC){ 122 return "erroneous_allocation__VC"; 123 } else if (ft == erroneous_allocation__switch){ 124 return "erroneous_allocation__switch"; 125 } else if (ft == unfair_arbitration){ 126 return "unfair_arbitration"; 127 } else if (ft == number_of_fault_types){ 128 return "none"; 129 } else { 130 return "none"; 131 } 132} 133 134 135int 136FaultModel::declare_router(int number_of_inputs, 137 int number_of_outputs, 138 int number_of_vcs_per_input, 139 int number_of_buff_per_data_vc, 140 int number_of_buff_per_ctrl_vc) 141{ 142 // check inputs (are they legal?) 143 if (number_of_inputs <= 0 || number_of_outputs <= 0 || 144 number_of_vcs_per_input <= 0 || number_of_buff_per_data_vc <= 0 || 145 number_of_buff_per_ctrl_vc <= 0){ 146 fatal("Fault Model: ERROR in argument of FaultModel_declare_router!"); 147 } 148 int number_of_buffers_per_vc = MAX(number_of_buff_per_data_vc, 149 number_of_buff_per_ctrl_vc); 150 int total_vcs = number_of_inputs * number_of_vcs_per_input; 151 if (total_vcs > MAX_VCs){ 152 fatal("Fault Model: ERROR! Number inputs*VCs (MAX_VCs) unsupported"); 153 } 154 if (number_of_buffers_per_vc > MAX_BUFFERS_per_VC){ 155 fatal("Fault Model: ERROR! 
buffers/VC (MAX_BUFFERS_per_VC) too high"); 156 } 157 158 // link the router to a DB record 159 int record_hit = -1; 160 for (int record = 0; record < configurations.size(); record++){ 161 if ((configurations[record].buff_per_vc == number_of_buffers_per_vc)&& 162 (configurations[record].vcs == total_vcs)){ 163 record_hit = record; 164 } 165 } 166 if (record_hit == -1){ 167 panic("Fault Model: ERROR! configuration not found in DB. BUG?"); 168 } 169 170 // remember the router and return its ID 171 routers.push_back(configurations[record_hit]); 172 static int router_index = 0; 173 return router_index++; 174} 175 176bool 177FaultModel::fault_vector(int routerID, 178 int temperature_input, 179 float fault_vector[]) 180{ 181 bool ok = true; 182 183 // is the routerID recorded? 184 if (routerID < 0 || routerID >= ((int) routers.size())){ 185 warn("Fault Model: ERROR! unknown router ID argument."); 186 fatal("Fault Model: Did you enable the fault model flag)?"); 187 } 188 189 // is the temperature too high/too low? 
190 int temperature = temperature_input; 191 if (temperature_input >= ((int) temperature_weights.size())){ 192 ok = false; 193 warn_once("Fault Model: Temperature exceeded simulated upper bound."); 194 warn_once("Fault Model: The fault model is not accurate any more."); 195 temperature = (temperature_weights.size() - 1); 196 } else if (temperature_input < 0){ 197 ok = false; 198 warn_once("Fault Model: Temperature exceeded simulated lower bound."); 199 warn_once("Fault Model: The fault model is not accurate any more."); 200 temperature = 0; 201 } 202 203 // recover the router record and return its fault vector 204 for (int i = 0; i < number_of_fault_types; i++){ 205 fault_vector[i] = routers[routerID].fault_type[i] * 206 ((float)temperature_weights[temperature]); 207 } 208 return ok; 209} 210 211bool 212FaultModel::fault_prob(int routerID, 213 int temperature_input, 214 float *aggregate_fault_prob) 215{ 216 *aggregate_fault_prob = 1.0; 217 bool ok = true; 218 219 // is the routerID recorded? 220 if (routerID < 0 || routerID >= ((int) routers.size())){ 221 warn("Fault Model: ERROR! unknown router ID argument."); 222 fatal("Fault Model: Did you enable the fault model flag)?"); 223 } 224 225 // is the temperature too high/too low? 
226 int temperature = temperature_input; 227 if (temperature_input >= ((int) temperature_weights.size()) ){ 228 ok = false; 229 warn_once("Fault Model: Temperature exceeded simulated upper bound."); 230 warn_once("Fault Model: The fault model is not accurate any more."); 231 temperature = (temperature_weights.size()-1); 232 } else if (temperature_input < 0){ 233 ok = false; 234 warn_once("Fault Model: Temperature exceeded simulated lower bound."); 235 warn_once("Fault Model: The fault model is not accurate any more."); 236 temperature = 0; 237 } 238 239 // recover the router record and return its aggregate fault probability 240 for (int i = 0; i < number_of_fault_types; i++){ 241 *aggregate_fault_prob= *aggregate_fault_prob * 242 ( 1.0 - (routers[routerID].fault_type[i] * 243 ((float)temperature_weights[temperature])) ); 244 } 245 *aggregate_fault_prob = 1.0 - *aggregate_fault_prob; 246 return ok; 247} 248 249// this function is used only for debugging purposes 250void 251FaultModel::print(void) 252{ 253 cout << "--- PRINTING configurations ---\n"; 254 for (int record = 0; record < configurations.size(); record++){ 255 cout << "(" << record << ") "; 256 cout << "VCs=" << configurations[record].vcs << " "; 257 cout << "Buff/VC=" << configurations[record].buff_per_vc << " ["; 258 for (int fault_type_num = 0; 259 fault_type_num < number_of_fault_types; 260 fault_type_num++){ 261 cout << (100 * configurations[record].fault_type[fault_type_num]); 262 cout << "% "; 263 } 264 cout << "]\n"; 265 } 266 cout << "--- PRINTING temperature weights ---\n"; 267 for (int record = 0; record < temperature_weights.size(); record++){ 268 cout << "temperature=" << record << " => "; 269 cout << "weight=" << temperature_weights[record]; 270 cout << "\n"; 271 } 272} 273 274FaultModel * 275FaultModelParams::create() 276{ 277 return new FaultModel(this); 278} 279