/* * Copyright (c) 2011 Massachusetts Institute of Technology * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer; * redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution; * neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Konstantinos Aisopos */ /* * Official Tool Website: www.mit.edu/~kaisopos/FaultModel * * If you use our tool for academic research, we request that you cite: * Konstantinos Aisopos, Chia-Hsin Owen Chen, and Li-Shiuan Peh. Enabling * System-Level Modeling of Variation-Induced Faults in Networks-on-Chip. * Proceedings of the 48th Design Automation Conference (DAC'11) */ // C++ includes #include #include #include #include // GEM5 includes #include "FaultModel.hh" #include "base/logging.hh" using namespace std; #define MAX(a,b) ((a > b) ? (a) : (b)) FaultModel::FaultModel(const Params *p) : SimObject(p) { // read configurations into "configurations" vector // format: <10 fault types> bool more_records = true; for (int i = 0; more_records; i += (fields_per_conf_record)){ system_conf configuration; configuration.buff_per_vc = p->baseline_fault_vector_database[i + conf_record_buff_per_vc]; configuration.vcs = p->baseline_fault_vector_database[i + conf_record_vcs]; for (int fault_index = 0; fault_index < number_of_fault_types; fault_index++){ configuration.fault_type[fault_index] = p->baseline_fault_vector_database[i + conf_record_first_fault_type + fault_index] / 100; } configurations.push_back(configuration); if (p->baseline_fault_vector_database[i+fields_per_conf_record] < 0){ more_records = false; } } // read temperature weights into "temperature_weights" vector // format: more_records = true; for (int i = 0; more_records; i += (fields_per_temperature_record)){ int record_temperature = p->temperature_weights_database[i + temperature_record_temp]; int record_weight = p->temperature_weights_database[i + temperature_record_weight]; static int first_record = true; if (first_record){ for (int temperature = 0; temperature < record_temperature; temperature++){ temperature_weights.push_back(0); } first_record = false; } assert(record_temperature == temperature_weights.size()); temperature_weights.push_back(record_weight); if (p->temperature_weights_database[i + fields_per_temperature_record] < 0){ more_records = false; } } } string FaultModel::fault_type_to_string(int ft) { if (ft == data_corruption__few_bits){ return "data_corruption__few_bits"; } else if (ft == data_corruption__all_bits){ return "data_corruption__all_bits"; } else if (ft == flit_conservation__flit_duplication){ return "flit_conservation__flit_duplication"; } else if (ft == flit_conservation__flit_loss_or_split){ return "flit_conservation__flit_loss_or_split"; } else if (ft == misrouting){ return "misrouting"; } else if (ft == credit_conservation__credit_generation){ return "credit_conservation__credit_generation"; } else if (ft == credit_conservation__credit_loss){ return "credit_conservation__credit_loss"; } else if (ft == erroneous_allocation__VC){ return "erroneous_allocation__VC"; } else if (ft == erroneous_allocation__switch){ return "erroneous_allocation__switch"; } else if (ft == unfair_arbitration){ return "unfair_arbitration"; } else if (ft == number_of_fault_types){ return "none"; } else { return "none"; } } int FaultModel::declare_router(int number_of_inputs, int number_of_outputs, int number_of_vcs_per_input, int number_of_buff_per_data_vc, int number_of_buff_per_ctrl_vc) { // check inputs (are they legal?) if (number_of_inputs <= 0 || number_of_outputs <= 0 || number_of_vcs_per_input <= 0 || number_of_buff_per_data_vc <= 0 || number_of_buff_per_ctrl_vc <= 0){ fatal("Fault Model: ERROR in argument of FaultModel_declare_router!"); } int number_of_buffers_per_vc = MAX(number_of_buff_per_data_vc, number_of_buff_per_ctrl_vc); int total_vcs = number_of_inputs * number_of_vcs_per_input; if (total_vcs > MAX_VCs){ fatal("Fault Model: ERROR! Number inputs*VCs (MAX_VCs) unsupported"); } if (number_of_buffers_per_vc > MAX_BUFFERS_per_VC){ fatal("Fault Model: ERROR! buffers/VC (MAX_BUFFERS_per_VC) too high"); } // link the router to a DB record int record_hit = -1; for (int record = 0; record < configurations.size(); record++){ if ((configurations[record].buff_per_vc == number_of_buffers_per_vc)&& (configurations[record].vcs == total_vcs)){ record_hit = record; } } if (record_hit == -1){ panic("Fault Model: ERROR! configuration not found in DB. BUG?"); } // remember the router and return its ID routers.push_back(configurations[record_hit]); static int router_index = 0; return router_index++; } bool FaultModel::fault_vector(int routerID, int temperature_input, float fault_vector[]) { bool ok = true; // is the routerID recorded? if (routerID < 0 || routerID >= ((int) routers.size())){ warn("Fault Model: ERROR! unknown router ID argument."); fatal("Fault Model: Did you enable the fault model flag)?"); } // is the temperature too high/too low? int temperature = temperature_input; if (temperature_input >= ((int) temperature_weights.size())){ ok = false; warn_once("Fault Model: Temperature exceeded simulated upper bound."); warn_once("Fault Model: The fault model is not accurate any more."); temperature = (temperature_weights.size() - 1); } else if (temperature_input < 0){ ok = false; warn_once("Fault Model: Temperature exceeded simulated lower bound."); warn_once("Fault Model: The fault model is not accurate any more."); temperature = 0; } // recover the router record and return its fault vector for (int i = 0; i < number_of_fault_types; i++){ fault_vector[i] = routers[routerID].fault_type[i] * ((float)temperature_weights[temperature]); } return ok; } bool FaultModel::fault_prob(int routerID, int temperature_input, float *aggregate_fault_prob) { *aggregate_fault_prob = 1.0; bool ok = true; // is the routerID recorded? if (routerID < 0 || routerID >= ((int) routers.size())){ warn("Fault Model: ERROR! unknown router ID argument."); fatal("Fault Model: Did you enable the fault model flag)?"); } // is the temperature too high/too low? int temperature = temperature_input; if (temperature_input >= ((int) temperature_weights.size()) ){ ok = false; warn_once("Fault Model: Temperature exceeded simulated upper bound."); warn_once("Fault Model: The fault model is not accurate any more."); temperature = (temperature_weights.size()-1); } else if (temperature_input < 0){ ok = false; warn_once("Fault Model: Temperature exceeded simulated lower bound."); warn_once("Fault Model: The fault model is not accurate any more."); temperature = 0; } // recover the router record and return its aggregate fault probability for (int i = 0; i < number_of_fault_types; i++){ *aggregate_fault_prob= *aggregate_fault_prob * ( 1.0 - (routers[routerID].fault_type[i] * ((float)temperature_weights[temperature])) ); } *aggregate_fault_prob = 1.0 - *aggregate_fault_prob; return ok; } // this function is used only for debugging purposes void FaultModel::print(void) { cout << "--- PRINTING configurations ---\n"; for (int record = 0; record < configurations.size(); record++){ cout << "(" << record << ") "; cout << "VCs=" << configurations[record].vcs << " "; cout << "Buff/VC=" << configurations[record].buff_per_vc << " ["; for (int fault_type_num = 0; fault_type_num < number_of_fault_types; fault_type_num++){ cout << (100 * configurations[record].fault_type[fault_type_num]); cout << "% "; } cout << "]\n"; } cout << "--- PRINTING temperature weights ---\n"; for (int record = 0; record < temperature_weights.size(); record++){ cout << "temperature=" << record << " => "; cout << "weight=" << temperature_weights[record]; cout << "\n"; } } FaultModel * FaultModelParams::create() { return new FaultModel(this); }