// FaultModel.cc revision 8612:df3b7a1e883f
/*
 * Copyright (c) 2011 Massachusetts Institute of Technology
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Konstantinos Aisopos
 */
30
/*
 * Official Tool Website: www.mit.edu/~kaisopos/FaultModel
 *
 * If you use our tool for academic research, we request that you cite:
 * Konstantinos Aisopos, Chia-Hsin Owen Chen, and Li-Shiuan Peh. Enabling
 * System-Level Modeling of Variation-Induced Faults in Networks-on-Chip.
 * Proceedings of the 48th Design Automation Conference (DAC'11)
 */
39
40// C includes
41#include <assert.h>
42#include <stdio.h>
43
44// C++ includes
45#include <fstream>
46#include <iostream>
47#include <vector>
48
49// GEM5 includes
50#include "FaultModel.hh"
51#include "base/misc.hh"
52
// Evaluates to the larger of a and b. Both arguments are parenthesized
// inside the comparison so that operands containing low-precedence
// operators (e.g. "x & y") expand correctly. One of the two arguments is
// still evaluated twice, so avoid arguments with side effects.
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
54
55
56FaultModel::FaultModel(const Params *p) : SimObject(p)
57{
58    // read configurations into "configurations" vector
59    // format: <buff/vc> <vcs> <10 fault types>
60    bool more_records = true;
61    for (int i = 0; more_records; i += (fields_per_conf_record)){
62        system_conf configuration;
63        configuration.buff_per_vc =
64            p->baseline_fault_vector_database[i + conf_record_buff_per_vc];
65        configuration.vcs =
66            p->baseline_fault_vector_database[i + conf_record_vcs];
67        for (int fault_index = 0; fault_index < number_of_fault_types;
68            fault_index++){
69            configuration.fault_type[fault_index] =
70                p->baseline_fault_vector_database[i +
71                   conf_record_first_fault_type + fault_index] / 100;
72        }
73        configurations.push_back(configuration);
74        if (p->baseline_fault_vector_database[i+fields_per_conf_record] < 0){
75            more_records = false;
76        }
77    }
78
79    // read temperature weights into "temperature_weights" vector
80    // format: <temperature> <weight>
81    more_records = true;
82    for (int i = 0; more_records; i += (fields_per_temperature_record)){
83        int record_temperature =
84               p->temperature_weights_database[i + temperature_record_temp];
85        int record_weight =
86               p->temperature_weights_database[i + temperature_record_weight];
87        static int first_record = true;
88        if (first_record){
89            for (int temperature = 0; temperature < record_temperature;
90                 temperature++){
91                 temperature_weights.push_back(0);
92            }
93            first_record = false;
94        }
95        assert(record_temperature == temperature_weights.size());
96        temperature_weights.push_back(record_weight);
97        if (p->temperature_weights_database[i +
98               fields_per_temperature_record] < 0){
99            more_records = false;
100        }
101    }
102}
103
104string
105FaultModel::fault_type_to_string(int ft)
106{
107   if (ft == data_corruption__few_bits){
108       return "data_corruption__few_bits";
109   } else if (ft == data_corruption__all_bits){
110      return "data_corruption__all_bits";
111   } else if (ft == flit_conservation__flit_duplication){
112      return "flit_conservation__flit_duplication";
113   } else if (ft == flit_conservation__flit_loss_or_split){
114      return "flit_conservation__flit_loss_or_split";
115   } else if (ft == misrouting){
116      return "misrouting";
117   } else if (ft == credit_conservation__credit_generation){
118      return "credit_conservation__credit_generation";
119   } else if (ft == credit_conservation__credit_loss){
120      return "credit_conservation__credit_loss";
121   } else if (ft == erroneous_allocation__VC){
122      return "erroneous_allocation__VC";
123   } else if (ft == erroneous_allocation__switch){
124      return "erroneous_allocation__switch";
125   } else if (ft == unfair_arbitration){
126      return "unfair_arbitration";
127   } else if (ft == number_of_fault_types){
128      return "none";
129   } else {
130      return "none";
131   }
132}
133
134
135int
136FaultModel::declare_router(int number_of_inputs,
137                           int number_of_outputs,
138                           int number_of_vcs_per_input,
139                           int number_of_buff_per_data_vc,
140                           int number_of_buff_per_ctrl_vc)
141{
142    // check inputs (are they legal?)
143    if (number_of_inputs <= 0 || number_of_outputs <= 0 ||
144        number_of_vcs_per_input <= 0 || number_of_buff_per_data_vc <= 0 ||
145        number_of_buff_per_ctrl_vc <= 0){
146        fatal("Fault Model: ERROR in argument of FaultModel_declare_router!");
147    }
148    int number_of_buffers_per_vc = MAX(number_of_buff_per_data_vc,
149                                       number_of_buff_per_ctrl_vc);
150    int total_vcs = number_of_inputs * number_of_vcs_per_input;
151    if (total_vcs > MAX_VCs){
152        fatal("Fault Model: ERROR! Number inputs*VCs (MAX_VCs) unsupported");
153    }
154    if (number_of_buffers_per_vc > MAX_BUFFERS_per_VC){
155        fatal("Fault Model: ERROR! buffers/VC (MAX_BUFFERS_per_VC) too high");
156    }
157
158    // link the router to a DB record
159    int record_hit = -1;
160    for (int record = 0; record < configurations.size(); record++){
161        if ((configurations[record].buff_per_vc == number_of_buffers_per_vc)&&
162            (configurations[record].vcs == total_vcs)){
163            record_hit = record;
164        }
165    }
166    if (record_hit == -1){
167        panic("Fault Model: ERROR! configuration not found in DB. BUG?");
168    }
169
170    // remember the router and return its ID
171    routers.push_back(configurations[record_hit]);
172    static int router_index = 0;
173    return router_index++;
174}
175
176bool
177FaultModel::fault_vector(int routerID,
178                         int temperature_input,
179                         float fault_vector[])
180{
181    bool ok = true;
182
183    // is the routerID recorded?
184    if (routerID < 0 || routerID >= ((int) routers.size())){
185         warn("Fault Model: ERROR! unknown router ID argument.");
186        fatal("Fault Model: Did you enable the fault model flag)?");
187    }
188
189    // is the temperature too high/too low?
190    int temperature = temperature_input;
191    if (temperature_input >= ((int) temperature_weights.size())){
192        ok = false;
193        warn_once("Fault Model: Temperature exceeded simulated upper bound.");
194        warn_once("Fault Model: The fault model is not accurate any more.");
195        temperature = (temperature_weights.size() - 1);
196    } else if (temperature_input < 0){
197        ok = false;
198        warn_once("Fault Model: Temperature exceeded simulated lower bound.");
199        warn_once("Fault Model: The fault model is not accurate any more.");
200        temperature = 0;
201    }
202
203    // recover the router record and return its fault vector
204    for (int i = 0; i < number_of_fault_types; i++){
205        fault_vector[i] = routers[routerID].fault_type[i] *
206                          ((float)temperature_weights[temperature]);
207    }
208    return ok;
209}
210
211bool
212FaultModel::fault_prob(int routerID,
213                       int temperature_input,
214                       float *aggregate_fault_prob)
215{
216    *aggregate_fault_prob = 1.0;
217    bool ok = true;
218
219    // is the routerID recorded?
220    if (routerID < 0 || routerID >= ((int) routers.size())){
221         warn("Fault Model: ERROR! unknown router ID argument.");
222        fatal("Fault Model: Did you enable the fault model flag)?");
223    }
224
225    // is the temperature too high/too low?
226    int temperature = temperature_input;
227    if (temperature_input >= ((int) temperature_weights.size()) ){
228        ok = false;
229        warn_once("Fault Model: Temperature exceeded simulated upper bound.");
230        warn_once("Fault Model: The fault model is not accurate any more.");
231        temperature = (temperature_weights.size()-1);
232    } else if (temperature_input < 0){
233        ok = false;
234        warn_once("Fault Model: Temperature exceeded simulated lower bound.");
235        warn_once("Fault Model: The fault model is not accurate any more.");
236        temperature = 0;
237    }
238
239    // recover the router record and return its aggregate fault probability
240    for (int i = 0; i < number_of_fault_types; i++){
241        *aggregate_fault_prob=  *aggregate_fault_prob *
242                               ( 1.0 - (routers[routerID].fault_type[i] *
243                                 ((float)temperature_weights[temperature])) );
244    }
245    *aggregate_fault_prob = 1.0 - *aggregate_fault_prob;
246    return ok;
247}
248
249// this function is used only for debugging purposes
250void
251FaultModel::print(void)
252{
253    cout << "--- PRINTING configurations ---\n";
254    for (int record = 0; record < configurations.size(); record++){
255        cout << "(" << record << ") ";
256        cout << "VCs=" << configurations[record].vcs << " ";
257        cout << "Buff/VC=" << configurations[record].buff_per_vc << " [";
258        for (int fault_type_num = 0;
259             fault_type_num < number_of_fault_types;
260             fault_type_num++){
261            cout << (100 * configurations[record].fault_type[fault_type_num]);
262            cout << "% ";
263        }
264        cout << "]\n";
265    }
266    cout << "--- PRINTING temperature weights ---\n";
267    for (int record = 0; record < temperature_weights.size(); record++){
268        cout << "temperature=" << record << " => ";
269        cout << "weight=" << temperature_weights[record];
270        cout << "\n";
271    }
272}
273
274FaultModel *
275FaultModelParams::create()
276{
277    return new FaultModel(this);
278}
279