1/*
2 * Copyright (c) 2011 Massachusetts Institute of Technology
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Konstantinos Aisopos
29 */
30
31/*
32 * Official Tool Website: www.mit.edu/~kaisopos/FaultModel
33 *
34 * If you use our tool for academic research, we request that you cite:
35 * Konstantinos Aisopos, Chia-Hsin Owen Chen, and Li-Shiuan Peh. Enabling
36 * System-Level Modeling of Variation-Induced Faults in Networks-on-Chip.
37 * Proceedings of the 48th Design Automation Conference (DAC'11)
38 */
39
40// C++ includes
41#include <cassert>
42#include <fstream>
43#include <iostream>
44#include <vector>
45
46// GEM5 includes
47#include "FaultModel.hh"
48#include "base/logging.hh"
49
50using namespace std;
51
// Larger of two values. Both arguments are fully parenthesised so that
// operators with lower precedence than '>' inside an argument
// (e.g. MAX(x & mask, y)) are not re-associated by the expansion.
// NOTE: the selected argument is still evaluated twice; do not pass
// expressions with side effects.
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
53
54
55FaultModel::FaultModel(const Params *p) : SimObject(p)
56{
57    // read configurations into "configurations" vector
58    // format: <buff/vc> <vcs> <10 fault types>
59    bool more_records = true;
60    for (int i = 0; more_records; i += (fields_per_conf_record)){
61        system_conf configuration;
62        configuration.buff_per_vc =
63            p->baseline_fault_vector_database[i + conf_record_buff_per_vc];
64        configuration.vcs =
65            p->baseline_fault_vector_database[i + conf_record_vcs];
66        for (int fault_index = 0; fault_index < number_of_fault_types;
67            fault_index++){
68            configuration.fault_type[fault_index] =
69                p->baseline_fault_vector_database[i +
70                   conf_record_first_fault_type + fault_index] / 100;
71        }
72        configurations.push_back(configuration);
73        if (p->baseline_fault_vector_database[i+fields_per_conf_record] < 0){
74            more_records = false;
75        }
76    }
77
78    // read temperature weights into "temperature_weights" vector
79    // format: <temperature> <weight>
80    more_records = true;
81    for (int i = 0; more_records; i += (fields_per_temperature_record)){
82        int record_temperature =
83               p->temperature_weights_database[i + temperature_record_temp];
84        int record_weight =
85               p->temperature_weights_database[i + temperature_record_weight];
86        static int first_record = true;
87        if (first_record){
88            for (int temperature = 0; temperature < record_temperature;
89                 temperature++){
90                 temperature_weights.push_back(0);
91            }
92            first_record = false;
93        }
94        assert(record_temperature == temperature_weights.size());
95        temperature_weights.push_back(record_weight);
96        if (p->temperature_weights_database[i +
97               fields_per_temperature_record] < 0){
98            more_records = false;
99        }
100    }
101}
102
103string
104FaultModel::fault_type_to_string(int ft)
105{
106   if (ft == data_corruption__few_bits){
107       return "data_corruption__few_bits";
108   } else if (ft == data_corruption__all_bits){
109      return "data_corruption__all_bits";
110   } else if (ft == flit_conservation__flit_duplication){
111      return "flit_conservation__flit_duplication";
112   } else if (ft == flit_conservation__flit_loss_or_split){
113      return "flit_conservation__flit_loss_or_split";
114   } else if (ft == misrouting){
115      return "misrouting";
116   } else if (ft == credit_conservation__credit_generation){
117      return "credit_conservation__credit_generation";
118   } else if (ft == credit_conservation__credit_loss){
119      return "credit_conservation__credit_loss";
120   } else if (ft == erroneous_allocation__VC){
121      return "erroneous_allocation__VC";
122   } else if (ft == erroneous_allocation__switch){
123      return "erroneous_allocation__switch";
124   } else if (ft == unfair_arbitration){
125      return "unfair_arbitration";
126   } else if (ft == number_of_fault_types){
127      return "none";
128   } else {
129      return "none";
130   }
131}
132
133
134int
135FaultModel::declare_router(int number_of_inputs,
136                           int number_of_outputs,
137                           int number_of_vcs_per_input,
138                           int number_of_buff_per_data_vc,
139                           int number_of_buff_per_ctrl_vc)
140{
141    // check inputs (are they legal?)
142    if (number_of_inputs <= 0 || number_of_outputs <= 0 ||
143        number_of_vcs_per_input <= 0 || number_of_buff_per_data_vc <= 0 ||
144        number_of_buff_per_ctrl_vc <= 0){
145        fatal("Fault Model: ERROR in argument of FaultModel_declare_router!");
146    }
147    int number_of_buffers_per_vc = MAX(number_of_buff_per_data_vc,
148                                       number_of_buff_per_ctrl_vc);
149    int total_vcs = number_of_inputs * number_of_vcs_per_input;
150    if (total_vcs > MAX_VCs){
151        fatal("Fault Model: ERROR! Number inputs*VCs (MAX_VCs) unsupported");
152    }
153    if (number_of_buffers_per_vc > MAX_BUFFERS_per_VC){
154        fatal("Fault Model: ERROR! buffers/VC (MAX_BUFFERS_per_VC) too high");
155    }
156
157    // link the router to a DB record
158    int record_hit = -1;
159    for (int record = 0; record < configurations.size(); record++){
160        if ((configurations[record].buff_per_vc == number_of_buffers_per_vc)&&
161            (configurations[record].vcs == total_vcs)){
162            record_hit = record;
163        }
164    }
165    if (record_hit == -1){
166        panic("Fault Model: ERROR! configuration not found in DB. BUG?");
167    }
168
169    // remember the router and return its ID
170    routers.push_back(configurations[record_hit]);
171    static int router_index = 0;
172    return router_index++;
173}
174
175bool
176FaultModel::fault_vector(int routerID,
177                         int temperature_input,
178                         float fault_vector[])
179{
180    bool ok = true;
181
182    // is the routerID recorded?
183    if (routerID < 0 || routerID >= ((int) routers.size())){
184         warn("Fault Model: ERROR! unknown router ID argument.");
185        fatal("Fault Model: Did you enable the fault model flag)?");
186    }
187
188    // is the temperature too high/too low?
189    int temperature = temperature_input;
190    if (temperature_input >= ((int) temperature_weights.size())){
191        ok = false;
192        warn_once("Fault Model: Temperature exceeded simulated upper bound.");
193        warn_once("Fault Model: The fault model is not accurate any more.");
194        temperature = (temperature_weights.size() - 1);
195    } else if (temperature_input < 0){
196        ok = false;
197        warn_once("Fault Model: Temperature exceeded simulated lower bound.");
198        warn_once("Fault Model: The fault model is not accurate any more.");
199        temperature = 0;
200    }
201
202    // recover the router record and return its fault vector
203    for (int i = 0; i < number_of_fault_types; i++){
204        fault_vector[i] = routers[routerID].fault_type[i] *
205                          ((float)temperature_weights[temperature]);
206    }
207    return ok;
208}
209
210bool
211FaultModel::fault_prob(int routerID,
212                       int temperature_input,
213                       float *aggregate_fault_prob)
214{
215    *aggregate_fault_prob = 1.0;
216    bool ok = true;
217
218    // is the routerID recorded?
219    if (routerID < 0 || routerID >= ((int) routers.size())){
220         warn("Fault Model: ERROR! unknown router ID argument.");
221        fatal("Fault Model: Did you enable the fault model flag)?");
222    }
223
224    // is the temperature too high/too low?
225    int temperature = temperature_input;
226    if (temperature_input >= ((int) temperature_weights.size()) ){
227        ok = false;
228        warn_once("Fault Model: Temperature exceeded simulated upper bound.");
229        warn_once("Fault Model: The fault model is not accurate any more.");
230        temperature = (temperature_weights.size()-1);
231    } else if (temperature_input < 0){
232        ok = false;
233        warn_once("Fault Model: Temperature exceeded simulated lower bound.");
234        warn_once("Fault Model: The fault model is not accurate any more.");
235        temperature = 0;
236    }
237
238    // recover the router record and return its aggregate fault probability
239    for (int i = 0; i < number_of_fault_types; i++){
240        *aggregate_fault_prob=  *aggregate_fault_prob *
241                               ( 1.0 - (routers[routerID].fault_type[i] *
242                                 ((float)temperature_weights[temperature])) );
243    }
244    *aggregate_fault_prob = 1.0 - *aggregate_fault_prob;
245    return ok;
246}
247
248// this function is used only for debugging purposes
249void
250FaultModel::print(void)
251{
252    cout << "--- PRINTING configurations ---\n";
253    for (int record = 0; record < configurations.size(); record++){
254        cout << "(" << record << ") ";
255        cout << "VCs=" << configurations[record].vcs << " ";
256        cout << "Buff/VC=" << configurations[record].buff_per_vc << " [";
257        for (int fault_type_num = 0;
258             fault_type_num < number_of_fault_types;
259             fault_type_num++){
260            cout << (100 * configurations[record].fault_type[fault_type_num]);
261            cout << "% ";
262        }
263        cout << "]\n";
264    }
265    cout << "--- PRINTING temperature weights ---\n";
266    for (int record = 0; record < temperature_weights.size(); record++){
267        cout << "temperature=" << record << " => ";
268        cout << "weight=" << temperature_weights[record];
269        cout << "\n";
270    }
271}
272
273FaultModel *
274FaultModelParams::create()
275{
276    return new FaultModel(this);
277}
278