1# Copyright (c) 2003-2004 The Regents of The University of Michigan 2# All rights reserved. 3# 4# Redistribution and use in source and binary forms, with or without 5# modification, are permitted provided that the following conditions are 6# met: redistributions of source code must retain the above copyright 7# notice, this list of conditions and the following disclaimer; 8# redistributions in binary form must reproduce the above copyright 9# notice, this list of conditions and the following disclaimer in the 10# documentation and/or other materials provided with the distribution; 11# neither the name of the copyright holders nor the names of its 12# contributors may be used to endorse or promote products derived from 13# this software without specific prior written permission. 14# 15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26# 27# Authors: Nathan Binkert 28 29import MySQLdb 30 31class MyDB(object): 32 def __init__(self, options): 33 self.name = options.db 34 self.host = options.host 35 self.user = options.user 36 self.passwd = options.passwd 37 self.mydb = None 38 self.cursor = None 39 40 def admin(self): 41 self.close() 42 self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user, 43 passwd=self.passwd) 44 self.cursor = self.mydb.cursor() 45 46 def connect(self): 47 self.close() 48 self.mydb = MySQLdb.connect(db=self.name, host=self.host, 49 user=self.user, passwd=self.passwd) 50 self.cursor = self.mydb.cursor() 51 52 def close(self): 53 if self.mydb is not None: 54 self.mydb.close() 55 self.cursor = None 56 57 def query(self, sql): 58 self.cursor.execute(sql) 59 60 def drop(self): 61 self.query('DROP DATABASE IF EXISTS %s' % self.name) 62 63 def create(self): 64 self.query('CREATE DATABASE %s' % self.name) 65 66 def populate(self): 67 # 68 # Each run (or simulation) gets its own entry in the runs table to 69 # group stats by where they were generated 70 # 71 # COLUMNS: 72 # 'id' is a unique identifier for each run to be used in other 73 # tables. 74 # 'name' is the user designated name for the data generated. It is 75 # configured in the simulator. 76 # 'user' identifies the user that generated the data for the given 77 # run. 78 # 'project' another name to identify runs for a specific goal 79 # 'date' is a timestamp for when the data was generated. It can be 80 # used to easily expire data that was generated in the past. 81 # 'expire' is a timestamp for when the data should be removed from 82 # the database so we don't have years worth of junk. 83 # 84 # INDEXES: 85 # 'run' is indexed so you can find out details of a run if the run 86 # was retreived from the data table. 87 # 'name' is indexed so that two all run names are forced to be unique 88 # 89 self.query(''' 90 CREATE TABLE runs( 91 rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 92 rn_name VARCHAR(200) NOT NULL, 93 rn_sample VARCHAR(32) NOT NULL, 94 rn_user VARCHAR(32) NOT NULL, 95 rn_project VARCHAR(100) NOT NULL, 96 rn_date TIMESTAMP NOT NULL, 97 rn_expire TIMESTAMP NOT NULL, 98 PRIMARY KEY (rn_id), 99 UNIQUE (rn_name,rn_sample) 100 ) TYPE=InnoDB''') 101 102 # 103 # The stat table gives us all of the data for a particular stat. 104 # 105 # COLUMNS: 106 # 'stat' is a unique identifier for each stat to be used in other 107 # tables for references. 108 # 'name' is simply the simulator derived name for a given 109 # statistic. 110 # 'descr' is the description of the statistic and what it tells 111 # you. 112 # 'type' defines what the stat tells you. Types are: 113 # SCALAR: A simple scalar statistic that holds one value 114 # VECTOR: An array of statistic values. Such a something that 115 # is generated per-thread. Vectors exist to give averages, 116 # pdfs, cdfs, means, standard deviations, etc across the 117 # stat values. 118 # DIST: Is a distribution of data. When the statistic value is 119 # sampled, its value is counted in a particular bucket. 120 # Useful for keeping track of utilization of a resource. 121 # (e.g. fraction of time it is 25% used vs. 50% vs. 100%) 122 # VECTORDIST: Can be used when the distribution needs to be 123 # factored out into a per-thread distribution of data for 124 # example. It can still be summed across threads to find 125 # the total distribution. 126 # VECTOR2D: Can be used when you have a stat that is not only 127 # per-thread, but it is per-something else. Like 128 # per-message type. 129 # FORMULA: This statistic is a formula, and its data must be 130 # looked up in the formula table, for indicating how to 131 # present its values. 132 # 'subdata' is potentially used by any of the vector types to 133 # give a specific name to all of the data elements within a 134 # stat. 135 # 'print' indicates whether this stat should be printed ever. 136 # (Unnamed stats don't usually get printed) 137 # 'prereq' only print the stat if the prereq is not zero. 138 # 'prec' number of decimal places to print 139 # 'nozero' don't print zero values 140 # 'nonan' don't print NaN values 141 # 'total' for vector type stats, print the total. 142 # 'pdf' for vector type stats, print the pdf. 143 # 'cdf' for vector type stats, print the cdf. 144 # 145 # The Following are for dist type stats: 146 # 'min' is the minimum bucket value. Anything less is an underflow. 147 # 'max' is the maximum bucket value. Anything more is an overflow. 148 # 'bktsize' is the approximate number of entries in each bucket. 149 # 'size' is the number of buckets. equal to (min/max)/bktsize. 150 # 151 # INDEXES: 152 # 'stat' is indexed so that you can find out details about a stat 153 # if the stat id was retrieved from the data table. 154 # 'name' is indexed so that you can simply look up data about a 155 # named stat. 156 # 157 self.query(''' 158 CREATE TABLE stats( 159 st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 160 st_name VARCHAR(255) NOT NULL, 161 st_descr TEXT NOT NULL, 162 st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST", 163 "VECTOR2D", "FORMULA") NOT NULL, 164 st_print BOOL NOT NULL, 165 st_prereq SMALLINT UNSIGNED NOT NULL, 166 st_prec TINYINT NOT NULL, 167 st_nozero BOOL NOT NULL, 168 st_nonan BOOL NOT NULL, 169 st_total BOOL NOT NULL, 170 st_pdf BOOL NOT NULL, 171 st_cdf BOOL NOT NULL, 172 st_min DOUBLE NOT NULL, 173 st_max DOUBLE NOT NULL, 174 st_bktsize DOUBLE NOT NULL, 175 st_size SMALLINT UNSIGNED NOT NULL, 176 PRIMARY KEY (st_id), 177 UNIQUE (st_name) 178 ) TYPE=InnoDB''') 179 180 # 181 # This is the main table of data for stats. 182 # 183 # COLUMNS: 184 # 'stat' refers to the stat field given in the stat table. 185 # 186 # 'x' referrs to the first dimension of a multi-dimensional stat. For 187 # a vector, x will start at 0 and increase for each vector 188 # element. 189 # For a distribution: 190 # -1: sum (for calculating standard deviation) 191 # -2: sum of squares (for calculating standard deviation) 192 # -3: total number of samples taken (for calculating 193 # standard deviation) 194 # -4: minimum value 195 # -5: maximum value 196 # -6: underflow 197 # -7: overflow 198 # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second 199 # dimension. 200 # 'run' is the run that the data was generated from. Details up in 201 # the run table 202 # 'tick' is a timestamp generated by the simulator. 203 # 'data' is the actual stat value. 204 # 205 # INDEXES: 206 # 'stat' is indexed so that a user can find all of the data for a 207 # particular stat. It is not unique, because that specific stat 208 # can be found in many runs and samples, in addition to 209 # having entries for the mulidimensional cases. 210 # 'run' is indexed to allow a user to remove all of the data for a 211 # particular execution run. It can also be used to allow the 212 # user to print out all of the data for a given run. 213 # 214 self.query(''' 215 CREATE TABLE data( 216 dt_stat SMALLINT UNSIGNED NOT NULL, 217 dt_x SMALLINT NOT NULL, 218 dt_y SMALLINT NOT NULL, 219 dt_run SMALLINT UNSIGNED NOT NULL, 220 dt_tick BIGINT UNSIGNED NOT NULL, 221 dt_data DOUBLE NOT NULL, 222 INDEX (dt_stat), 223 INDEX (dt_run), 224 UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick) 225 ) TYPE=InnoDB;''') 226 227 # 228 # Names and descriptions for multi-dimensional stats (vectors, etc.) 229 # are stored here instead of having their own entry in the statistics 230 # table. This allows all parts of a single stat to easily share a 231 # single id. 232 # 233 # COLUMNS: 234 # 'stat' is the unique stat identifier from the stat table. 235 # 'x' is the first dimension for multi-dimensional stats 236 # corresponding to the data table above. 237 # 'y' is the second dimension for multi-dimensional stats 238 # corresponding to the data table above. 239 # 'name' is the specific subname for the unique stat,x,y combination. 240 # 'descr' is the specific description for the uniqe stat,x,y 241 # combination. 242 # 243 # INDEXES: 244 # 'stat' is indexed so you can get the subdata for a specific stat. 245 # 246 self.query(''' 247 CREATE TABLE subdata( 248 sd_stat SMALLINT UNSIGNED NOT NULL, 249 sd_x SMALLINT NOT NULL, 250 sd_y SMALLINT NOT NULL, 251 sd_name VARCHAR(255) NOT NULL, 252 sd_descr TEXT, 253 UNIQUE (sd_stat,sd_x,sd_y) 254 ) TYPE=InnoDB''') 255 256 257 # 258 # The formula table is maintained separately from the data table 259 # because formula data, unlike other stat data cannot be represented 260 # there. 261 # 262 # COLUMNS: 263 # 'stat' refers to the stat field generated in the stat table. 264 # 'formula' is the actual string representation of the formula 265 # itself. 266 # 267 # INDEXES: 268 # 'stat' is indexed so that you can just look up a formula. 269 # 270 self.query(''' 271 CREATE TABLE formulas( 272 fm_stat SMALLINT UNSIGNED NOT NULL, 273 fm_formula BLOB NOT NULL, 274 PRIMARY KEY(fm_stat) 275 ) TYPE=InnoDB''') 276 277 # 278 # Each stat used in each formula is kept in this table. This way, if 279 # you want to print out a particular formula, you can simply find out 280 # which stats you need by looking in this table. Additionally, when 281 # you remove a stat from the stats table and data table, you remove 282 # any references to the formula in this table. When a formula is no 283 # longer referred to, you remove its entry. 284 # 285 # COLUMNS: 286 # 'stat' is the stat id from the stat table above. 287 # 'child' is the stat id of a stat that is used for this formula. 288 # There may be many children for any given 'stat' (formula) 289 # 290 # INDEXES: 291 # 'stat' is indexed so you can look up all of the children for a 292 # particular stat. 293 # 'child' is indexed so that you can remove an entry when a stat is 294 # removed. 295 # 296 self.query(''' 297 CREATE TABLE formula_ref( 298 fr_stat SMALLINT UNSIGNED NOT NULL, 299 fr_run SMALLINT UNSIGNED NOT NULL, 300 UNIQUE (fr_stat,fr_run), 301 INDEX (fr_stat), 302 INDEX (fr_run) 303 ) TYPE=InnoDB''') 304 305 # COLUMNS: 306 # 'event' is the unique event id from the event_desc table 307 # 'run' is simulation run id that this event took place in 308 # 'tick' is the tick when the event happened 309 # 310 # INDEXES: 311 # 'event' is indexed so you can look up all occurences of a 312 # specific event 313 # 'run' is indexed so you can find all events in a run 314 # 'tick' is indexed because we want the unique thing anyway 315 # 'event,run,tick' is unique combination 316 self.query(''' 317 CREATE TABLE events( 318 ev_event SMALLINT UNSIGNED NOT NULL, 319 ev_run SMALLINT UNSIGNED NOT NULL, 320 ev_tick BIGINT UNSIGNED NOT NULL, 321 INDEX(ev_event), 322 INDEX(ev_run), 323 INDEX(ev_tick), 324 UNIQUE(ev_event,ev_run,ev_tick) 325 ) TYPE=InnoDB''') 326 327 # COLUMNS: 328 # 'id' is the unique description id 329 # 'name' is the name of the event that occurred 330 # 331 # INDEXES: 332 # 'id' is indexed because it is the primary key and is what you use 333 # to look up the descriptions 334 # 'name' is indexed so one can find the event based on name 335 # 336 self.query(''' 337 CREATE TABLE event_names( 338 en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 339 en_name VARCHAR(255) NOT NULL, 340 PRIMARY KEY (en_id), 341 UNIQUE (en_name) 342 ) TYPE=InnoDB''') 343 344 def clean(self): 345 self.query(''' 346 DELETE data 347 FROM data 348 LEFT JOIN runs ON dt_run=rn_id 349 WHERE rn_id IS NULL''') 350 351 self.query(''' 352 DELETE formula_ref 353 FROM formula_ref 354 LEFT JOIN runs ON fr_run=rn_id 355 WHERE rn_id IS NULL''') 356 357 self.query(''' 358 DELETE formulas 359 FROM formulas 360 LEFT JOIN formula_ref ON fm_stat=fr_stat 361 WHERE fr_stat IS NULL''') 362 363 self.query(''' 364 DELETE stats 365 FROM stats 366 LEFT JOIN data ON st_id=dt_stat 367 WHERE dt_stat IS NULL''') 368 369 self.query(''' 370 DELETE subdata 371 FROM subdata 372 LEFT JOIN data ON sd_stat=dt_stat 373 WHERE dt_stat IS NULL''') 374 375 self.query(''' 376 DELETE events 377 FROM events 378 LEFT JOIN runs ON ev_run=rn_id 379 WHERE rn_id IS NULL''') 380 381 self.query(''' 382 DELETE event_names 383 FROM event_names 384 LEFT JOIN events ON en_id=ev_event 385 WHERE ev_event IS NULL''') 386