Cross Reference: /gem5/util/stats/dbinit.py

Deleted Added

sdiff udiff text old ( 1049:b175a798c8d4 ) new ( 1758:74acd5b23964 )

full compact

dbinit.py (1049:b175a798c8d4)	dbinit.py (1758:74acd5b23964)
	1 2# Copyright (c) 2003-2004 The Regents of The University of Michigan 3# All rights reserved. 4# 5# Redistribution and use in source and binary forms, with or without 6# modification, are permitted provided that the following conditions are 7# met: redistributions of source code must retain the above copyright 8# notice, this list of conditions and the following disclaimer; 9# redistributions in binary form must reproduce the above copyright 10# notice, this list of conditions and the following disclaimer in the 11# documentation and/or other materials provided with the distribution; 12# neither the name of the copyright holders nor the names of its 13# contributors may be used to endorse or promote products derived from 14# this software without specific prior written permission. 15# 16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28#Permission is granted to use, copy, create derivative works and 29#redistribute this software and such derivative works for any purpose, 30#so long as the copyright notice above, this grant of permission, and 31#the disclaimer below appear in all copies made; and so long as the 32#name of The University of Michigan is not used in any advertising or 33#publicity pertaining to the use or distribution of this software 34#without specific, written prior authorization. 35# 36#THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION FROM THE 37#UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY PURPOSE, AND WITHOUT 38#WARRANTY BY THE UNIVERSITY OF MICHIGAN OF ANY KIND, EITHER EXPRESS OR 39#IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF 40#MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE REGENTS OF 41#THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE FOR ANY DAMAGES, 42#INCLUDING DIRECT, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL 43#DAMAGES, WITH RESPECT TO ANY CLAIM ARISING OUT OF OR IN CONNECTION 44#WITH THE USE OF THE SOFTWARE, EVEN IF IT HAS BEEN OR IS HEREAFTER 45#ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 46
1import MySQLdb 2 3class MyDB(object): 4 def __init__(self, options): 5 self.name = options.db 6 self.host = options.host 7 self.user = options.user 8 self.passwd = options.passwd 9 self.mydb = None 10 self.cursor = None 11 12 def admin(self): 13 self.close() 14 self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user, 15 passwd=self.passwd) 16 self.cursor = self.mydb.cursor() 17 18 def connect(self): 19 self.close() 20 self.mydb = MySQLdb.connect(db=self.name, host=self.host, 21 user=self.user, passwd=self.passwd) 22 self.cursor = self.mydb.cursor() 23 24 def close(self): 25 if self.mydb is not None: 26 self.mydb.close() 27 self.cursor = None 28 29 def query(self, sql): 30 self.cursor.execute(sql) 31 32 def drop(self): 33 self.query('DROP DATABASE IF EXISTS %s' % self.name) 34 35 def create(self): 36 self.query('CREATE DATABASE %s' % self.name) 37 38 def populate(self): 39 # 40 # Each run (or simulation) gets its own entry in the runs table to 41 # group stats by where they were generated 42 # 43 # COLUMNS: 44 # 'id' is a unique identifier for each run to be used in other 45 # tables. 46 # 'name' is the user designated name for the data generated. It is 47 # configured in the simulator. 48 # 'user' identifies the user that generated the data for the given 49 # run. 50 # 'project' another name to identify runs for a specific goal 51 # 'date' is a timestamp for when the data was generated. It can be 52 # used to easily expire data that was generated in the past. 53 # 'expire' is a timestamp for when the data should be removed from 54 # the database so we don't have years worth of junk. 55 # 56 # INDEXES: 57 # 'run' is indexed so you can find out details of a run if the run 58 # was retreived from the data table. 59 # 'name' is indexed so that two all run names are forced to be unique 60 # 61 self.query(''' 62 CREATE TABLE runs( 63 rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 64 rn_name VARCHAR(200) NOT NULL, 65 rn_sample VARCHAR(32) NOT NULL, 66 rn_user VARCHAR(32) NOT NULL, 67 rn_project VARCHAR(100) NOT NULL, 68 rn_date TIMESTAMP NOT NULL, 69 rn_expire TIMESTAMP NOT NULL, 70 PRIMARY KEY (rn_id), 71 UNIQUE (rn_name,rn_sample) 72 ) TYPE=InnoDB''') 73 74 # 75 # We keep the bin names separate so that the data table doesn't get 76 # huge since bin names are frequently repeated. 77 # 78 # COLUMNS: 79 # 'id' is the unique bin identifer. 80 # 'name' is the string name for the bin. 81 # 82 # INDEXES: 83 # 'bin' is indexed to get the name of a bin when data is retrieved 84 # via the data table. 85 # 'name' is indexed to get the bin id for a named bin when you want 86 # to search the data table based on a specific bin. 87 # 88 self.query(''' 89 CREATE TABLE bins( 90 bn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 91 bn_name VARCHAR(255) NOT NULL, 92 PRIMARY KEY(bn_id), 93 UNIQUE (bn_name) 94 ) TYPE=InnoDB''') 95 96 # 97 # The stat table gives us all of the data for a particular stat. 98 # 99 # COLUMNS: 100 # 'stat' is a unique identifier for each stat to be used in other 101 # tables for references. 102 # 'name' is simply the simulator derived name for a given 103 # statistic. 104 # 'descr' is the description of the statistic and what it tells 105 # you. 106 # 'type' defines what the stat tells you. Types are: 107 # SCALAR: A simple scalar statistic that holds one value 108 # VECTOR: An array of statistic values. Such a something that 109 # is generated per-thread. Vectors exist to give averages, 110 # pdfs, cdfs, means, standard deviations, etc across the 111 # stat values. 112 # DIST: Is a distribution of data. When the statistic value is 113 # sampled, its value is counted in a particular bucket. 114 # Useful for keeping track of utilization of a resource. 115 # (e.g. fraction of time it is 25% used vs. 50% vs. 100%) 116 # VECTORDIST: Can be used when the distribution needs to be 117 # factored out into a per-thread distribution of data for 118 # example. It can still be summed across threads to find 119 # the total distribution. 120 # VECTOR2D: Can be used when you have a stat that is not only 121 # per-thread, but it is per-something else. Like 122 # per-message type. 123 # FORMULA: This statistic is a formula, and its data must be 124 # looked up in the formula table, for indicating how to 125 # present its values. 126 # 'subdata' is potentially used by any of the vector types to 127 # give a specific name to all of the data elements within a 128 # stat. 129 # 'print' indicates whether this stat should be printed ever. 130 # (Unnamed stats don't usually get printed) 131 # 'prereq' only print the stat if the prereq is not zero. 132 # 'prec' number of decimal places to print 133 # 'nozero' don't print zero values 134 # 'nonan' don't print NaN values 135 # 'total' for vector type stats, print the total. 136 # 'pdf' for vector type stats, print the pdf. 137 # 'cdf' for vector type stats, print the cdf. 138 # 139 # The Following are for dist type stats: 140 # 'min' is the minimum bucket value. Anything less is an underflow. 141 # 'max' is the maximum bucket value. Anything more is an overflow. 142 # 'bktsize' is the approximate number of entries in each bucket. 143 # 'size' is the number of buckets. equal to (min/max)/bktsize. 144 # 145 # INDEXES: 146 # 'stat' is indexed so that you can find out details about a stat 147 # if the stat id was retrieved from the data table. 148 # 'name' is indexed so that you can simply look up data about a 149 # named stat. 150 # 151 self.query(''' 152 CREATE TABLE stats( 153 st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 154 st_name VARCHAR(255) NOT NULL, 155 st_descr TEXT NOT NULL, 156 st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST", 157 "VECTOR2D", "FORMULA") NOT NULL, 158 st_print BOOL NOT NULL, 159 st_prereq SMALLINT UNSIGNED NOT NULL, 160 st_prec TINYINT NOT NULL, 161 st_nozero BOOL NOT NULL, 162 st_nonan BOOL NOT NULL, 163 st_total BOOL NOT NULL, 164 st_pdf BOOL NOT NULL, 165 st_cdf BOOL NOT NULL, 166 st_min DOUBLE NOT NULL, 167 st_max DOUBLE NOT NULL, 168 st_bktsize DOUBLE NOT NULL, 169 st_size SMALLINT UNSIGNED NOT NULL, 170 PRIMARY KEY (st_id), 171 UNIQUE (st_name) 172 ) TYPE=InnoDB''') 173 174 # 175 # This is the main table of data for stats. 176 # 177 # COLUMNS: 178 # 'stat' refers to the stat field given in the stat table. 179 # 180 # 'x' referrs to the first dimension of a multi-dimensional stat. For 181 # a vector, x will start at 0 and increase for each vector 182 # element. 183 # For a distribution: 184 # -1: sum (for calculating standard deviation) 185 # -2: sum of squares (for calculating standard deviation) 186 # -3: total number of samples taken (for calculating 187 # standard deviation) 188 # -4: minimum value 189 # -5: maximum value 190 # -6: underflow 191 # -7: overflow 192 # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second 193 # dimension. 194 # 'run' is the run that the data was generated from. Details up in 195 # the run table 196 # 'tick' is a timestamp generated by the simulator. 197 # 'bin' is the name of the bin that the data was generated in, if 198 # any. 199 # 'data' is the actual stat value. 200 # 201 # INDEXES: 202 # 'stat' is indexed so that a user can find all of the data for a 203 # particular stat. It is not unique, because that specific stat 204 # can be found in many runs, bins, and samples, in addition to 205 # having entries for the mulidimensional cases. 206 # 'run' is indexed to allow a user to remove all of the data for a 207 # particular execution run. It can also be used to allow the 208 # user to print out all of the data for a given run. 209 # 210 self.query(''' 211 CREATE TABLE data( 212 dt_stat SMALLINT UNSIGNED NOT NULL, 213 dt_x SMALLINT NOT NULL, 214 dt_y SMALLINT NOT NULL, 215 dt_run SMALLINT UNSIGNED NOT NULL, 216 dt_tick BIGINT UNSIGNED NOT NULL, 217 dt_bin SMALLINT UNSIGNED NOT NULL, 218 dt_data DOUBLE NOT NULL, 219 INDEX (dt_stat), 220 INDEX (dt_run), 221 UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin) 222 ) TYPE=InnoDB;''') 223 224 # 225 # Names and descriptions for multi-dimensional stats (vectors, etc.) 226 # are stored here instead of having their own entry in the statistics 227 # table. This allows all parts of a single stat to easily share a 228 # single id. 229 # 230 # COLUMNS: 231 # 'stat' is the unique stat identifier from the stat table. 232 # 'x' is the first dimension for multi-dimensional stats 233 # corresponding to the data table above. 234 # 'y' is the second dimension for multi-dimensional stats 235 # corresponding to the data table above. 236 # 'name' is the specific subname for the unique stat,x,y combination. 237 # 'descr' is the specific description for the uniqe stat,x,y 238 # combination. 239 # 240 # INDEXES: 241 # 'stat' is indexed so you can get the subdata for a specific stat. 242 # 243 self.query(''' 244 CREATE TABLE subdata( 245 sd_stat SMALLINT UNSIGNED NOT NULL, 246 sd_x SMALLINT NOT NULL, 247 sd_y SMALLINT NOT NULL, 248 sd_name VARCHAR(255) NOT NULL, 249 sd_descr TEXT, 250 UNIQUE (sd_stat,sd_x,sd_y) 251 ) TYPE=InnoDB''') 252 253 254 # 255 # The formula table is maintained separately from the data table 256 # because formula data, unlike other stat data cannot be represented 257 # there. 258 # 259 # COLUMNS: 260 # 'stat' refers to the stat field generated in the stat table. 261 # 'formula' is the actual string representation of the formula 262 # itself. 263 # 264 # INDEXES: 265 # 'stat' is indexed so that you can just look up a formula. 266 # 267 self.query(''' 268 CREATE TABLE formulas( 269 fm_stat SMALLINT UNSIGNED NOT NULL, 270 fm_formula BLOB NOT NULL, 271 PRIMARY KEY(fm_stat) 272 ) TYPE=InnoDB''') 273 274 # 275 # Each stat used in each formula is kept in this table. This way, if 276 # you want to print out a particular formula, you can simply find out 277 # which stats you need by looking in this table. Additionally, when 278 # you remove a stat from the stats table and data table, you remove 279 # any references to the formula in this table. When a formula is no 280 # longer referred to, you remove its entry. 281 # 282 # COLUMNS: 283 # 'stat' is the stat id from the stat table above. 284 # 'child' is the stat id of a stat that is used for this formula. 285 # There may be many children for any given 'stat' (formula) 286 # 287 # INDEXES: 288 # 'stat' is indexed so you can look up all of the children for a 289 # particular stat. 290 # 'child' is indexed so that you can remove an entry when a stat is 291 # removed. 292 # 293 self.query(''' 294 CREATE TABLE formula_ref( 295 fr_stat SMALLINT UNSIGNED NOT NULL, 296 fr_run SMALLINT UNSIGNED NOT NULL, 297 UNIQUE (fr_stat,fr_run), 298 INDEX (fr_stat), 299 INDEX (fr_run) 300 ) TYPE=InnoDB''') 301 302 # COLUMNS: 303 # 'event' is the unique event id from the event_desc table 304 # 'run' is simulation run id that this event took place in 305 # 'tick' is the tick when the event happened 306 # 307 # INDEXES: 308 # 'event' is indexed so you can look up all occurences of a 309 # specific event 310 # 'run' is indexed so you can find all events in a run 311 # 'tick' is indexed because we want the unique thing anyway 312 # 'event,run,tick' is unique combination 313 self.query(''' 314 CREATE TABLE events( 315 ev_event SMALLINT UNSIGNED NOT NULL, 316 ev_run SMALLINT UNSIGNED NOT NULL, 317 ev_tick BIGINT UNSIGNED NOT NULL, 318 INDEX(ev_event), 319 INDEX(ev_run), 320 INDEX(ev_tick), 321 UNIQUE(ev_event,ev_run,ev_tick) 322 ) TYPE=InnoDB''') 323 324 # COLUMNS: 325 # 'id' is the unique description id 326 # 'name' is the name of the event that occurred 327 # 328 # INDEXES: 329 # 'id' is indexed because it is the primary key and is what you use 330 # to look up the descriptions 331 # 'name' is indexed so one can find the event based on name 332 # 333 self.query(''' 334 CREATE TABLE event_names( 335 en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 336 en_name VARCHAR(255) NOT NULL, 337 PRIMARY KEY (en_id), 338 UNIQUE (en_name) 339 ) TYPE=InnoDB''') 340 341 def clean(self): 342 self.query(''' 343 DELETE data 344 FROM data 345 LEFT JOIN runs ON dt_run=rn_id 346 WHERE rn_id IS NULL''') 347 348 self.query(''' 349 DELETE formula_ref 350 FROM formula_ref 351 LEFT JOIN runs ON fr_run=rn_id 352 WHERE rn_id IS NULL''') 353 354 self.query(''' 355 DELETE formulas 356 FROM formulas 357 LEFT JOIN formula_ref ON fm_stat=fr_stat 358 WHERE fr_stat IS NULL''') 359 360 self.query(''' 361 DELETE stats 362 FROM stats 363 LEFT JOIN data ON st_id=dt_stat 364 WHERE dt_stat IS NULL''') 365 366 self.query(''' 367 DELETE subdata 368 FROM subdata 369 LEFT JOIN data ON sd_stat=dt_stat 370 WHERE dt_stat IS NULL''') 371 372 self.query(''' 373 DELETE bins 374 FROM bins 375 LEFT JOIN data ON bn_id=dt_bin 376 WHERE dt_bin IS NULL''') 377 378 self.query(''' 379 DELETE events 380 FROM events 381 LEFT JOIN runs ON ev_run=rn_id 382 WHERE rn_id IS NULL''') 383 384 self.query(''' 385 DELETE event_names 386 FROM event_names 387 LEFT JOIN events ON en_id=ev_event 388 WHERE ev_event IS NULL''')	47import MySQLdb 48 49class MyDB(object): 50 def __init__(self, options): 51 self.name = options.db 52 self.host = options.host 53 self.user = options.user 54 self.passwd = options.passwd 55 self.mydb = None 56 self.cursor = None 57 58 def admin(self): 59 self.close() 60 self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user, 61 passwd=self.passwd) 62 self.cursor = self.mydb.cursor() 63 64 def connect(self): 65 self.close() 66 self.mydb = MySQLdb.connect(db=self.name, host=self.host, 67 user=self.user, passwd=self.passwd) 68 self.cursor = self.mydb.cursor() 69 70 def close(self): 71 if self.mydb is not None: 72 self.mydb.close() 73 self.cursor = None 74 75 def query(self, sql): 76 self.cursor.execute(sql) 77 78 def drop(self): 79 self.query('DROP DATABASE IF EXISTS %s' % self.name) 80 81 def create(self): 82 self.query('CREATE DATABASE %s' % self.name) 83 84 def populate(self): 85 # 86 # Each run (or simulation) gets its own entry in the runs table to 87 # group stats by where they were generated 88 # 89 # COLUMNS: 90 # 'id' is a unique identifier for each run to be used in other 91 # tables. 92 # 'name' is the user designated name for the data generated. It is 93 # configured in the simulator. 94 # 'user' identifies the user that generated the data for the given 95 # run. 96 # 'project' another name to identify runs for a specific goal 97 # 'date' is a timestamp for when the data was generated. It can be 98 # used to easily expire data that was generated in the past. 99 # 'expire' is a timestamp for when the data should be removed from 100 # the database so we don't have years worth of junk. 101 # 102 # INDEXES: 103 # 'run' is indexed so you can find out details of a run if the run 104 # was retreived from the data table. 105 # 'name' is indexed so that two all run names are forced to be unique 106 # 107 self.query(''' 108 CREATE TABLE runs( 109 rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 110 rn_name VARCHAR(200) NOT NULL, 111 rn_sample VARCHAR(32) NOT NULL, 112 rn_user VARCHAR(32) NOT NULL, 113 rn_project VARCHAR(100) NOT NULL, 114 rn_date TIMESTAMP NOT NULL, 115 rn_expire TIMESTAMP NOT NULL, 116 PRIMARY KEY (rn_id), 117 UNIQUE (rn_name,rn_sample) 118 ) TYPE=InnoDB''') 119 120 # 121 # We keep the bin names separate so that the data table doesn't get 122 # huge since bin names are frequently repeated. 123 # 124 # COLUMNS: 125 # 'id' is the unique bin identifer. 126 # 'name' is the string name for the bin. 127 # 128 # INDEXES: 129 # 'bin' is indexed to get the name of a bin when data is retrieved 130 # via the data table. 131 # 'name' is indexed to get the bin id for a named bin when you want 132 # to search the data table based on a specific bin. 133 # 134 self.query(''' 135 CREATE TABLE bins( 136 bn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 137 bn_name VARCHAR(255) NOT NULL, 138 PRIMARY KEY(bn_id), 139 UNIQUE (bn_name) 140 ) TYPE=InnoDB''') 141 142 # 143 # The stat table gives us all of the data for a particular stat. 144 # 145 # COLUMNS: 146 # 'stat' is a unique identifier for each stat to be used in other 147 # tables for references. 148 # 'name' is simply the simulator derived name for a given 149 # statistic. 150 # 'descr' is the description of the statistic and what it tells 151 # you. 152 # 'type' defines what the stat tells you. Types are: 153 # SCALAR: A simple scalar statistic that holds one value 154 # VECTOR: An array of statistic values. Such a something that 155 # is generated per-thread. Vectors exist to give averages, 156 # pdfs, cdfs, means, standard deviations, etc across the 157 # stat values. 158 # DIST: Is a distribution of data. When the statistic value is 159 # sampled, its value is counted in a particular bucket. 160 # Useful for keeping track of utilization of a resource. 161 # (e.g. fraction of time it is 25% used vs. 50% vs. 100%) 162 # VECTORDIST: Can be used when the distribution needs to be 163 # factored out into a per-thread distribution of data for 164 # example. It can still be summed across threads to find 165 # the total distribution. 166 # VECTOR2D: Can be used when you have a stat that is not only 167 # per-thread, but it is per-something else. Like 168 # per-message type. 169 # FORMULA: This statistic is a formula, and its data must be 170 # looked up in the formula table, for indicating how to 171 # present its values. 172 # 'subdata' is potentially used by any of the vector types to 173 # give a specific name to all of the data elements within a 174 # stat. 175 # 'print' indicates whether this stat should be printed ever. 176 # (Unnamed stats don't usually get printed) 177 # 'prereq' only print the stat if the prereq is not zero. 178 # 'prec' number of decimal places to print 179 # 'nozero' don't print zero values 180 # 'nonan' don't print NaN values 181 # 'total' for vector type stats, print the total. 182 # 'pdf' for vector type stats, print the pdf. 183 # 'cdf' for vector type stats, print the cdf. 184 # 185 # The Following are for dist type stats: 186 # 'min' is the minimum bucket value. Anything less is an underflow. 187 # 'max' is the maximum bucket value. Anything more is an overflow. 188 # 'bktsize' is the approximate number of entries in each bucket. 189 # 'size' is the number of buckets. equal to (min/max)/bktsize. 190 # 191 # INDEXES: 192 # 'stat' is indexed so that you can find out details about a stat 193 # if the stat id was retrieved from the data table. 194 # 'name' is indexed so that you can simply look up data about a 195 # named stat. 196 # 197 self.query(''' 198 CREATE TABLE stats( 199 st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 200 st_name VARCHAR(255) NOT NULL, 201 st_descr TEXT NOT NULL, 202 st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST", 203 "VECTOR2D", "FORMULA") NOT NULL, 204 st_print BOOL NOT NULL, 205 st_prereq SMALLINT UNSIGNED NOT NULL, 206 st_prec TINYINT NOT NULL, 207 st_nozero BOOL NOT NULL, 208 st_nonan BOOL NOT NULL, 209 st_total BOOL NOT NULL, 210 st_pdf BOOL NOT NULL, 211 st_cdf BOOL NOT NULL, 212 st_min DOUBLE NOT NULL, 213 st_max DOUBLE NOT NULL, 214 st_bktsize DOUBLE NOT NULL, 215 st_size SMALLINT UNSIGNED NOT NULL, 216 PRIMARY KEY (st_id), 217 UNIQUE (st_name) 218 ) TYPE=InnoDB''') 219 220 # 221 # This is the main table of data for stats. 222 # 223 # COLUMNS: 224 # 'stat' refers to the stat field given in the stat table. 225 # 226 # 'x' referrs to the first dimension of a multi-dimensional stat. For 227 # a vector, x will start at 0 and increase for each vector 228 # element. 229 # For a distribution: 230 # -1: sum (for calculating standard deviation) 231 # -2: sum of squares (for calculating standard deviation) 232 # -3: total number of samples taken (for calculating 233 # standard deviation) 234 # -4: minimum value 235 # -5: maximum value 236 # -6: underflow 237 # -7: overflow 238 # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second 239 # dimension. 240 # 'run' is the run that the data was generated from. Details up in 241 # the run table 242 # 'tick' is a timestamp generated by the simulator. 243 # 'bin' is the name of the bin that the data was generated in, if 244 # any. 245 # 'data' is the actual stat value. 246 # 247 # INDEXES: 248 # 'stat' is indexed so that a user can find all of the data for a 249 # particular stat. It is not unique, because that specific stat 250 # can be found in many runs, bins, and samples, in addition to 251 # having entries for the mulidimensional cases. 252 # 'run' is indexed to allow a user to remove all of the data for a 253 # particular execution run. It can also be used to allow the 254 # user to print out all of the data for a given run. 255 # 256 self.query(''' 257 CREATE TABLE data( 258 dt_stat SMALLINT UNSIGNED NOT NULL, 259 dt_x SMALLINT NOT NULL, 260 dt_y SMALLINT NOT NULL, 261 dt_run SMALLINT UNSIGNED NOT NULL, 262 dt_tick BIGINT UNSIGNED NOT NULL, 263 dt_bin SMALLINT UNSIGNED NOT NULL, 264 dt_data DOUBLE NOT NULL, 265 INDEX (dt_stat), 266 INDEX (dt_run), 267 UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin) 268 ) TYPE=InnoDB;''') 269 270 # 271 # Names and descriptions for multi-dimensional stats (vectors, etc.) 272 # are stored here instead of having their own entry in the statistics 273 # table. This allows all parts of a single stat to easily share a 274 # single id. 275 # 276 # COLUMNS: 277 # 'stat' is the unique stat identifier from the stat table. 278 # 'x' is the first dimension for multi-dimensional stats 279 # corresponding to the data table above. 280 # 'y' is the second dimension for multi-dimensional stats 281 # corresponding to the data table above. 282 # 'name' is the specific subname for the unique stat,x,y combination. 283 # 'descr' is the specific description for the uniqe stat,x,y 284 # combination. 285 # 286 # INDEXES: 287 # 'stat' is indexed so you can get the subdata for a specific stat. 288 # 289 self.query(''' 290 CREATE TABLE subdata( 291 sd_stat SMALLINT UNSIGNED NOT NULL, 292 sd_x SMALLINT NOT NULL, 293 sd_y SMALLINT NOT NULL, 294 sd_name VARCHAR(255) NOT NULL, 295 sd_descr TEXT, 296 UNIQUE (sd_stat,sd_x,sd_y) 297 ) TYPE=InnoDB''') 298 299 300 # 301 # The formula table is maintained separately from the data table 302 # because formula data, unlike other stat data cannot be represented 303 # there. 304 # 305 # COLUMNS: 306 # 'stat' refers to the stat field generated in the stat table. 307 # 'formula' is the actual string representation of the formula 308 # itself. 309 # 310 # INDEXES: 311 # 'stat' is indexed so that you can just look up a formula. 312 # 313 self.query(''' 314 CREATE TABLE formulas( 315 fm_stat SMALLINT UNSIGNED NOT NULL, 316 fm_formula BLOB NOT NULL, 317 PRIMARY KEY(fm_stat) 318 ) TYPE=InnoDB''') 319 320 # 321 # Each stat used in each formula is kept in this table. This way, if 322 # you want to print out a particular formula, you can simply find out 323 # which stats you need by looking in this table. Additionally, when 324 # you remove a stat from the stats table and data table, you remove 325 # any references to the formula in this table. When a formula is no 326 # longer referred to, you remove its entry. 327 # 328 # COLUMNS: 329 # 'stat' is the stat id from the stat table above. 330 # 'child' is the stat id of a stat that is used for this formula. 331 # There may be many children for any given 'stat' (formula) 332 # 333 # INDEXES: 334 # 'stat' is indexed so you can look up all of the children for a 335 # particular stat. 336 # 'child' is indexed so that you can remove an entry when a stat is 337 # removed. 338 # 339 self.query(''' 340 CREATE TABLE formula_ref( 341 fr_stat SMALLINT UNSIGNED NOT NULL, 342 fr_run SMALLINT UNSIGNED NOT NULL, 343 UNIQUE (fr_stat,fr_run), 344 INDEX (fr_stat), 345 INDEX (fr_run) 346 ) TYPE=InnoDB''') 347 348 # COLUMNS: 349 # 'event' is the unique event id from the event_desc table 350 # 'run' is simulation run id that this event took place in 351 # 'tick' is the tick when the event happened 352 # 353 # INDEXES: 354 # 'event' is indexed so you can look up all occurences of a 355 # specific event 356 # 'run' is indexed so you can find all events in a run 357 # 'tick' is indexed because we want the unique thing anyway 358 # 'event,run,tick' is unique combination 359 self.query(''' 360 CREATE TABLE events( 361 ev_event SMALLINT UNSIGNED NOT NULL, 362 ev_run SMALLINT UNSIGNED NOT NULL, 363 ev_tick BIGINT UNSIGNED NOT NULL, 364 INDEX(ev_event), 365 INDEX(ev_run), 366 INDEX(ev_tick), 367 UNIQUE(ev_event,ev_run,ev_tick) 368 ) TYPE=InnoDB''') 369 370 # COLUMNS: 371 # 'id' is the unique description id 372 # 'name' is the name of the event that occurred 373 # 374 # INDEXES: 375 # 'id' is indexed because it is the primary key and is what you use 376 # to look up the descriptions 377 # 'name' is indexed so one can find the event based on name 378 # 379 self.query(''' 380 CREATE TABLE event_names( 381 en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 382 en_name VARCHAR(255) NOT NULL, 383 PRIMARY KEY (en_id), 384 UNIQUE (en_name) 385 ) TYPE=InnoDB''') 386 387 def clean(self): 388 self.query(''' 389 DELETE data 390 FROM data 391 LEFT JOIN runs ON dt_run=rn_id 392 WHERE rn_id IS NULL''') 393 394 self.query(''' 395 DELETE formula_ref 396 FROM formula_ref 397 LEFT JOIN runs ON fr_run=rn_id 398 WHERE rn_id IS NULL''') 399 400 self.query(''' 401 DELETE formulas 402 FROM formulas 403 LEFT JOIN formula_ref ON fm_stat=fr_stat 404 WHERE fr_stat IS NULL''') 405 406 self.query(''' 407 DELETE stats 408 FROM stats 409 LEFT JOIN data ON st_id=dt_stat 410 WHERE dt_stat IS NULL''') 411 412 self.query(''' 413 DELETE subdata 414 FROM subdata 415 LEFT JOIN data ON sd_stat=dt_stat 416 WHERE dt_stat IS NULL''') 417 418 self.query(''' 419 DELETE bins 420 FROM bins 421 LEFT JOIN data ON bn_id=dt_bin 422 WHERE dt_bin IS NULL''') 423 424 self.query(''' 425 DELETE events 426 FROM events 427 LEFT JOIN runs ON ev_run=rn_id 428 WHERE rn_id IS NULL''') 429 430 self.query(''' 431 DELETE event_names 432 FROM event_names 433 LEFT JOIN events ON en_id=ev_event 434 WHERE ev_event IS NULL''')

dbinit.py (1049:b175a798c8d4)

dbinit.py (1758:74acd5b23964)

1
2# Copyright (c) 2003-2004 The Regents of The University of Michigan
3# All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met: redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer;
9# redistributions in binary form must reproduce the above copyright
10# notice, this list of conditions and the following disclaimer in the
11# documentation and/or other materials provided with the distribution;
12# neither the name of the copyright holders nor the names of its
13# contributors may be used to endorse or promote products derived from
14# this software without specific prior written permission.
15#
16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#Permission is granted to use, copy, create derivative works and
29#redistribute this software and such derivative works for any purpose,
30#so long as the copyright notice above, this grant of permission, and
31#the disclaimer below appear in all copies made; and so long as the
32#name of The University of Michigan is not used in any advertising or
33#publicity pertaining to the use or distribution of this software
34#without specific, written prior authorization.
35#
36#THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION FROM THE
37#UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY PURPOSE, AND WITHOUT
38#WARRANTY BY THE UNIVERSITY OF MICHIGAN OF ANY KIND, EITHER EXPRESS OR
39#IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
40#MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE REGENTS OF
41#THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE FOR ANY DAMAGES,
42#INCLUDING DIRECT, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
43#DAMAGES, WITH RESPECT TO ANY CLAIM ARISING OUT OF OR IN CONNECTION
44#WITH THE USE OF THE SOFTWARE, EVEN IF IT HAS BEEN OR IS HEREAFTER
45#ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
46

1import MySQLdb
2
3class MyDB(object):
4 def __init__(self, options):
5 self.name = options.db
6 self.host = options.host
7 self.user = options.user
8 self.passwd = options.passwd
9 self.mydb = None
10 self.cursor = None
11
12 def admin(self):
13 self.close()
14 self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
15 passwd=self.passwd)
16 self.cursor = self.mydb.cursor()
17
18 def connect(self):
19 self.close()
20 self.mydb = MySQLdb.connect(db=self.name, host=self.host,
21 user=self.user, passwd=self.passwd)
22 self.cursor = self.mydb.cursor()
23
24 def close(self):
25 if self.mydb is not None:
26 self.mydb.close()
27 self.cursor = None
28
29 def query(self, sql):
30 self.cursor.execute(sql)
31
32 def drop(self):
33 self.query('DROP DATABASE IF EXISTS %s' % self.name)
34
35 def create(self):
36 self.query('CREATE DATABASE %s' % self.name)
37
38 def populate(self):
39 #
40 # Each run (or simulation) gets its own entry in the runs table to
41 # group stats by where they were generated
42 #
43 # COLUMNS:
44 # 'id' is a unique identifier for each run to be used in other
45 # tables.
46 # 'name' is the user designated name for the data generated. It is
47 # configured in the simulator.
48 # 'user' identifies the user that generated the data for the given
49 # run.
50 # 'project' another name to identify runs for a specific goal
51 # 'date' is a timestamp for when the data was generated. It can be
52 # used to easily expire data that was generated in the past.
53 # 'expire' is a timestamp for when the data should be removed from
54 # the database so we don't have years worth of junk.
55 #
56 # INDEXES:
57 # 'run' is indexed so you can find out details of a run if the run
58 # was retreived from the data table.
59 # 'name' is indexed so that two all run names are forced to be unique
60 #
61 self.query('''
62 CREATE TABLE runs(
63 rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
64 rn_name VARCHAR(200) NOT NULL,
65 rn_sample VARCHAR(32) NOT NULL,
66 rn_user VARCHAR(32) NOT NULL,
67 rn_project VARCHAR(100) NOT NULL,
68 rn_date TIMESTAMP NOT NULL,
69 rn_expire TIMESTAMP NOT NULL,
70 PRIMARY KEY (rn_id),
71 UNIQUE (rn_name,rn_sample)
72 ) TYPE=InnoDB''')
73
74 #
75 # We keep the bin names separate so that the data table doesn't get
76 # huge since bin names are frequently repeated.
77 #
78 # COLUMNS:
79 # 'id' is the unique bin identifer.
80 # 'name' is the string name for the bin.
81 #
82 # INDEXES:
83 # 'bin' is indexed to get the name of a bin when data is retrieved
84 # via the data table.
85 # 'name' is indexed to get the bin id for a named bin when you want
86 # to search the data table based on a specific bin.
87 #
88 self.query('''
89 CREATE TABLE bins(
90 bn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
91 bn_name VARCHAR(255) NOT NULL,
92 PRIMARY KEY(bn_id),
93 UNIQUE (bn_name)
94 ) TYPE=InnoDB''')
95
96 #
97 # The stat table gives us all of the data for a particular stat.
98 #
99 # COLUMNS:
100 # 'stat' is a unique identifier for each stat to be used in other
101 # tables for references.
102 # 'name' is simply the simulator derived name for a given
103 # statistic.
104 # 'descr' is the description of the statistic and what it tells
105 # you.
106 # 'type' defines what the stat tells you. Types are:
107 # SCALAR: A simple scalar statistic that holds one value
108 # VECTOR: An array of statistic values. Such a something that
109 # is generated per-thread. Vectors exist to give averages,
110 # pdfs, cdfs, means, standard deviations, etc across the
111 # stat values.
112 # DIST: Is a distribution of data. When the statistic value is
113 # sampled, its value is counted in a particular bucket.
114 # Useful for keeping track of utilization of a resource.
115 # (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
116 # VECTORDIST: Can be used when the distribution needs to be
117 # factored out into a per-thread distribution of data for
118 # example. It can still be summed across threads to find
119 # the total distribution.
120 # VECTOR2D: Can be used when you have a stat that is not only
121 # per-thread, but it is per-something else. Like
122 # per-message type.
123 # FORMULA: This statistic is a formula, and its data must be
124 # looked up in the formula table, for indicating how to
125 # present its values.
126 # 'subdata' is potentially used by any of the vector types to
127 # give a specific name to all of the data elements within a
128 # stat.
129 # 'print' indicates whether this stat should be printed ever.
130 # (Unnamed stats don't usually get printed)
131 # 'prereq' only print the stat if the prereq is not zero.
132 # 'prec' number of decimal places to print
133 # 'nozero' don't print zero values
134 # 'nonan' don't print NaN values
135 # 'total' for vector type stats, print the total.
136 # 'pdf' for vector type stats, print the pdf.
137 # 'cdf' for vector type stats, print the cdf.
138 #
139 # The Following are for dist type stats:
140 # 'min' is the minimum bucket value. Anything less is an underflow.
141 # 'max' is the maximum bucket value. Anything more is an overflow.
142 # 'bktsize' is the approximate number of entries in each bucket.
143 # 'size' is the number of buckets. equal to (min/max)/bktsize.
144 #
145 # INDEXES:
146 # 'stat' is indexed so that you can find out details about a stat
147 # if the stat id was retrieved from the data table.
148 # 'name' is indexed so that you can simply look up data about a
149 # named stat.
150 #
151 self.query('''
152 CREATE TABLE stats(
153 st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
154 st_name VARCHAR(255) NOT NULL,
155 st_descr TEXT NOT NULL,
156 st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
157 "VECTOR2D", "FORMULA") NOT NULL,
158 st_print BOOL NOT NULL,
159 st_prereq SMALLINT UNSIGNED NOT NULL,
160 st_prec TINYINT NOT NULL,
161 st_nozero BOOL NOT NULL,
162 st_nonan BOOL NOT NULL,
163 st_total BOOL NOT NULL,
164 st_pdf BOOL NOT NULL,
165 st_cdf BOOL NOT NULL,
166 st_min DOUBLE NOT NULL,
167 st_max DOUBLE NOT NULL,
168 st_bktsize DOUBLE NOT NULL,
169 st_size SMALLINT UNSIGNED NOT NULL,
170 PRIMARY KEY (st_id),
171 UNIQUE (st_name)
172 ) TYPE=InnoDB''')
173
174 #
175 # This is the main table of data for stats.
176 #
177 # COLUMNS:
178 # 'stat' refers to the stat field given in the stat table.
179 #
180 # 'x' referrs to the first dimension of a multi-dimensional stat. For
181 # a vector, x will start at 0 and increase for each vector
182 # element.
183 # For a distribution:
184 # -1: sum (for calculating standard deviation)
185 # -2: sum of squares (for calculating standard deviation)
186 # -3: total number of samples taken (for calculating
187 # standard deviation)
188 # -4: minimum value
189 # -5: maximum value
190 # -6: underflow
191 # -7: overflow
192 # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second
193 # dimension.
194 # 'run' is the run that the data was generated from. Details up in
195 # the run table
196 # 'tick' is a timestamp generated by the simulator.
197 # 'bin' is the name of the bin that the data was generated in, if
198 # any.
199 # 'data' is the actual stat value.
200 #
201 # INDEXES:
202 # 'stat' is indexed so that a user can find all of the data for a
203 # particular stat. It is not unique, because that specific stat
204 # can be found in many runs, bins, and samples, in addition to
205 # having entries for the mulidimensional cases.
206 # 'run' is indexed to allow a user to remove all of the data for a
207 # particular execution run. It can also be used to allow the
208 # user to print out all of the data for a given run.
209 #
210 self.query('''
211 CREATE TABLE data(
212 dt_stat SMALLINT UNSIGNED NOT NULL,
213 dt_x SMALLINT NOT NULL,
214 dt_y SMALLINT NOT NULL,
215 dt_run SMALLINT UNSIGNED NOT NULL,
216 dt_tick BIGINT UNSIGNED NOT NULL,
217 dt_bin SMALLINT UNSIGNED NOT NULL,
218 dt_data DOUBLE NOT NULL,
219 INDEX (dt_stat),
220 INDEX (dt_run),
221 UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin)
222 ) TYPE=InnoDB;''')
223
224 #
225 # Names and descriptions for multi-dimensional stats (vectors, etc.)
226 # are stored here instead of having their own entry in the statistics
227 # table. This allows all parts of a single stat to easily share a
228 # single id.
229 #
230 # COLUMNS:
231 # 'stat' is the unique stat identifier from the stat table.
232 # 'x' is the first dimension for multi-dimensional stats
233 # corresponding to the data table above.
234 # 'y' is the second dimension for multi-dimensional stats
235 # corresponding to the data table above.
236 # 'name' is the specific subname for the unique stat,x,y combination.
237 # 'descr' is the specific description for the uniqe stat,x,y
238 # combination.
239 #
240 # INDEXES:
241 # 'stat' is indexed so you can get the subdata for a specific stat.
242 #
243 self.query('''
244 CREATE TABLE subdata(
245 sd_stat SMALLINT UNSIGNED NOT NULL,
246 sd_x SMALLINT NOT NULL,
247 sd_y SMALLINT NOT NULL,
248 sd_name VARCHAR(255) NOT NULL,
249 sd_descr TEXT,
250 UNIQUE (sd_stat,sd_x,sd_y)
251 ) TYPE=InnoDB''')
252
253
254 #
255 # The formula table is maintained separately from the data table
256 # because formula data, unlike other stat data cannot be represented
257 # there.
258 #
259 # COLUMNS:
260 # 'stat' refers to the stat field generated in the stat table.
261 # 'formula' is the actual string representation of the formula
262 # itself.
263 #
264 # INDEXES:
265 # 'stat' is indexed so that you can just look up a formula.
266 #
267 self.query('''
268 CREATE TABLE formulas(
269 fm_stat SMALLINT UNSIGNED NOT NULL,
270 fm_formula BLOB NOT NULL,
271 PRIMARY KEY(fm_stat)
272 ) TYPE=InnoDB''')
273
274 #
275 # Each stat used in each formula is kept in this table. This way, if
276 # you want to print out a particular formula, you can simply find out
277 # which stats you need by looking in this table. Additionally, when
278 # you remove a stat from the stats table and data table, you remove
279 # any references to the formula in this table. When a formula is no
280 # longer referred to, you remove its entry.
281 #
282 # COLUMNS:
283 # 'stat' is the stat id from the stat table above.
284 # 'child' is the stat id of a stat that is used for this formula.
285 # There may be many children for any given 'stat' (formula)
286 #
287 # INDEXES:
288 # 'stat' is indexed so you can look up all of the children for a
289 # particular stat.
290 # 'child' is indexed so that you can remove an entry when a stat is
291 # removed.
292 #
293 self.query('''
294 CREATE TABLE formula_ref(
295 fr_stat SMALLINT UNSIGNED NOT NULL,
296 fr_run SMALLINT UNSIGNED NOT NULL,
297 UNIQUE (fr_stat,fr_run),
298 INDEX (fr_stat),
299 INDEX (fr_run)
300 ) TYPE=InnoDB''')
301
302 # COLUMNS:
303 # 'event' is the unique event id from the event_desc table
304 # 'run' is simulation run id that this event took place in
305 # 'tick' is the tick when the event happened
306 #
307 # INDEXES:
308 # 'event' is indexed so you can look up all occurences of a
309 # specific event
310 # 'run' is indexed so you can find all events in a run
311 # 'tick' is indexed because we want the unique thing anyway
312 # 'event,run,tick' is unique combination
313 self.query('''
314 CREATE TABLE events(
315 ev_event SMALLINT UNSIGNED NOT NULL,
316 ev_run SMALLINT UNSIGNED NOT NULL,
317 ev_tick BIGINT UNSIGNED NOT NULL,
318 INDEX(ev_event),
319 INDEX(ev_run),
320 INDEX(ev_tick),
321 UNIQUE(ev_event,ev_run,ev_tick)
322 ) TYPE=InnoDB''')
323
324 # COLUMNS:
325 # 'id' is the unique description id
326 # 'name' is the name of the event that occurred
327 #
328 # INDEXES:
329 # 'id' is indexed because it is the primary key and is what you use
330 # to look up the descriptions
331 # 'name' is indexed so one can find the event based on name
332 #
333 self.query('''
334 CREATE TABLE event_names(
335 en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
336 en_name VARCHAR(255) NOT NULL,
337 PRIMARY KEY (en_id),
338 UNIQUE (en_name)
339 ) TYPE=InnoDB''')
340
341 def clean(self):
342 self.query('''
343 DELETE data
344 FROM data
345 LEFT JOIN runs ON dt_run=rn_id
346 WHERE rn_id IS NULL''')
347
348 self.query('''
349 DELETE formula_ref
350 FROM formula_ref
351 LEFT JOIN runs ON fr_run=rn_id
352 WHERE rn_id IS NULL''')
353
354 self.query('''
355 DELETE formulas
356 FROM formulas
357 LEFT JOIN formula_ref ON fm_stat=fr_stat
358 WHERE fr_stat IS NULL''')
359
360 self.query('''
361 DELETE stats
362 FROM stats
363 LEFT JOIN data ON st_id=dt_stat
364 WHERE dt_stat IS NULL''')
365
366 self.query('''
367 DELETE subdata
368 FROM subdata
369 LEFT JOIN data ON sd_stat=dt_stat
370 WHERE dt_stat IS NULL''')
371
372 self.query('''
373 DELETE bins
374 FROM bins
375 LEFT JOIN data ON bn_id=dt_bin
376 WHERE dt_bin IS NULL''')
377
378 self.query('''
379 DELETE events
380 FROM events
381 LEFT JOIN runs ON ev_run=rn_id
382 WHERE rn_id IS NULL''')
383
384 self.query('''
385 DELETE event_names
386 FROM event_names
387 LEFT JOIN events ON en_id=ev_event
388 WHERE ev_event IS NULL''')

47import MySQLdb
48
49class MyDB(object):
50 def __init__(self, options):
51 self.name = options.db
52 self.host = options.host
53 self.user = options.user
54 self.passwd = options.passwd
55 self.mydb = None
56 self.cursor = None
57
58 def admin(self):
59 self.close()
60 self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
61 passwd=self.passwd)
62 self.cursor = self.mydb.cursor()
63
64 def connect(self):
65 self.close()
66 self.mydb = MySQLdb.connect(db=self.name, host=self.host,
67 user=self.user, passwd=self.passwd)
68 self.cursor = self.mydb.cursor()
69
70 def close(self):
71 if self.mydb is not None:
72 self.mydb.close()
73 self.cursor = None
74
75 def query(self, sql):
76 self.cursor.execute(sql)
77
78 def drop(self):
79 self.query('DROP DATABASE IF EXISTS %s' % self.name)
80
81 def create(self):
82 self.query('CREATE DATABASE %s' % self.name)
83
84 def populate(self):
85 #
86 # Each run (or simulation) gets its own entry in the runs table to
87 # group stats by where they were generated
88 #
89 # COLUMNS:
90 # 'id' is a unique identifier for each run to be used in other
91 # tables.
92 # 'name' is the user designated name for the data generated. It is
93 # configured in the simulator.
94 # 'user' identifies the user that generated the data for the given
95 # run.
96 # 'project' another name to identify runs for a specific goal
97 # 'date' is a timestamp for when the data was generated. It can be
98 # used to easily expire data that was generated in the past.
99 # 'expire' is a timestamp for when the data should be removed from
100 # the database so we don't have years worth of junk.
101 #
102 # INDEXES:
103 # 'run' is indexed so you can find out details of a run if the run
104 # was retreived from the data table.
105 # 'name' is indexed so that two all run names are forced to be unique
106 #
107 self.query('''
108 CREATE TABLE runs(
109 rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
110 rn_name VARCHAR(200) NOT NULL,
111 rn_sample VARCHAR(32) NOT NULL,
112 rn_user VARCHAR(32) NOT NULL,
113 rn_project VARCHAR(100) NOT NULL,
114 rn_date TIMESTAMP NOT NULL,
115 rn_expire TIMESTAMP NOT NULL,
116 PRIMARY KEY (rn_id),
117 UNIQUE (rn_name,rn_sample)
118 ) TYPE=InnoDB''')
119
120 #
121 # We keep the bin names separate so that the data table doesn't get
122 # huge since bin names are frequently repeated.
123 #
124 # COLUMNS:
125 # 'id' is the unique bin identifer.
126 # 'name' is the string name for the bin.
127 #
128 # INDEXES:
129 # 'bin' is indexed to get the name of a bin when data is retrieved
130 # via the data table.
131 # 'name' is indexed to get the bin id for a named bin when you want
132 # to search the data table based on a specific bin.
133 #
134 self.query('''
135 CREATE TABLE bins(
136 bn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
137 bn_name VARCHAR(255) NOT NULL,
138 PRIMARY KEY(bn_id),
139 UNIQUE (bn_name)
140 ) TYPE=InnoDB''')
141
142 #
143 # The stat table gives us all of the data for a particular stat.
144 #
145 # COLUMNS:
146 # 'stat' is a unique identifier for each stat to be used in other
147 # tables for references.
148 # 'name' is simply the simulator derived name for a given
149 # statistic.
150 # 'descr' is the description of the statistic and what it tells
151 # you.
152 # 'type' defines what the stat tells you. Types are:
153 # SCALAR: A simple scalar statistic that holds one value
154 # VECTOR: An array of statistic values. Such a something that
155 # is generated per-thread. Vectors exist to give averages,
156 # pdfs, cdfs, means, standard deviations, etc across the
157 # stat values.
158 # DIST: Is a distribution of data. When the statistic value is
159 # sampled, its value is counted in a particular bucket.
160 # Useful for keeping track of utilization of a resource.
161 # (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
162 # VECTORDIST: Can be used when the distribution needs to be
163 # factored out into a per-thread distribution of data for
164 # example. It can still be summed across threads to find
165 # the total distribution.
166 # VECTOR2D: Can be used when you have a stat that is not only
167 # per-thread, but it is per-something else. Like
168 # per-message type.
169 # FORMULA: This statistic is a formula, and its data must be
170 # looked up in the formula table, for indicating how to
171 # present its values.
172 # 'subdata' is potentially used by any of the vector types to
173 # give a specific name to all of the data elements within a
174 # stat.
175 # 'print' indicates whether this stat should be printed ever.
176 # (Unnamed stats don't usually get printed)
177 # 'prereq' only print the stat if the prereq is not zero.
178 # 'prec' number of decimal places to print
179 # 'nozero' don't print zero values
180 # 'nonan' don't print NaN values
181 # 'total' for vector type stats, print the total.
182 # 'pdf' for vector type stats, print the pdf.
183 # 'cdf' for vector type stats, print the cdf.
184 #
185 # The Following are for dist type stats:
186 # 'min' is the minimum bucket value. Anything less is an underflow.
187 # 'max' is the maximum bucket value. Anything more is an overflow.
188 # 'bktsize' is the approximate number of entries in each bucket.
189 # 'size' is the number of buckets. equal to (min/max)/bktsize.
190 #
191 # INDEXES:
192 # 'stat' is indexed so that you can find out details about a stat
193 # if the stat id was retrieved from the data table.
194 # 'name' is indexed so that you can simply look up data about a
195 # named stat.
196 #
197 self.query('''
198 CREATE TABLE stats(
199 st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
200 st_name VARCHAR(255) NOT NULL,
201 st_descr TEXT NOT NULL,
202 st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
203 "VECTOR2D", "FORMULA") NOT NULL,
204 st_print BOOL NOT NULL,
205 st_prereq SMALLINT UNSIGNED NOT NULL,
206 st_prec TINYINT NOT NULL,
207 st_nozero BOOL NOT NULL,
208 st_nonan BOOL NOT NULL,
209 st_total BOOL NOT NULL,
210 st_pdf BOOL NOT NULL,
211 st_cdf BOOL NOT NULL,
212 st_min DOUBLE NOT NULL,
213 st_max DOUBLE NOT NULL,
214 st_bktsize DOUBLE NOT NULL,
215 st_size SMALLINT UNSIGNED NOT NULL,
216 PRIMARY KEY (st_id),
217 UNIQUE (st_name)
218 ) TYPE=InnoDB''')
219
220 #
221 # This is the main table of data for stats.
222 #
223 # COLUMNS:
224 # 'stat' refers to the stat field given in the stat table.
225 #
226 # 'x' referrs to the first dimension of a multi-dimensional stat. For
227 # a vector, x will start at 0 and increase for each vector
228 # element.
229 # For a distribution:
230 # -1: sum (for calculating standard deviation)
231 # -2: sum of squares (for calculating standard deviation)
232 # -3: total number of samples taken (for calculating
233 # standard deviation)
234 # -4: minimum value
235 # -5: maximum value
236 # -6: underflow
237 # -7: overflow
238 # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second
239 # dimension.
240 # 'run' is the run that the data was generated from. Details up in
241 # the run table
242 # 'tick' is a timestamp generated by the simulator.
243 # 'bin' is the name of the bin that the data was generated in, if
244 # any.
245 # 'data' is the actual stat value.
246 #
247 # INDEXES:
248 # 'stat' is indexed so that a user can find all of the data for a
249 # particular stat. It is not unique, because that specific stat
250 # can be found in many runs, bins, and samples, in addition to
251 # having entries for the mulidimensional cases.
252 # 'run' is indexed to allow a user to remove all of the data for a
253 # particular execution run. It can also be used to allow the
254 # user to print out all of the data for a given run.
255 #
256 self.query('''
257 CREATE TABLE data(
258 dt_stat SMALLINT UNSIGNED NOT NULL,
259 dt_x SMALLINT NOT NULL,
260 dt_y SMALLINT NOT NULL,
261 dt_run SMALLINT UNSIGNED NOT NULL,
262 dt_tick BIGINT UNSIGNED NOT NULL,
263 dt_bin SMALLINT UNSIGNED NOT NULL,
264 dt_data DOUBLE NOT NULL,
265 INDEX (dt_stat),
266 INDEX (dt_run),
267 UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin)
268 ) TYPE=InnoDB;''')
269
270 #
271 # Names and descriptions for multi-dimensional stats (vectors, etc.)
272 # are stored here instead of having their own entry in the statistics
273 # table. This allows all parts of a single stat to easily share a
274 # single id.
275 #
276 # COLUMNS:
277 # 'stat' is the unique stat identifier from the stat table.
278 # 'x' is the first dimension for multi-dimensional stats
279 # corresponding to the data table above.
280 # 'y' is the second dimension for multi-dimensional stats
281 # corresponding to the data table above.
282 # 'name' is the specific subname for the unique stat,x,y combination.
283 # 'descr' is the specific description for the uniqe stat,x,y
284 # combination.
285 #
286 # INDEXES:
287 # 'stat' is indexed so you can get the subdata for a specific stat.
288 #
289 self.query('''
290 CREATE TABLE subdata(
291 sd_stat SMALLINT UNSIGNED NOT NULL,
292 sd_x SMALLINT NOT NULL,
293 sd_y SMALLINT NOT NULL,
294 sd_name VARCHAR(255) NOT NULL,
295 sd_descr TEXT,
296 UNIQUE (sd_stat,sd_x,sd_y)
297 ) TYPE=InnoDB''')
298
299
300 #
301 # The formula table is maintained separately from the data table
302 # because formula data, unlike other stat data cannot be represented
303 # there.
304 #
305 # COLUMNS:
306 # 'stat' refers to the stat field generated in the stat table.
307 # 'formula' is the actual string representation of the formula
308 # itself.
309 #
310 # INDEXES:
311 # 'stat' is indexed so that you can just look up a formula.
312 #
313 self.query('''
314 CREATE TABLE formulas(
315 fm_stat SMALLINT UNSIGNED NOT NULL,
316 fm_formula BLOB NOT NULL,
317 PRIMARY KEY(fm_stat)
318 ) TYPE=InnoDB''')
319
320 #
321 # Each stat used in each formula is kept in this table. This way, if
322 # you want to print out a particular formula, you can simply find out
323 # which stats you need by looking in this table. Additionally, when
324 # you remove a stat from the stats table and data table, you remove
325 # any references to the formula in this table. When a formula is no
326 # longer referred to, you remove its entry.
327 #
328 # COLUMNS:
329 # 'stat' is the stat id from the stat table above.
330 # 'child' is the stat id of a stat that is used for this formula.
331 # There may be many children for any given 'stat' (formula)
332 #
333 # INDEXES:
334 # 'stat' is indexed so you can look up all of the children for a
335 # particular stat.
336 # 'child' is indexed so that you can remove an entry when a stat is
337 # removed.
338 #
339 self.query('''
340 CREATE TABLE formula_ref(
341 fr_stat SMALLINT UNSIGNED NOT NULL,
342 fr_run SMALLINT UNSIGNED NOT NULL,
343 UNIQUE (fr_stat,fr_run),
344 INDEX (fr_stat),
345 INDEX (fr_run)
346 ) TYPE=InnoDB''')
347
348 # COLUMNS:
349 # 'event' is the unique event id from the event_desc table
350 # 'run' is simulation run id that this event took place in
351 # 'tick' is the tick when the event happened
352 #
353 # INDEXES:
354 # 'event' is indexed so you can look up all occurences of a
355 # specific event
356 # 'run' is indexed so you can find all events in a run
357 # 'tick' is indexed because we want the unique thing anyway
358 # 'event,run,tick' is unique combination
359 self.query('''
360 CREATE TABLE events(
361 ev_event SMALLINT UNSIGNED NOT NULL,
362 ev_run SMALLINT UNSIGNED NOT NULL,
363 ev_tick BIGINT UNSIGNED NOT NULL,
364 INDEX(ev_event),
365 INDEX(ev_run),
366 INDEX(ev_tick),
367 UNIQUE(ev_event,ev_run,ev_tick)
368 ) TYPE=InnoDB''')
369
370 # COLUMNS:
371 # 'id' is the unique description id
372 # 'name' is the name of the event that occurred
373 #
374 # INDEXES:
375 # 'id' is indexed because it is the primary key and is what you use
376 # to look up the descriptions
377 # 'name' is indexed so one can find the event based on name
378 #
379 self.query('''
380 CREATE TABLE event_names(
381 en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
382 en_name VARCHAR(255) NOT NULL,
383 PRIMARY KEY (en_id),
384 UNIQUE (en_name)
385 ) TYPE=InnoDB''')
386
387 def clean(self):
388 self.query('''
389 DELETE data
390 FROM data
391 LEFT JOIN runs ON dt_run=rn_id
392 WHERE rn_id IS NULL''')
393
394 self.query('''
395 DELETE formula_ref
396 FROM formula_ref
397 LEFT JOIN runs ON fr_run=rn_id
398 WHERE rn_id IS NULL''')
399
400 self.query('''
401 DELETE formulas
402 FROM formulas
403 LEFT JOIN formula_ref ON fm_stat=fr_stat
404 WHERE fr_stat IS NULL''')
405
406 self.query('''
407 DELETE stats
408 FROM stats
409 LEFT JOIN data ON st_id=dt_stat
410 WHERE dt_stat IS NULL''')
411
412 self.query('''
413 DELETE subdata
414 FROM subdata
415 LEFT JOIN data ON sd_stat=dt_stat
416 WHERE dt_stat IS NULL''')
417
418 self.query('''
419 DELETE bins
420 FROM bins
421 LEFT JOIN data ON bn_id=dt_bin
422 WHERE dt_bin IS NULL''')
423
424 self.query('''
425 DELETE events
426 FROM events
427 LEFT JOIN runs ON ev_run=rn_id
428 WHERE rn_id IS NULL''')
429
430 self.query('''
431 DELETE event_names
432 FROM event_names
433 LEFT JOIN events ON en_id=ev_event
434 WHERE ev_event IS NULL''')