Cross Reference: /gem5/util/stats/dbinit.py

dbinit.py (2343:a2b4a6ccee56)	dbinit.py (2665:a124942bacb8)
1# Copyright (c) 2003-2004 The Regents of The University of Michigan 2# All rights reserved. 3# 4# Redistribution and use in source and binary forms, with or without 5# modification, are permitted provided that the following conditions are 6# met: redistributions of source code must retain the above copyright 7# notice, this list of conditions and the following disclaimer; 8# redistributions in binary form must reproduce the above copyright 9# notice, this list of conditions and the following disclaimer in the 10# documentation and/or other materials provided with the distribution; 11# neither the name of the copyright holders nor the names of its 12# contributors may be used to endorse or promote products derived from 13# this software without specific prior written permission. 14# 15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	1# Copyright (c) 2003-2004 The Regents of The University of Michigan 2# All rights reserved. 3# 4# Redistribution and use in source and binary forms, with or without 5# modification, are permitted provided that the following conditions are 6# met: redistributions of source code must retain the above copyright 7# notice, this list of conditions and the following disclaimer; 8# redistributions in binary form must reproduce the above copyright 9# notice, this list of conditions and the following disclaimer in the 10# documentation and/or other materials provided with the distribution; 11# neither the name of the copyright holders nor the names of its 12# contributors may be used to endorse or promote products derived from 13# this software without specific prior written permission. 14# 15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	26# 27# Authors: Nathan Binkert
26 27import MySQLdb 28 29class MyDB(object): 30 def __init__(self, options): 31 self.name = options.db 32 self.host = options.host 33 self.user = options.user 34 self.passwd = options.passwd 35 self.mydb = None 36 self.cursor = None 37 38 def admin(self): 39 self.close() 40 self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user, 41 passwd=self.passwd) 42 self.cursor = self.mydb.cursor() 43 44 def connect(self): 45 self.close() 46 self.mydb = MySQLdb.connect(db=self.name, host=self.host, 47 user=self.user, passwd=self.passwd) 48 self.cursor = self.mydb.cursor() 49 50 def close(self): 51 if self.mydb is not None: 52 self.mydb.close() 53 self.cursor = None 54 55 def query(self, sql): 56 self.cursor.execute(sql) 57 58 def drop(self): 59 self.query('DROP DATABASE IF EXISTS %s' % self.name) 60 61 def create(self): 62 self.query('CREATE DATABASE %s' % self.name) 63 64 def populate(self): 65 # 66 # Each run (or simulation) gets its own entry in the runs table to 67 # group stats by where they were generated 68 # 69 # COLUMNS: 70 # 'id' is a unique identifier for each run to be used in other 71 # tables. 72 # 'name' is the user designated name for the data generated. It is 73 # configured in the simulator. 74 # 'user' identifies the user that generated the data for the given 75 # run. 76 # 'project' another name to identify runs for a specific goal 77 # 'date' is a timestamp for when the data was generated. It can be 78 # used to easily expire data that was generated in the past. 79 # 'expire' is a timestamp for when the data should be removed from 80 # the database so we don't have years worth of junk. 81 # 82 # INDEXES: 83 # 'run' is indexed so you can find out details of a run if the run 84 # was retreived from the data table. 85 # 'name' is indexed so that two all run names are forced to be unique 86 # 87 self.query(''' 88 CREATE TABLE runs( 89 rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 90 rn_name VARCHAR(200) NOT NULL, 91 rn_sample VARCHAR(32) NOT NULL, 92 rn_user VARCHAR(32) NOT NULL, 93 rn_project VARCHAR(100) NOT NULL, 94 rn_date TIMESTAMP NOT NULL, 95 rn_expire TIMESTAMP NOT NULL, 96 PRIMARY KEY (rn_id), 97 UNIQUE (rn_name,rn_sample) 98 ) TYPE=InnoDB''') 99 100 #	28 29import MySQLdb 30 31class MyDB(object): 32 def __init__(self, options): 33 self.name = options.db 34 self.host = options.host 35 self.user = options.user 36 self.passwd = options.passwd 37 self.mydb = None 38 self.cursor = None 39 40 def admin(self): 41 self.close() 42 self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user, 43 passwd=self.passwd) 44 self.cursor = self.mydb.cursor() 45 46 def connect(self): 47 self.close() 48 self.mydb = MySQLdb.connect(db=self.name, host=self.host, 49 user=self.user, passwd=self.passwd) 50 self.cursor = self.mydb.cursor() 51 52 def close(self): 53 if self.mydb is not None: 54 self.mydb.close() 55 self.cursor = None 56 57 def query(self, sql): 58 self.cursor.execute(sql) 59 60 def drop(self): 61 self.query('DROP DATABASE IF EXISTS %s' % self.name) 62 63 def create(self): 64 self.query('CREATE DATABASE %s' % self.name) 65 66 def populate(self): 67 # 68 # Each run (or simulation) gets its own entry in the runs table to 69 # group stats by where they were generated 70 # 71 # COLUMNS: 72 # 'id' is a unique identifier for each run to be used in other 73 # tables. 74 # 'name' is the user designated name for the data generated. It is 75 # configured in the simulator. 76 # 'user' identifies the user that generated the data for the given 77 # run. 78 # 'project' another name to identify runs for a specific goal 79 # 'date' is a timestamp for when the data was generated. It can be 80 # used to easily expire data that was generated in the past. 81 # 'expire' is a timestamp for when the data should be removed from 82 # the database so we don't have years worth of junk. 83 # 84 # INDEXES: 85 # 'run' is indexed so you can find out details of a run if the run 86 # was retreived from the data table. 87 # 'name' is indexed so that two all run names are forced to be unique 88 # 89 self.query(''' 90 CREATE TABLE runs( 91 rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 92 rn_name VARCHAR(200) NOT NULL, 93 rn_sample VARCHAR(32) NOT NULL, 94 rn_user VARCHAR(32) NOT NULL, 95 rn_project VARCHAR(100) NOT NULL, 96 rn_date TIMESTAMP NOT NULL, 97 rn_expire TIMESTAMP NOT NULL, 98 PRIMARY KEY (rn_id), 99 UNIQUE (rn_name,rn_sample) 100 ) TYPE=InnoDB''') 101 102 #
	103 # We keep the bin names separate so that the data table doesn't get 104 # huge since bin names are frequently repeated. 105 # 106 # COLUMNS: 107 # 'id' is the unique bin identifer. 108 # 'name' is the string name for the bin. 109 # 110 # INDEXES: 111 # 'bin' is indexed to get the name of a bin when data is retrieved 112 # via the data table. 113 # 'name' is indexed to get the bin id for a named bin when you want 114 # to search the data table based on a specific bin. 115 # 116 self.query(''' 117 CREATE TABLE bins( 118 bn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 119 bn_name VARCHAR(255) NOT NULL, 120 PRIMARY KEY(bn_id), 121 UNIQUE (bn_name) 122 ) TYPE=InnoDB''') 123 124 #
101 # The stat table gives us all of the data for a particular stat. 102 # 103 # COLUMNS: 104 # 'stat' is a unique identifier for each stat to be used in other 105 # tables for references. 106 # 'name' is simply the simulator derived name for a given 107 # statistic. 108 # 'descr' is the description of the statistic and what it tells 109 # you. 110 # 'type' defines what the stat tells you. Types are: 111 # SCALAR: A simple scalar statistic that holds one value 112 # VECTOR: An array of statistic values. Such a something that 113 # is generated per-thread. Vectors exist to give averages, 114 # pdfs, cdfs, means, standard deviations, etc across the 115 # stat values. 116 # DIST: Is a distribution of data. When the statistic value is 117 # sampled, its value is counted in a particular bucket. 118 # Useful for keeping track of utilization of a resource. 119 # (e.g. fraction of time it is 25% used vs. 50% vs. 100%) 120 # VECTORDIST: Can be used when the distribution needs to be 121 # factored out into a per-thread distribution of data for 122 # example. It can still be summed across threads to find 123 # the total distribution. 124 # VECTOR2D: Can be used when you have a stat that is not only 125 # per-thread, but it is per-something else. Like 126 # per-message type. 127 # FORMULA: This statistic is a formula, and its data must be 128 # looked up in the formula table, for indicating how to 129 # present its values. 130 # 'subdata' is potentially used by any of the vector types to 131 # give a specific name to all of the data elements within a 132 # stat. 133 # 'print' indicates whether this stat should be printed ever. 134 # (Unnamed stats don't usually get printed) 135 # 'prereq' only print the stat if the prereq is not zero. 136 # 'prec' number of decimal places to print 137 # 'nozero' don't print zero values 138 # 'nonan' don't print NaN values 139 # 'total' for vector type stats, print the total. 140 # 'pdf' for vector type stats, print the pdf. 141 # 'cdf' for vector type stats, print the cdf. 142 # 143 # The Following are for dist type stats: 144 # 'min' is the minimum bucket value. Anything less is an underflow. 145 # 'max' is the maximum bucket value. Anything more is an overflow. 146 # 'bktsize' is the approximate number of entries in each bucket. 147 # 'size' is the number of buckets. equal to (min/max)/bktsize. 148 # 149 # INDEXES: 150 # 'stat' is indexed so that you can find out details about a stat 151 # if the stat id was retrieved from the data table. 152 # 'name' is indexed so that you can simply look up data about a 153 # named stat. 154 # 155 self.query(''' 156 CREATE TABLE stats( 157 st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 158 st_name VARCHAR(255) NOT NULL, 159 st_descr TEXT NOT NULL, 160 st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST", 161 "VECTOR2D", "FORMULA") NOT NULL, 162 st_print BOOL NOT NULL, 163 st_prereq SMALLINT UNSIGNED NOT NULL, 164 st_prec TINYINT NOT NULL, 165 st_nozero BOOL NOT NULL, 166 st_nonan BOOL NOT NULL, 167 st_total BOOL NOT NULL, 168 st_pdf BOOL NOT NULL, 169 st_cdf BOOL NOT NULL, 170 st_min DOUBLE NOT NULL, 171 st_max DOUBLE NOT NULL, 172 st_bktsize DOUBLE NOT NULL, 173 st_size SMALLINT UNSIGNED NOT NULL, 174 PRIMARY KEY (st_id), 175 UNIQUE (st_name) 176 ) TYPE=InnoDB''') 177 178 # 179 # This is the main table of data for stats. 180 # 181 # COLUMNS: 182 # 'stat' refers to the stat field given in the stat table. 183 # 184 # 'x' referrs to the first dimension of a multi-dimensional stat. For 185 # a vector, x will start at 0 and increase for each vector 186 # element. 187 # For a distribution: 188 # -1: sum (for calculating standard deviation) 189 # -2: sum of squares (for calculating standard deviation) 190 # -3: total number of samples taken (for calculating 191 # standard deviation) 192 # -4: minimum value 193 # -5: maximum value 194 # -6: underflow 195 # -7: overflow 196 # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second 197 # dimension. 198 # 'run' is the run that the data was generated from. Details up in 199 # the run table 200 # 'tick' is a timestamp generated by the simulator.	125 # The stat table gives us all of the data for a particular stat. 126 # 127 # COLUMNS: 128 # 'stat' is a unique identifier for each stat to be used in other 129 # tables for references. 130 # 'name' is simply the simulator derived name for a given 131 # statistic. 132 # 'descr' is the description of the statistic and what it tells 133 # you. 134 # 'type' defines what the stat tells you. Types are: 135 # SCALAR: A simple scalar statistic that holds one value 136 # VECTOR: An array of statistic values. Such a something that 137 # is generated per-thread. Vectors exist to give averages, 138 # pdfs, cdfs, means, standard deviations, etc across the 139 # stat values. 140 # DIST: Is a distribution of data. When the statistic value is 141 # sampled, its value is counted in a particular bucket. 142 # Useful for keeping track of utilization of a resource. 143 # (e.g. fraction of time it is 25% used vs. 50% vs. 100%) 144 # VECTORDIST: Can be used when the distribution needs to be 145 # factored out into a per-thread distribution of data for 146 # example. It can still be summed across threads to find 147 # the total distribution. 148 # VECTOR2D: Can be used when you have a stat that is not only 149 # per-thread, but it is per-something else. Like 150 # per-message type. 151 # FORMULA: This statistic is a formula, and its data must be 152 # looked up in the formula table, for indicating how to 153 # present its values. 154 # 'subdata' is potentially used by any of the vector types to 155 # give a specific name to all of the data elements within a 156 # stat. 157 # 'print' indicates whether this stat should be printed ever. 158 # (Unnamed stats don't usually get printed) 159 # 'prereq' only print the stat if the prereq is not zero. 160 # 'prec' number of decimal places to print 161 # 'nozero' don't print zero values 162 # 'nonan' don't print NaN values 163 # 'total' for vector type stats, print the total. 164 # 'pdf' for vector type stats, print the pdf. 165 # 'cdf' for vector type stats, print the cdf. 166 # 167 # The Following are for dist type stats: 168 # 'min' is the minimum bucket value. Anything less is an underflow. 169 # 'max' is the maximum bucket value. Anything more is an overflow. 170 # 'bktsize' is the approximate number of entries in each bucket. 171 # 'size' is the number of buckets. equal to (min/max)/bktsize. 172 # 173 # INDEXES: 174 # 'stat' is indexed so that you can find out details about a stat 175 # if the stat id was retrieved from the data table. 176 # 'name' is indexed so that you can simply look up data about a 177 # named stat. 178 # 179 self.query(''' 180 CREATE TABLE stats( 181 st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 182 st_name VARCHAR(255) NOT NULL, 183 st_descr TEXT NOT NULL, 184 st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST", 185 "VECTOR2D", "FORMULA") NOT NULL, 186 st_print BOOL NOT NULL, 187 st_prereq SMALLINT UNSIGNED NOT NULL, 188 st_prec TINYINT NOT NULL, 189 st_nozero BOOL NOT NULL, 190 st_nonan BOOL NOT NULL, 191 st_total BOOL NOT NULL, 192 st_pdf BOOL NOT NULL, 193 st_cdf BOOL NOT NULL, 194 st_min DOUBLE NOT NULL, 195 st_max DOUBLE NOT NULL, 196 st_bktsize DOUBLE NOT NULL, 197 st_size SMALLINT UNSIGNED NOT NULL, 198 PRIMARY KEY (st_id), 199 UNIQUE (st_name) 200 ) TYPE=InnoDB''') 201 202 # 203 # This is the main table of data for stats. 204 # 205 # COLUMNS: 206 # 'stat' refers to the stat field given in the stat table. 207 # 208 # 'x' referrs to the first dimension of a multi-dimensional stat. For 209 # a vector, x will start at 0 and increase for each vector 210 # element. 211 # For a distribution: 212 # -1: sum (for calculating standard deviation) 213 # -2: sum of squares (for calculating standard deviation) 214 # -3: total number of samples taken (for calculating 215 # standard deviation) 216 # -4: minimum value 217 # -5: maximum value 218 # -6: underflow 219 # -7: overflow 220 # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second 221 # dimension. 222 # 'run' is the run that the data was generated from. Details up in 223 # the run table 224 # 'tick' is a timestamp generated by the simulator.
	225 # 'bin' is the name of the bin that the data was generated in, if 226 # any.
201 # 'data' is the actual stat value. 202 # 203 # INDEXES: 204 # 'stat' is indexed so that a user can find all of the data for a 205 # particular stat. It is not unique, because that specific stat	227 # 'data' is the actual stat value. 228 # 229 # INDEXES: 230 # 'stat' is indexed so that a user can find all of the data for a 231 # particular stat. It is not unique, because that specific stat
206 # can be found in many runs and samples, in addition to	232 # can be found in many runs, bins, and samples, in addition to
207 # having entries for the mulidimensional cases. 208 # 'run' is indexed to allow a user to remove all of the data for a 209 # particular execution run. It can also be used to allow the 210 # user to print out all of the data for a given run. 211 # 212 self.query(''' 213 CREATE TABLE data( 214 dt_stat SMALLINT UNSIGNED NOT NULL, 215 dt_x SMALLINT NOT NULL, 216 dt_y SMALLINT NOT NULL, 217 dt_run SMALLINT UNSIGNED NOT NULL, 218 dt_tick BIGINT UNSIGNED NOT NULL,	233 # having entries for the mulidimensional cases. 234 # 'run' is indexed to allow a user to remove all of the data for a 235 # particular execution run. It can also be used to allow the 236 # user to print out all of the data for a given run. 237 # 238 self.query(''' 239 CREATE TABLE data( 240 dt_stat SMALLINT UNSIGNED NOT NULL, 241 dt_x SMALLINT NOT NULL, 242 dt_y SMALLINT NOT NULL, 243 dt_run SMALLINT UNSIGNED NOT NULL, 244 dt_tick BIGINT UNSIGNED NOT NULL,
	245 dt_bin SMALLINT UNSIGNED NOT NULL,
219 dt_data DOUBLE NOT NULL, 220 INDEX (dt_stat), 221 INDEX (dt_run),	246 dt_data DOUBLE NOT NULL, 247 INDEX (dt_stat), 248 INDEX (dt_run),
222 UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick)	249 UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin)
223 ) TYPE=InnoDB;''') 224 225 # 226 # Names and descriptions for multi-dimensional stats (vectors, etc.) 227 # are stored here instead of having their own entry in the statistics 228 # table. This allows all parts of a single stat to easily share a 229 # single id. 230 # 231 # COLUMNS: 232 # 'stat' is the unique stat identifier from the stat table. 233 # 'x' is the first dimension for multi-dimensional stats 234 # corresponding to the data table above. 235 # 'y' is the second dimension for multi-dimensional stats 236 # corresponding to the data table above. 237 # 'name' is the specific subname for the unique stat,x,y combination. 238 # 'descr' is the specific description for the uniqe stat,x,y 239 # combination. 240 # 241 # INDEXES: 242 # 'stat' is indexed so you can get the subdata for a specific stat. 243 # 244 self.query(''' 245 CREATE TABLE subdata( 246 sd_stat SMALLINT UNSIGNED NOT NULL, 247 sd_x SMALLINT NOT NULL, 248 sd_y SMALLINT NOT NULL, 249 sd_name VARCHAR(255) NOT NULL, 250 sd_descr TEXT, 251 UNIQUE (sd_stat,sd_x,sd_y) 252 ) TYPE=InnoDB''') 253 254 255 # 256 # The formula table is maintained separately from the data table 257 # because formula data, unlike other stat data cannot be represented 258 # there. 259 # 260 # COLUMNS: 261 # 'stat' refers to the stat field generated in the stat table. 262 # 'formula' is the actual string representation of the formula 263 # itself. 264 # 265 # INDEXES: 266 # 'stat' is indexed so that you can just look up a formula. 267 # 268 self.query(''' 269 CREATE TABLE formulas( 270 fm_stat SMALLINT UNSIGNED NOT NULL, 271 fm_formula BLOB NOT NULL, 272 PRIMARY KEY(fm_stat) 273 ) TYPE=InnoDB''') 274 275 # 276 # Each stat used in each formula is kept in this table. This way, if 277 # you want to print out a particular formula, you can simply find out 278 # which stats you need by looking in this table. Additionally, when 279 # you remove a stat from the stats table and data table, you remove 280 # any references to the formula in this table. When a formula is no 281 # longer referred to, you remove its entry. 282 # 283 # COLUMNS: 284 # 'stat' is the stat id from the stat table above. 285 # 'child' is the stat id of a stat that is used for this formula. 286 # There may be many children for any given 'stat' (formula) 287 # 288 # INDEXES: 289 # 'stat' is indexed so you can look up all of the children for a 290 # particular stat. 291 # 'child' is indexed so that you can remove an entry when a stat is 292 # removed. 293 # 294 self.query(''' 295 CREATE TABLE formula_ref( 296 fr_stat SMALLINT UNSIGNED NOT NULL, 297 fr_run SMALLINT UNSIGNED NOT NULL, 298 UNIQUE (fr_stat,fr_run), 299 INDEX (fr_stat), 300 INDEX (fr_run) 301 ) TYPE=InnoDB''') 302 303 # COLUMNS: 304 # 'event' is the unique event id from the event_desc table 305 # 'run' is simulation run id that this event took place in 306 # 'tick' is the tick when the event happened 307 # 308 # INDEXES: 309 # 'event' is indexed so you can look up all occurences of a 310 # specific event 311 # 'run' is indexed so you can find all events in a run 312 # 'tick' is indexed because we want the unique thing anyway 313 # 'event,run,tick' is unique combination 314 self.query(''' 315 CREATE TABLE events( 316 ev_event SMALLINT UNSIGNED NOT NULL, 317 ev_run SMALLINT UNSIGNED NOT NULL, 318 ev_tick BIGINT UNSIGNED NOT NULL, 319 INDEX(ev_event), 320 INDEX(ev_run), 321 INDEX(ev_tick), 322 UNIQUE(ev_event,ev_run,ev_tick) 323 ) TYPE=InnoDB''') 324 325 # COLUMNS: 326 # 'id' is the unique description id 327 # 'name' is the name of the event that occurred 328 # 329 # INDEXES: 330 # 'id' is indexed because it is the primary key and is what you use 331 # to look up the descriptions 332 # 'name' is indexed so one can find the event based on name 333 # 334 self.query(''' 335 CREATE TABLE event_names( 336 en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 337 en_name VARCHAR(255) NOT NULL, 338 PRIMARY KEY (en_id), 339 UNIQUE (en_name) 340 ) TYPE=InnoDB''') 341 342 def clean(self): 343 self.query(''' 344 DELETE data 345 FROM data 346 LEFT JOIN runs ON dt_run=rn_id 347 WHERE rn_id IS NULL''') 348 349 self.query(''' 350 DELETE formula_ref 351 FROM formula_ref 352 LEFT JOIN runs ON fr_run=rn_id 353 WHERE rn_id IS NULL''') 354 355 self.query(''' 356 DELETE formulas 357 FROM formulas 358 LEFT JOIN formula_ref ON fm_stat=fr_stat 359 WHERE fr_stat IS NULL''') 360 361 self.query(''' 362 DELETE stats 363 FROM stats 364 LEFT JOIN data ON st_id=dt_stat 365 WHERE dt_stat IS NULL''') 366 367 self.query(''' 368 DELETE subdata 369 FROM subdata 370 LEFT JOIN data ON sd_stat=dt_stat 371 WHERE dt_stat IS NULL''') 372 373 self.query('''	250 ) TYPE=InnoDB;''') 251 252 # 253 # Names and descriptions for multi-dimensional stats (vectors, etc.) 254 # are stored here instead of having their own entry in the statistics 255 # table. This allows all parts of a single stat to easily share a 256 # single id. 257 # 258 # COLUMNS: 259 # 'stat' is the unique stat identifier from the stat table. 260 # 'x' is the first dimension for multi-dimensional stats 261 # corresponding to the data table above. 262 # 'y' is the second dimension for multi-dimensional stats 263 # corresponding to the data table above. 264 # 'name' is the specific subname for the unique stat,x,y combination. 265 # 'descr' is the specific description for the uniqe stat,x,y 266 # combination. 267 # 268 # INDEXES: 269 # 'stat' is indexed so you can get the subdata for a specific stat. 270 # 271 self.query(''' 272 CREATE TABLE subdata( 273 sd_stat SMALLINT UNSIGNED NOT NULL, 274 sd_x SMALLINT NOT NULL, 275 sd_y SMALLINT NOT NULL, 276 sd_name VARCHAR(255) NOT NULL, 277 sd_descr TEXT, 278 UNIQUE (sd_stat,sd_x,sd_y) 279 ) TYPE=InnoDB''') 280 281 282 # 283 # The formula table is maintained separately from the data table 284 # because formula data, unlike other stat data cannot be represented 285 # there. 286 # 287 # COLUMNS: 288 # 'stat' refers to the stat field generated in the stat table. 289 # 'formula' is the actual string representation of the formula 290 # itself. 291 # 292 # INDEXES: 293 # 'stat' is indexed so that you can just look up a formula. 294 # 295 self.query(''' 296 CREATE TABLE formulas( 297 fm_stat SMALLINT UNSIGNED NOT NULL, 298 fm_formula BLOB NOT NULL, 299 PRIMARY KEY(fm_stat) 300 ) TYPE=InnoDB''') 301 302 # 303 # Each stat used in each formula is kept in this table. This way, if 304 # you want to print out a particular formula, you can simply find out 305 # which stats you need by looking in this table. Additionally, when 306 # you remove a stat from the stats table and data table, you remove 307 # any references to the formula in this table. When a formula is no 308 # longer referred to, you remove its entry. 309 # 310 # COLUMNS: 311 # 'stat' is the stat id from the stat table above. 312 # 'child' is the stat id of a stat that is used for this formula. 313 # There may be many children for any given 'stat' (formula) 314 # 315 # INDEXES: 316 # 'stat' is indexed so you can look up all of the children for a 317 # particular stat. 318 # 'child' is indexed so that you can remove an entry when a stat is 319 # removed. 320 # 321 self.query(''' 322 CREATE TABLE formula_ref( 323 fr_stat SMALLINT UNSIGNED NOT NULL, 324 fr_run SMALLINT UNSIGNED NOT NULL, 325 UNIQUE (fr_stat,fr_run), 326 INDEX (fr_stat), 327 INDEX (fr_run) 328 ) TYPE=InnoDB''') 329 330 # COLUMNS: 331 # 'event' is the unique event id from the event_desc table 332 # 'run' is simulation run id that this event took place in 333 # 'tick' is the tick when the event happened 334 # 335 # INDEXES: 336 # 'event' is indexed so you can look up all occurences of a 337 # specific event 338 # 'run' is indexed so you can find all events in a run 339 # 'tick' is indexed because we want the unique thing anyway 340 # 'event,run,tick' is unique combination 341 self.query(''' 342 CREATE TABLE events( 343 ev_event SMALLINT UNSIGNED NOT NULL, 344 ev_run SMALLINT UNSIGNED NOT NULL, 345 ev_tick BIGINT UNSIGNED NOT NULL, 346 INDEX(ev_event), 347 INDEX(ev_run), 348 INDEX(ev_tick), 349 UNIQUE(ev_event,ev_run,ev_tick) 350 ) TYPE=InnoDB''') 351 352 # COLUMNS: 353 # 'id' is the unique description id 354 # 'name' is the name of the event that occurred 355 # 356 # INDEXES: 357 # 'id' is indexed because it is the primary key and is what you use 358 # to look up the descriptions 359 # 'name' is indexed so one can find the event based on name 360 # 361 self.query(''' 362 CREATE TABLE event_names( 363 en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 364 en_name VARCHAR(255) NOT NULL, 365 PRIMARY KEY (en_id), 366 UNIQUE (en_name) 367 ) TYPE=InnoDB''') 368 369 def clean(self): 370 self.query(''' 371 DELETE data 372 FROM data 373 LEFT JOIN runs ON dt_run=rn_id 374 WHERE rn_id IS NULL''') 375 376 self.query(''' 377 DELETE formula_ref 378 FROM formula_ref 379 LEFT JOIN runs ON fr_run=rn_id 380 WHERE rn_id IS NULL''') 381 382 self.query(''' 383 DELETE formulas 384 FROM formulas 385 LEFT JOIN formula_ref ON fm_stat=fr_stat 386 WHERE fr_stat IS NULL''') 387 388 self.query(''' 389 DELETE stats 390 FROM stats 391 LEFT JOIN data ON st_id=dt_stat 392 WHERE dt_stat IS NULL''') 393 394 self.query(''' 395 DELETE subdata 396 FROM subdata 397 LEFT JOIN data ON sd_stat=dt_stat 398 WHERE dt_stat IS NULL''') 399 400 self.query('''
	401 DELETE bins 402 FROM bins 403 LEFT JOIN data ON bn_id=dt_bin 404 WHERE dt_bin IS NULL''') 405 406 self.query('''
374 DELETE events 375 FROM events 376 LEFT JOIN runs ON ev_run=rn_id 377 WHERE rn_id IS NULL''') 378 379 self.query(''' 380 DELETE event_names 381 FROM event_names 382 LEFT JOIN events ON en_id=ev_event 383 WHERE ev_event IS NULL''')	407 DELETE events 408 FROM events 409 LEFT JOIN runs ON ev_run=rn_id 410 WHERE rn_id IS NULL''') 411 412 self.query(''' 413 DELETE event_names 414 FROM event_names 415 LEFT JOIN events ON en_id=ev_event 416 WHERE ev_event IS NULL''')

1# Copyright (c) 2003-2004 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

26#
27# Authors: Nathan Binkert

26
27import MySQLdb
28
29class MyDB(object):
30 def __init__(self, options):
31 self.name = options.db
32 self.host = options.host
33 self.user = options.user
34 self.passwd = options.passwd
35 self.mydb = None
36 self.cursor = None
37
38 def admin(self):
39 self.close()
40 self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
41 passwd=self.passwd)
42 self.cursor = self.mydb.cursor()
43
44 def connect(self):
45 self.close()
46 self.mydb = MySQLdb.connect(db=self.name, host=self.host,
47 user=self.user, passwd=self.passwd)
48 self.cursor = self.mydb.cursor()
49
50 def close(self):
51 if self.mydb is not None:
52 self.mydb.close()
53 self.cursor = None
54
55 def query(self, sql):
56 self.cursor.execute(sql)
57
58 def drop(self):
59 self.query('DROP DATABASE IF EXISTS %s' % self.name)
60
61 def create(self):
62 self.query('CREATE DATABASE %s' % self.name)
63
64 def populate(self):
65 #
66 # Each run (or simulation) gets its own entry in the runs table to
67 # group stats by where they were generated
68 #
69 # COLUMNS:
70 # 'id' is a unique identifier for each run to be used in other
71 # tables.
72 # 'name' is the user designated name for the data generated. It is
73 # configured in the simulator.
74 # 'user' identifies the user that generated the data for the given
75 # run.
76 # 'project' another name to identify runs for a specific goal
77 # 'date' is a timestamp for when the data was generated. It can be
78 # used to easily expire data that was generated in the past.
79 # 'expire' is a timestamp for when the data should be removed from
80 # the database so we don't have years worth of junk.
81 #
82 # INDEXES:
83 # 'run' is indexed so you can find out details of a run if the run
84 # was retreived from the data table.
85 # 'name' is indexed so that two all run names are forced to be unique
86 #
87 self.query('''
88 CREATE TABLE runs(
89 rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
90 rn_name VARCHAR(200) NOT NULL,
91 rn_sample VARCHAR(32) NOT NULL,
92 rn_user VARCHAR(32) NOT NULL,
93 rn_project VARCHAR(100) NOT NULL,
94 rn_date TIMESTAMP NOT NULL,
95 rn_expire TIMESTAMP NOT NULL,
96 PRIMARY KEY (rn_id),
97 UNIQUE (rn_name,rn_sample)
98 ) TYPE=InnoDB''')
99
100 #

28
29import MySQLdb
30
31class MyDB(object):
32 def __init__(self, options):
33 self.name = options.db
34 self.host = options.host
35 self.user = options.user
36 self.passwd = options.passwd
37 self.mydb = None
38 self.cursor = None
39
40 def admin(self):
41 self.close()
42 self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
43 passwd=self.passwd)
44 self.cursor = self.mydb.cursor()
45
46 def connect(self):
47 self.close()
48 self.mydb = MySQLdb.connect(db=self.name, host=self.host,
49 user=self.user, passwd=self.passwd)
50 self.cursor = self.mydb.cursor()
51
52 def close(self):
53 if self.mydb is not None:
54 self.mydb.close()
55 self.cursor = None
56
57 def query(self, sql):
58 self.cursor.execute(sql)
59
60 def drop(self):
61 self.query('DROP DATABASE IF EXISTS %s' % self.name)
62
63 def create(self):
64 self.query('CREATE DATABASE %s' % self.name)
65
66 def populate(self):
67 #
68 # Each run (or simulation) gets its own entry in the runs table to
69 # group stats by where they were generated
70 #
71 # COLUMNS:
72 # 'id' is a unique identifier for each run to be used in other
73 # tables.
74 # 'name' is the user designated name for the data generated. It is
75 # configured in the simulator.
76 # 'user' identifies the user that generated the data for the given
77 # run.
78 # 'project' another name to identify runs for a specific goal
79 # 'date' is a timestamp for when the data was generated. It can be
80 # used to easily expire data that was generated in the past.
81 # 'expire' is a timestamp for when the data should be removed from
82 # the database so we don't have years worth of junk.
83 #
84 # INDEXES:
85 # 'run' is indexed so you can find out details of a run if the run
86 # was retreived from the data table.
87 # 'name' is indexed so that two all run names are forced to be unique
88 #
89 self.query('''
90 CREATE TABLE runs(
91 rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
92 rn_name VARCHAR(200) NOT NULL,
93 rn_sample VARCHAR(32) NOT NULL,
94 rn_user VARCHAR(32) NOT NULL,
95 rn_project VARCHAR(100) NOT NULL,
96 rn_date TIMESTAMP NOT NULL,
97 rn_expire TIMESTAMP NOT NULL,
98 PRIMARY KEY (rn_id),
99 UNIQUE (rn_name,rn_sample)
100 ) TYPE=InnoDB''')
101
102 #

103 # We keep the bin names separate so that the data table doesn't get
104 # huge since bin names are frequently repeated.
105 #
106 # COLUMNS:
107 # 'id' is the unique bin identifer.
108 # 'name' is the string name for the bin.
109 #
110 # INDEXES:
111 # 'bin' is indexed to get the name of a bin when data is retrieved
112 # via the data table.
113 # 'name' is indexed to get the bin id for a named bin when you want
114 # to search the data table based on a specific bin.
115 #
116 self.query('''
117 CREATE TABLE bins(
118 bn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
119 bn_name VARCHAR(255) NOT NULL,
120 PRIMARY KEY(bn_id),
121 UNIQUE (bn_name)
122 ) TYPE=InnoDB''')
123
124 #

101 # The stat table gives us all of the data for a particular stat.
102 #
103 # COLUMNS:
104 # 'stat' is a unique identifier for each stat to be used in other
105 # tables for references.
106 # 'name' is simply the simulator derived name for a given
107 # statistic.
108 # 'descr' is the description of the statistic and what it tells
109 # you.
110 # 'type' defines what the stat tells you. Types are:
111 # SCALAR: A simple scalar statistic that holds one value
112 # VECTOR: An array of statistic values. Such a something that
113 # is generated per-thread. Vectors exist to give averages,
114 # pdfs, cdfs, means, standard deviations, etc across the
115 # stat values.
116 # DIST: Is a distribution of data. When the statistic value is
117 # sampled, its value is counted in a particular bucket.
118 # Useful for keeping track of utilization of a resource.
119 # (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
120 # VECTORDIST: Can be used when the distribution needs to be
121 # factored out into a per-thread distribution of data for
122 # example. It can still be summed across threads to find
123 # the total distribution.
124 # VECTOR2D: Can be used when you have a stat that is not only
125 # per-thread, but it is per-something else. Like
126 # per-message type.
127 # FORMULA: This statistic is a formula, and its data must be
128 # looked up in the formula table, for indicating how to
129 # present its values.
130 # 'subdata' is potentially used by any of the vector types to
131 # give a specific name to all of the data elements within a
132 # stat.
133 # 'print' indicates whether this stat should be printed ever.
134 # (Unnamed stats don't usually get printed)
135 # 'prereq' only print the stat if the prereq is not zero.
136 # 'prec' number of decimal places to print
137 # 'nozero' don't print zero values
138 # 'nonan' don't print NaN values
139 # 'total' for vector type stats, print the total.
140 # 'pdf' for vector type stats, print the pdf.
141 # 'cdf' for vector type stats, print the cdf.
142 #
143 # The Following are for dist type stats:
144 # 'min' is the minimum bucket value. Anything less is an underflow.
145 # 'max' is the maximum bucket value. Anything more is an overflow.
146 # 'bktsize' is the approximate number of entries in each bucket.
147 # 'size' is the number of buckets. equal to (min/max)/bktsize.
148 #
149 # INDEXES:
150 # 'stat' is indexed so that you can find out details about a stat
151 # if the stat id was retrieved from the data table.
152 # 'name' is indexed so that you can simply look up data about a
153 # named stat.
154 #
155 self.query('''
156 CREATE TABLE stats(
157 st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
158 st_name VARCHAR(255) NOT NULL,
159 st_descr TEXT NOT NULL,
160 st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
161 "VECTOR2D", "FORMULA") NOT NULL,
162 st_print BOOL NOT NULL,
163 st_prereq SMALLINT UNSIGNED NOT NULL,
164 st_prec TINYINT NOT NULL,
165 st_nozero BOOL NOT NULL,
166 st_nonan BOOL NOT NULL,
167 st_total BOOL NOT NULL,
168 st_pdf BOOL NOT NULL,
169 st_cdf BOOL NOT NULL,
170 st_min DOUBLE NOT NULL,
171 st_max DOUBLE NOT NULL,
172 st_bktsize DOUBLE NOT NULL,
173 st_size SMALLINT UNSIGNED NOT NULL,
174 PRIMARY KEY (st_id),
175 UNIQUE (st_name)
176 ) TYPE=InnoDB''')
177
178 #
179 # This is the main table of data for stats.
180 #
181 # COLUMNS:
182 # 'stat' refers to the stat field given in the stat table.
183 #
184 # 'x' referrs to the first dimension of a multi-dimensional stat. For
185 # a vector, x will start at 0 and increase for each vector
186 # element.
187 # For a distribution:
188 # -1: sum (for calculating standard deviation)
189 # -2: sum of squares (for calculating standard deviation)
190 # -3: total number of samples taken (for calculating
191 # standard deviation)
192 # -4: minimum value
193 # -5: maximum value
194 # -6: underflow
195 # -7: overflow
196 # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second
197 # dimension.
198 # 'run' is the run that the data was generated from. Details up in
199 # the run table
200 # 'tick' is a timestamp generated by the simulator.

125 # The stat table gives us all of the data for a particular stat.
126 #
127 # COLUMNS:
128 # 'stat' is a unique identifier for each stat to be used in other
129 # tables for references.
130 # 'name' is simply the simulator derived name for a given
131 # statistic.
132 # 'descr' is the description of the statistic and what it tells
133 # you.
134 # 'type' defines what the stat tells you. Types are:
135 # SCALAR: A simple scalar statistic that holds one value
136 # VECTOR: An array of statistic values. Such a something that
137 # is generated per-thread. Vectors exist to give averages,
138 # pdfs, cdfs, means, standard deviations, etc across the
139 # stat values.
140 # DIST: Is a distribution of data. When the statistic value is
141 # sampled, its value is counted in a particular bucket.
142 # Useful for keeping track of utilization of a resource.
143 # (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
144 # VECTORDIST: Can be used when the distribution needs to be
145 # factored out into a per-thread distribution of data for
146 # example. It can still be summed across threads to find
147 # the total distribution.
148 # VECTOR2D: Can be used when you have a stat that is not only
149 # per-thread, but it is per-something else. Like
150 # per-message type.
151 # FORMULA: This statistic is a formula, and its data must be
152 # looked up in the formula table, for indicating how to
153 # present its values.
154 # 'subdata' is potentially used by any of the vector types to
155 # give a specific name to all of the data elements within a
156 # stat.
157 # 'print' indicates whether this stat should be printed ever.
158 # (Unnamed stats don't usually get printed)
159 # 'prereq' only print the stat if the prereq is not zero.
160 # 'prec' number of decimal places to print
161 # 'nozero' don't print zero values
162 # 'nonan' don't print NaN values
163 # 'total' for vector type stats, print the total.
164 # 'pdf' for vector type stats, print the pdf.
165 # 'cdf' for vector type stats, print the cdf.
166 #
167 # The Following are for dist type stats:
168 # 'min' is the minimum bucket value. Anything less is an underflow.
169 # 'max' is the maximum bucket value. Anything more is an overflow.
170 # 'bktsize' is the approximate number of entries in each bucket.
171 # 'size' is the number of buckets. equal to (min/max)/bktsize.
172 #
173 # INDEXES:
174 # 'stat' is indexed so that you can find out details about a stat
175 # if the stat id was retrieved from the data table.
176 # 'name' is indexed so that you can simply look up data about a
177 # named stat.
178 #
179 self.query('''
180 CREATE TABLE stats(
181 st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
182 st_name VARCHAR(255) NOT NULL,
183 st_descr TEXT NOT NULL,
184 st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
185 "VECTOR2D", "FORMULA") NOT NULL,
186 st_print BOOL NOT NULL,
187 st_prereq SMALLINT UNSIGNED NOT NULL,
188 st_prec TINYINT NOT NULL,
189 st_nozero BOOL NOT NULL,
190 st_nonan BOOL NOT NULL,
191 st_total BOOL NOT NULL,
192 st_pdf BOOL NOT NULL,
193 st_cdf BOOL NOT NULL,
194 st_min DOUBLE NOT NULL,
195 st_max DOUBLE NOT NULL,
196 st_bktsize DOUBLE NOT NULL,
197 st_size SMALLINT UNSIGNED NOT NULL,
198 PRIMARY KEY (st_id),
199 UNIQUE (st_name)
200 ) TYPE=InnoDB''')
201
202 #
203 # This is the main table of data for stats.
204 #
205 # COLUMNS:
206 # 'stat' refers to the stat field given in the stat table.
207 #
208 # 'x' referrs to the first dimension of a multi-dimensional stat. For
209 # a vector, x will start at 0 and increase for each vector
210 # element.
211 # For a distribution:
212 # -1: sum (for calculating standard deviation)
213 # -2: sum of squares (for calculating standard deviation)
214 # -3: total number of samples taken (for calculating
215 # standard deviation)
216 # -4: minimum value
217 # -5: maximum value
218 # -6: underflow
219 # -7: overflow
220 # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second
221 # dimension.
222 # 'run' is the run that the data was generated from. Details up in
223 # the run table
224 # 'tick' is a timestamp generated by the simulator.

225 # 'bin' is the name of the bin that the data was generated in, if
226 # any.

201 # 'data' is the actual stat value.
202 #
203 # INDEXES:
204 # 'stat' is indexed so that a user can find all of the data for a
205 # particular stat. It is not unique, because that specific stat

227 # 'data' is the actual stat value.
228 #
229 # INDEXES:
230 # 'stat' is indexed so that a user can find all of the data for a
231 # particular stat. It is not unique, because that specific stat

206 # can be found in many runs and samples, in addition to

232 # can be found in many runs, bins, and samples, in addition to

207 # having entries for the mulidimensional cases.
208 # 'run' is indexed to allow a user to remove all of the data for a
209 # particular execution run. It can also be used to allow the
210 # user to print out all of the data for a given run.
211 #
212 self.query('''
213 CREATE TABLE data(
214 dt_stat SMALLINT UNSIGNED NOT NULL,
215 dt_x SMALLINT NOT NULL,
216 dt_y SMALLINT NOT NULL,
217 dt_run SMALLINT UNSIGNED NOT NULL,
218 dt_tick BIGINT UNSIGNED NOT NULL,

233 # having entries for the mulidimensional cases.
234 # 'run' is indexed to allow a user to remove all of the data for a
235 # particular execution run. It can also be used to allow the
236 # user to print out all of the data for a given run.
237 #
238 self.query('''
239 CREATE TABLE data(
240 dt_stat SMALLINT UNSIGNED NOT NULL,
241 dt_x SMALLINT NOT NULL,
242 dt_y SMALLINT NOT NULL,
243 dt_run SMALLINT UNSIGNED NOT NULL,
244 dt_tick BIGINT UNSIGNED NOT NULL,

245 dt_bin SMALLINT UNSIGNED NOT NULL,

219 dt_data DOUBLE NOT NULL,
220 INDEX (dt_stat),
221 INDEX (dt_run),

246 dt_data DOUBLE NOT NULL,
247 INDEX (dt_stat),
248 INDEX (dt_run),

222 UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick)

249 UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin)

223 ) TYPE=InnoDB;''')
224
225 #
226 # Names and descriptions for multi-dimensional stats (vectors, etc.)
227 # are stored here instead of having their own entry in the statistics
228 # table. This allows all parts of a single stat to easily share a
229 # single id.
230 #
231 # COLUMNS:
232 # 'stat' is the unique stat identifier from the stat table.
233 # 'x' is the first dimension for multi-dimensional stats
234 # corresponding to the data table above.
235 # 'y' is the second dimension for multi-dimensional stats
236 # corresponding to the data table above.
237 # 'name' is the specific subname for the unique stat,x,y combination.
238 # 'descr' is the specific description for the uniqe stat,x,y
239 # combination.
240 #
241 # INDEXES:
242 # 'stat' is indexed so you can get the subdata for a specific stat.
243 #
244 self.query('''
245 CREATE TABLE subdata(
246 sd_stat SMALLINT UNSIGNED NOT NULL,
247 sd_x SMALLINT NOT NULL,
248 sd_y SMALLINT NOT NULL,
249 sd_name VARCHAR(255) NOT NULL,
250 sd_descr TEXT,
251 UNIQUE (sd_stat,sd_x,sd_y)
252 ) TYPE=InnoDB''')
253
254
255 #
256 # The formula table is maintained separately from the data table
257 # because formula data, unlike other stat data cannot be represented
258 # there.
259 #
260 # COLUMNS:
261 # 'stat' refers to the stat field generated in the stat table.
262 # 'formula' is the actual string representation of the formula
263 # itself.
264 #
265 # INDEXES:
266 # 'stat' is indexed so that you can just look up a formula.
267 #
268 self.query('''
269 CREATE TABLE formulas(
270 fm_stat SMALLINT UNSIGNED NOT NULL,
271 fm_formula BLOB NOT NULL,
272 PRIMARY KEY(fm_stat)
273 ) TYPE=InnoDB''')
274
275 #
276 # Each stat used in each formula is kept in this table. This way, if
277 # you want to print out a particular formula, you can simply find out
278 # which stats you need by looking in this table. Additionally, when
279 # you remove a stat from the stats table and data table, you remove
280 # any references to the formula in this table. When a formula is no
281 # longer referred to, you remove its entry.
282 #
283 # COLUMNS:
284 # 'stat' is the stat id from the stat table above.
285 # 'child' is the stat id of a stat that is used for this formula.
286 # There may be many children for any given 'stat' (formula)
287 #
288 # INDEXES:
289 # 'stat' is indexed so you can look up all of the children for a
290 # particular stat.
291 # 'child' is indexed so that you can remove an entry when a stat is
292 # removed.
293 #
294 self.query('''
295 CREATE TABLE formula_ref(
296 fr_stat SMALLINT UNSIGNED NOT NULL,
297 fr_run SMALLINT UNSIGNED NOT NULL,
298 UNIQUE (fr_stat,fr_run),
299 INDEX (fr_stat),
300 INDEX (fr_run)
301 ) TYPE=InnoDB''')
302
303 # COLUMNS:
304 # 'event' is the unique event id from the event_desc table
305 # 'run' is simulation run id that this event took place in
306 # 'tick' is the tick when the event happened
307 #
308 # INDEXES:
309 # 'event' is indexed so you can look up all occurences of a
310 # specific event
311 # 'run' is indexed so you can find all events in a run
312 # 'tick' is indexed because we want the unique thing anyway
313 # 'event,run,tick' is unique combination
314 self.query('''
315 CREATE TABLE events(
316 ev_event SMALLINT UNSIGNED NOT NULL,
317 ev_run SMALLINT UNSIGNED NOT NULL,
318 ev_tick BIGINT UNSIGNED NOT NULL,
319 INDEX(ev_event),
320 INDEX(ev_run),
321 INDEX(ev_tick),
322 UNIQUE(ev_event,ev_run,ev_tick)
323 ) TYPE=InnoDB''')
324
325 # COLUMNS:
326 # 'id' is the unique description id
327 # 'name' is the name of the event that occurred
328 #
329 # INDEXES:
330 # 'id' is indexed because it is the primary key and is what you use
331 # to look up the descriptions
332 # 'name' is indexed so one can find the event based on name
333 #
334 self.query('''
335 CREATE TABLE event_names(
336 en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
337 en_name VARCHAR(255) NOT NULL,
338 PRIMARY KEY (en_id),
339 UNIQUE (en_name)
340 ) TYPE=InnoDB''')
341
342 def clean(self):
343 self.query('''
344 DELETE data
345 FROM data
346 LEFT JOIN runs ON dt_run=rn_id
347 WHERE rn_id IS NULL''')
348
349 self.query('''
350 DELETE formula_ref
351 FROM formula_ref
352 LEFT JOIN runs ON fr_run=rn_id
353 WHERE rn_id IS NULL''')
354
355 self.query('''
356 DELETE formulas
357 FROM formulas
358 LEFT JOIN formula_ref ON fm_stat=fr_stat
359 WHERE fr_stat IS NULL''')
360
361 self.query('''
362 DELETE stats
363 FROM stats
364 LEFT JOIN data ON st_id=dt_stat
365 WHERE dt_stat IS NULL''')
366
367 self.query('''
368 DELETE subdata
369 FROM subdata
370 LEFT JOIN data ON sd_stat=dt_stat
371 WHERE dt_stat IS NULL''')
372
373 self.query('''

250 ) TYPE=InnoDB;''')
251
252 #
253 # Names and descriptions for multi-dimensional stats (vectors, etc.)
254 # are stored here instead of having their own entry in the statistics
255 # table. This allows all parts of a single stat to easily share a
256 # single id.
257 #
258 # COLUMNS:
259 # 'stat' is the unique stat identifier from the stat table.
260 # 'x' is the first dimension for multi-dimensional stats
261 # corresponding to the data table above.
262 # 'y' is the second dimension for multi-dimensional stats
263 # corresponding to the data table above.
264 # 'name' is the specific subname for the unique stat,x,y combination.
265 # 'descr' is the specific description for the uniqe stat,x,y
266 # combination.
267 #
268 # INDEXES:
269 # 'stat' is indexed so you can get the subdata for a specific stat.
270 #
271 self.query('''
272 CREATE TABLE subdata(
273 sd_stat SMALLINT UNSIGNED NOT NULL,
274 sd_x SMALLINT NOT NULL,
275 sd_y SMALLINT NOT NULL,
276 sd_name VARCHAR(255) NOT NULL,
277 sd_descr TEXT,
278 UNIQUE (sd_stat,sd_x,sd_y)
279 ) TYPE=InnoDB''')
280
281
282 #
283 # The formula table is maintained separately from the data table
284 # because formula data, unlike other stat data cannot be represented
285 # there.
286 #
287 # COLUMNS:
288 # 'stat' refers to the stat field generated in the stat table.
289 # 'formula' is the actual string representation of the formula
290 # itself.
291 #
292 # INDEXES:
293 # 'stat' is indexed so that you can just look up a formula.
294 #
295 self.query('''
296 CREATE TABLE formulas(
297 fm_stat SMALLINT UNSIGNED NOT NULL,
298 fm_formula BLOB NOT NULL,
299 PRIMARY KEY(fm_stat)
300 ) TYPE=InnoDB''')
301
302 #
303 # Each stat used in each formula is kept in this table. This way, if
304 # you want to print out a particular formula, you can simply find out
305 # which stats you need by looking in this table. Additionally, when
306 # you remove a stat from the stats table and data table, you remove
307 # any references to the formula in this table. When a formula is no
308 # longer referred to, you remove its entry.
309 #
310 # COLUMNS:
311 # 'stat' is the stat id from the stat table above.
312 # 'child' is the stat id of a stat that is used for this formula.
313 # There may be many children for any given 'stat' (formula)
314 #
315 # INDEXES:
316 # 'stat' is indexed so you can look up all of the children for a
317 # particular stat.
318 # 'child' is indexed so that you can remove an entry when a stat is
319 # removed.
320 #
321 self.query('''
322 CREATE TABLE formula_ref(
323 fr_stat SMALLINT UNSIGNED NOT NULL,
324 fr_run SMALLINT UNSIGNED NOT NULL,
325 UNIQUE (fr_stat,fr_run),
326 INDEX (fr_stat),
327 INDEX (fr_run)
328 ) TYPE=InnoDB''')
329
330 # COLUMNS:
331 # 'event' is the unique event id from the event_desc table
332 # 'run' is simulation run id that this event took place in
333 # 'tick' is the tick when the event happened
334 #
335 # INDEXES:
336 # 'event' is indexed so you can look up all occurences of a
337 # specific event
338 # 'run' is indexed so you can find all events in a run
339 # 'tick' is indexed because we want the unique thing anyway
340 # 'event,run,tick' is unique combination
341 self.query('''
342 CREATE TABLE events(
343 ev_event SMALLINT UNSIGNED NOT NULL,
344 ev_run SMALLINT UNSIGNED NOT NULL,
345 ev_tick BIGINT UNSIGNED NOT NULL,
346 INDEX(ev_event),
347 INDEX(ev_run),
348 INDEX(ev_tick),
349 UNIQUE(ev_event,ev_run,ev_tick)
350 ) TYPE=InnoDB''')
351
352 # COLUMNS:
353 # 'id' is the unique description id
354 # 'name' is the name of the event that occurred
355 #
356 # INDEXES:
357 # 'id' is indexed because it is the primary key and is what you use
358 # to look up the descriptions
359 # 'name' is indexed so one can find the event based on name
360 #
361 self.query('''
362 CREATE TABLE event_names(
363 en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
364 en_name VARCHAR(255) NOT NULL,
365 PRIMARY KEY (en_id),
366 UNIQUE (en_name)
367 ) TYPE=InnoDB''')
368
369 def clean(self):
370 self.query('''
371 DELETE data
372 FROM data
373 LEFT JOIN runs ON dt_run=rn_id
374 WHERE rn_id IS NULL''')
375
376 self.query('''
377 DELETE formula_ref
378 FROM formula_ref
379 LEFT JOIN runs ON fr_run=rn_id
380 WHERE rn_id IS NULL''')
381
382 self.query('''
383 DELETE formulas
384 FROM formulas
385 LEFT JOIN formula_ref ON fm_stat=fr_stat
386 WHERE fr_stat IS NULL''')
387
388 self.query('''
389 DELETE stats
390 FROM stats
391 LEFT JOIN data ON st_id=dt_stat
392 WHERE dt_stat IS NULL''')
393
394 self.query('''
395 DELETE subdata
396 FROM subdata
397 LEFT JOIN data ON sd_stat=dt_stat
398 WHERE dt_stat IS NULL''')
399
400 self.query('''

401 DELETE bins
402 FROM bins
403 LEFT JOIN data ON bn_id=dt_bin
404 WHERE dt_bin IS NULL''')
405
406 self.query('''