dbinit.py revision 2343
12SN/A# Copyright (c) 2003-2004 The Regents of The University of Michigan 21762SN/A# All rights reserved. 32SN/A# 42SN/A# Redistribution and use in source and binary forms, with or without 52SN/A# modification, are permitted provided that the following conditions are 62SN/A# met: redistributions of source code must retain the above copyright 72SN/A# notice, this list of conditions and the following disclaimer; 82SN/A# redistributions in binary form must reproduce the above copyright 92SN/A# notice, this list of conditions and the following disclaimer in the 102SN/A# documentation and/or other materials provided with the distribution; 112SN/A# neither the name of the copyright holders nor the names of its 122SN/A# contributors may be used to endorse or promote products derived from 132SN/A# this software without specific prior written permission. 142SN/A# 152SN/A# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 162SN/A# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 172SN/A# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 182SN/A# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 192SN/A# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 202SN/A# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 212SN/A# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 222SN/A# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 232SN/A# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 242SN/A# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 252SN/A# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 262SN/A 272665Ssaidi@eecs.umich.eduimport MySQLdb 282665Ssaidi@eecs.umich.edu 292665Ssaidi@eecs.umich.educlass MyDB(object): 302SN/A def __init__(self, options): 312SN/A self.name = options.db 322SN/A self.host = options.host 332SN/A self.user = options.user 342SN/A self.passwd = options.passwd 352SN/A self.mydb = None 361354SN/A self.cursor = None 371354SN/A 382SN/A def admin(self): 392SN/A self.close() 405501Snate@binkert.org self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user, 415546Snate@binkert.org passwd=self.passwd) 427004Snate@binkert.org self.cursor = self.mydb.cursor() 432SN/A 442SN/A def connect(self): 455769Snate@binkert.org self.close() 462361SN/A self.mydb = MySQLdb.connect(db=self.name, host=self.host, 476216Snate@binkert.org user=self.user, passwd=self.passwd) 488232Snate@binkert.org self.cursor = self.mydb.cursor() 4956SN/A 502SN/A def close(self): 515543Ssaidi@eecs.umich.edu if self.mydb is not None: 522SN/A self.mydb.close() 531354SN/A self.cursor = None 541354SN/A 552SN/A def query(self, sql): 562SN/A self.cursor.execute(sql) 572SN/A 582SN/A def drop(self): 595501Snate@binkert.org self.query('DROP DATABASE IF EXISTS %s' % self.name) 605501Snate@binkert.org 612SN/A def create(self): 629044SAli.Saidi@ARM.com self.query('CREATE DATABASE %s' % self.name) 632SN/A 642SN/A def populate(self): 652SN/A # 665769Snate@binkert.org # Each run (or simulation) gets its own entry in the runs table to 678902Sandreas.hansson@arm.com # group stats by where they were generated 685769Snate@binkert.org # 695769Snate@binkert.org # COLUMNS: 707059Snate@binkert.org # 'id' is a unique identifier for each run to be used in other 717059Snate@binkert.org # tables. 727059Snate@binkert.org # 'name' is the user designated name for the data generated. It is 737059Snate@binkert.org # configured in the simulator. 747059Snate@binkert.org # 'user' identifies the user that generated the data for the given 757059Snate@binkert.org # run. 767059Snate@binkert.org # 'project' another name to identify runs for a specific goal 777059Snate@binkert.org # 'date' is a timestamp for when the data was generated. It can be 787059Snate@binkert.org # used to easily expire data that was generated in the past. 797059Snate@binkert.org # 'expire' is a timestamp for when the data should be removed from 807059Snate@binkert.org # the database so we don't have years worth of junk. 817059Snate@binkert.org # 827059Snate@binkert.org # INDEXES: 837059Snate@binkert.org # 'run' is indexed so you can find out details of a run if the run 847059Snate@binkert.org # was retreived from the data table. 857059Snate@binkert.org # 'name' is indexed so that two all run names are forced to be unique 865769Snate@binkert.org # 877058Snate@binkert.org self.query(''' 887058Snate@binkert.org CREATE TABLE runs( 897058Snate@binkert.org rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 902SN/A rn_name VARCHAR(200) NOT NULL, 915502Snate@binkert.org rn_sample VARCHAR(32) NOT NULL, 925502Snate@binkert.org rn_user VARCHAR(32) NOT NULL, 935502Snate@binkert.org rn_project VARCHAR(100) NOT NULL, 945503Snate@binkert.org rn_date TIMESTAMP NOT NULL, 955503Snate@binkert.org rn_expire TIMESTAMP NOT NULL, 965502Snate@binkert.org PRIMARY KEY (rn_id), 975502Snate@binkert.org UNIQUE (rn_name,rn_sample) 985502Snate@binkert.org ) TYPE=InnoDB''') 995502Snate@binkert.org 1005502Snate@binkert.org # 1015502Snate@binkert.org # The stat table gives us all of the data for a particular stat. 1025502Snate@binkert.org # 1035602Snate@binkert.org # COLUMNS: 1045602Snate@binkert.org # 'stat' is a unique identifier for each stat to be used in other 1055501Snate@binkert.org # tables for references. 1065543Ssaidi@eecs.umich.edu # 'name' is simply the simulator derived name for a given 1077058Snate@binkert.org # statistic. 1085769Snate@binkert.org # 'descr' is the description of the statistic and what it tells 1094016Sstever@eecs.umich.edu # you. 1104016Sstever@eecs.umich.edu # 'type' defines what the stat tells you. Types are: 1114016Sstever@eecs.umich.edu # SCALAR: A simple scalar statistic that holds one value 1124016Sstever@eecs.umich.edu # VECTOR: An array of statistic values. Such a something that 1134016Sstever@eecs.umich.edu # is generated per-thread. Vectors exist to give averages, 1144016Sstever@eecs.umich.edu # pdfs, cdfs, means, standard deviations, etc across the 1154016Sstever@eecs.umich.edu # stat values. 1164016Sstever@eecs.umich.edu # DIST: Is a distribution of data. When the statistic value is 1174016Sstever@eecs.umich.edu # sampled, its value is counted in a particular bucket. 1185501Snate@binkert.org # Useful for keeping track of utilization of a resource. 1195605Snate@binkert.org # (e.g. fraction of time it is 25% used vs. 50% vs. 100%) 1205605Snate@binkert.org # VECTORDIST: Can be used when the distribution needs to be 1215605Snate@binkert.org # factored out into a per-thread distribution of data for 1225605Snate@binkert.org # example. It can still be summed across threads to find 1235501Snate@binkert.org # the total distribution. 1244016Sstever@eecs.umich.edu # VECTOR2D: Can be used when you have a stat that is not only 1255577SSteve.Reinhardt@amd.com # per-thread, but it is per-something else. Like 1265501Snate@binkert.org # per-message type. 1275501Snate@binkert.org # FORMULA: This statistic is a formula, and its data must be 1285501Snate@binkert.org # looked up in the formula table, for indicating how to 1295502Snate@binkert.org # present its values. 1305502Snate@binkert.org # 'subdata' is potentially used by any of the vector types to 1315605Snate@binkert.org # give a specific name to all of the data elements within a 1325502Snate@binkert.org # stat. 1335502Snate@binkert.org # 'print' indicates whether this stat should be printed ever. 1345605Snate@binkert.org # (Unnamed stats don't usually get printed) 1355605Snate@binkert.org # 'prereq' only print the stat if the prereq is not zero. 1365605Snate@binkert.org # 'prec' number of decimal places to print 1375577SSteve.Reinhardt@amd.com # 'nozero' don't print zero values 1387823Ssteve.reinhardt@amd.com # 'nonan' don't print NaN values 1395502Snate@binkert.org # 'total' for vector type stats, print the total. 1405502Snate@binkert.org # 'pdf' for vector type stats, print the pdf. 1415502Snate@binkert.org # 'cdf' for vector type stats, print the cdf. 1422SN/A # 1435769Snate@binkert.org # The Following are for dist type stats: 1445769Snate@binkert.org # 'min' is the minimum bucket value. Anything less is an underflow. 1455769Snate@binkert.org # 'max' is the maximum bucket value. Anything more is an overflow. 1465769Snate@binkert.org # 'bktsize' is the approximate number of entries in each bucket. 1475769Snate@binkert.org # 'size' is the number of buckets. equal to (min/max)/bktsize. 1485769Snate@binkert.org # 1492SN/A # INDEXES: 1508581Ssteve.reinhardt@amd.com # 'stat' is indexed so that you can find out details about a stat 1518581Ssteve.reinhardt@amd.com # if the stat id was retrieved from the data table. 1525769Snate@binkert.org # 'name' is indexed so that you can simply look up data about a 1537059Snate@binkert.org # named stat. 1545769Snate@binkert.org # 1555769Snate@binkert.org self.query(''' 1562SN/A CREATE TABLE stats( 1575769Snate@binkert.org st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 1585769Snate@binkert.org st_name VARCHAR(255) NOT NULL, 1595769Snate@binkert.org st_descr TEXT NOT NULL, 1605769Snate@binkert.org st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST", 1615769Snate@binkert.org "VECTOR2D", "FORMULA") NOT NULL, 1625769Snate@binkert.org st_print BOOL NOT NULL, 1635769Snate@binkert.org st_prereq SMALLINT UNSIGNED NOT NULL, 1645769Snate@binkert.org st_prec TINYINT NOT NULL, 1655769Snate@binkert.org st_nozero BOOL NOT NULL, 1665769Snate@binkert.org st_nonan BOOL NOT NULL, 1675769Snate@binkert.org st_total BOOL NOT NULL, 1685769Snate@binkert.org st_pdf BOOL NOT NULL, 1695769Snate@binkert.org st_cdf BOOL NOT NULL, 1705769Snate@binkert.org st_min DOUBLE NOT NULL, 1715769Snate@binkert.org st_max DOUBLE NOT NULL, 1725769Snate@binkert.org st_bktsize DOUBLE NOT NULL, 1735769Snate@binkert.org st_size SMALLINT UNSIGNED NOT NULL, 1745769Snate@binkert.org PRIMARY KEY (st_id), 1755769Snate@binkert.org UNIQUE (st_name) 1765769Snate@binkert.org ) TYPE=InnoDB''') 1775769Snate@binkert.org 1785501Snate@binkert.org # 1795543Ssaidi@eecs.umich.edu # This is the main table of data for stats. 1802SN/A # 1812SN/A # COLUMNS: 182396SN/A # 'stat' refers to the stat field given in the stat table. 183396SN/A # 184396SN/A # 'x' referrs to the first dimension of a multi-dimensional stat. For 185396SN/A # a vector, x will start at 0 and increase for each vector 1865501Snate@binkert.org # element. 1877058Snate@binkert.org # For a distribution: 1887058Snate@binkert.org # -1: sum (for calculating standard deviation) 1893329Sstever@eecs.umich.edu # -2: sum of squares (for calculating standard deviation) 1907058Snate@binkert.org # -3: total number of samples taken (for calculating 1917058Snate@binkert.org # standard deviation) 1927058Snate@binkert.org # -4: minimum value 1937058Snate@binkert.org # -5: maximum value 194396SN/A # -6: underflow 1957058Snate@binkert.org # -7: overflow 1967058Snate@binkert.org # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second 1977058Snate@binkert.org # dimension. 1987058Snate@binkert.org # 'run' is the run that the data was generated from. Details up in 1993329Sstever@eecs.umich.edu # the run table 2007058Snate@binkert.org # 'tick' is a timestamp generated by the simulator. 2017058Snate@binkert.org # 'data' is the actual stat value. 2027058Snate@binkert.org # 2037058Snate@binkert.org # INDEXES: 2047058Snate@binkert.org # 'stat' is indexed so that a user can find all of the data for a 205396SN/A # particular stat. It is not unique, because that specific stat 2067058Snate@binkert.org # can be found in many runs and samples, in addition to 2077058Snate@binkert.org # having entries for the mulidimensional cases. 2087058Snate@binkert.org # 'run' is indexed to allow a user to remove all of the data for a 2097058Snate@binkert.org # particular execution run. It can also be used to allow the 210396SN/A # user to print out all of the data for a given run. 2117058Snate@binkert.org # 2127058Snate@binkert.org self.query(''' 213396SN/A CREATE TABLE data( 2147058Snate@binkert.org dt_stat SMALLINT UNSIGNED NOT NULL, 2157058Snate@binkert.org dt_x SMALLINT NOT NULL, 2167058Snate@binkert.org dt_y SMALLINT NOT NULL, 2177058Snate@binkert.org dt_run SMALLINT UNSIGNED NOT NULL, 218396SN/A dt_tick BIGINT UNSIGNED NOT NULL, 2197058Snate@binkert.org dt_data DOUBLE NOT NULL, 2207058Snate@binkert.org INDEX (dt_stat), 2217058Snate@binkert.org INDEX (dt_run), 222396SN/A UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick) 2237058Snate@binkert.org ) TYPE=InnoDB;''') 2247058Snate@binkert.org 2257058Snate@binkert.org # 2264075Sbinkertn@umich.edu # Names and descriptions for multi-dimensional stats (vectors, etc.) 2277058Snate@binkert.org # are stored here instead of having their own entry in the statistics 2287058Snate@binkert.org # table. This allows all parts of a single stat to easily share a 2295501Snate@binkert.org # single id. 2307058Snate@binkert.org # 2317058Snate@binkert.org # COLUMNS: 2327058Snate@binkert.org # 'stat' is the unique stat identifier from the stat table. 2337058Snate@binkert.org # 'x' is the first dimension for multi-dimensional stats 2347058Snate@binkert.org # corresponding to the data table above. 2357058Snate@binkert.org # 'y' is the second dimension for multi-dimensional stats 236396SN/A # corresponding to the data table above. 2372SN/A # 'name' is the specific subname for the unique stat,x,y combination. 2382SN/A # 'descr' is the specific description for the uniqe stat,x,y 2392SN/A # combination. 2402SN/A # 2418581Ssteve.reinhardt@amd.com # INDEXES: 2428581Ssteve.reinhardt@amd.com # 'stat' is indexed so you can get the subdata for a specific stat. 2438581Ssteve.reinhardt@amd.com # 244224SN/A self.query(''' 2458581Ssteve.reinhardt@amd.com CREATE TABLE subdata( 2464016Sstever@eecs.umich.edu sd_stat SMALLINT UNSIGNED NOT NULL, 2475501Snate@binkert.org sd_x SMALLINT NOT NULL, 2485605Snate@binkert.org sd_y SMALLINT NOT NULL, 2495501Snate@binkert.org sd_name VARCHAR(255) NOT NULL, 2505501Snate@binkert.org sd_descr TEXT, 2517823Ssteve.reinhardt@amd.com UNIQUE (sd_stat,sd_x,sd_y) 2525501Snate@binkert.org ) TYPE=InnoDB''') 2534016Sstever@eecs.umich.edu 254224SN/A 255224SN/A # 2565768Snate@binkert.org # The formula table is maintained separately from the data table 2575768Snate@binkert.org # because formula data, unlike other stat data cannot be represented 258265SN/A # there. 2595501Snate@binkert.org # 2605501Snate@binkert.org # COLUMNS: 2615501Snate@binkert.org # 'stat' refers to the stat field generated in the stat table. 2625501Snate@binkert.org # 'formula' is the actual string representation of the formula 2635501Snate@binkert.org # itself. 2645501Snate@binkert.org # 2655501Snate@binkert.org # INDEXES: 2665501Snate@binkert.org # 'stat' is indexed so that you can just look up a formula. 2675501Snate@binkert.org # 2685501Snate@binkert.org self.query(''' 2695501Snate@binkert.org CREATE TABLE formulas( 2705501Snate@binkert.org fm_stat SMALLINT UNSIGNED NOT NULL, 2715501Snate@binkert.org fm_formula BLOB NOT NULL, 2725501Snate@binkert.org PRIMARY KEY(fm_stat) 2735501Snate@binkert.org ) TYPE=InnoDB''') 2745501Snate@binkert.org 2755501Snate@binkert.org # 2765501Snate@binkert.org # Each stat used in each formula is kept in this table. This way, if 2775501Snate@binkert.org # you want to print out a particular formula, you can simply find out 2785501Snate@binkert.org # which stats you need by looking in this table. Additionally, when 2795501Snate@binkert.org # you remove a stat from the stats table and data table, you remove 2802SN/A # any references to the formula in this table. When a formula is no 2815769Snate@binkert.org # longer referred to, you remove its entry. 2822SN/A # 2832SN/A # COLUMNS: 2845769Snate@binkert.org # 'stat' is the stat id from the stat table above. 2852SN/A # 'child' is the stat id of a stat that is used for this formula. 2862SN/A # There may be many children for any given 'stat' (formula) 2875769Snate@binkert.org # 2882SN/A # INDEXES: 2892667Sstever@eecs.umich.edu # 'stat' is indexed so you can look up all of the children for a 2905769Snate@binkert.org # particular stat. 2912667Sstever@eecs.umich.edu # 'child' is indexed so that you can remove an entry when a stat is 2922SN/A # removed. 2932SN/A # 2942SN/A self.query(''' 2952SN/A CREATE TABLE formula_ref( 2967058Snate@binkert.org fr_stat SMALLINT UNSIGNED NOT NULL, 2972SN/A fr_run SMALLINT UNSIGNED NOT NULL, 2985605Snate@binkert.org UNIQUE (fr_stat,fr_run), 2995501Snate@binkert.org INDEX (fr_stat), 3005501Snate@binkert.org INDEX (fr_run) 3012SN/A ) TYPE=InnoDB''') 3025501Snate@binkert.org 3035501Snate@binkert.org # COLUMNS: 3045501Snate@binkert.org # 'event' is the unique event id from the event_desc table 3052SN/A # 'run' is simulation run id that this event took place in 3062SN/A # 'tick' is the tick when the event happened 3072SN/A # 308224SN/A # INDEXES: 309224SN/A # 'event' is indexed so you can look up all occurences of a 310237SN/A # specific event 3115605Snate@binkert.org # 'run' is indexed so you can find all events in a run 312571SN/A # 'tick' is indexed because we want the unique thing anyway 313571SN/A # 'event,run,tick' is unique combination 3147005Snate@binkert.org self.query(''' 3157005Snate@binkert.org CREATE TABLE events( 3167005Snate@binkert.org ev_event SMALLINT UNSIGNED NOT NULL, 3177005Snate@binkert.org ev_run SMALLINT UNSIGNED NOT NULL, 3187005Snate@binkert.org ev_tick BIGINT UNSIGNED NOT NULL, 3197005Snate@binkert.org INDEX(ev_event), 3207005Snate@binkert.org INDEX(ev_run), 3217005Snate@binkert.org INDEX(ev_tick), 3227005Snate@binkert.org UNIQUE(ev_event,ev_run,ev_tick) 3237005Snate@binkert.org ) TYPE=InnoDB''') 3247005Snate@binkert.org 3257005Snate@binkert.org # COLUMNS: 3267005Snate@binkert.org # 'id' is the unique description id 3277005Snate@binkert.org # 'name' is the name of the event that occurred 3287005Snate@binkert.org # 3297005Snate@binkert.org # INDEXES: 3307005Snate@binkert.org # 'id' is indexed because it is the primary key and is what you use 3317005Snate@binkert.org # to look up the descriptions 3327005Snate@binkert.org # 'name' is indexed so one can find the event based on name 3337005Snate@binkert.org # 3347005Snate@binkert.org self.query(''' 3357005Snate@binkert.org CREATE TABLE event_names( 3367005Snate@binkert.org en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 3377005Snate@binkert.org en_name VARCHAR(255) NOT NULL, 3387005Snate@binkert.org PRIMARY KEY (en_id), 3397005Snate@binkert.org UNIQUE (en_name) 3407005Snate@binkert.org ) TYPE=InnoDB''') 3417005Snate@binkert.org 3427005Snate@binkert.org def clean(self): 3437005Snate@binkert.org self.query(''' 3447005Snate@binkert.org DELETE data 3457005Snate@binkert.org FROM data 3467005Snate@binkert.org LEFT JOIN runs ON dt_run=rn_id 3477005Snate@binkert.org WHERE rn_id IS NULL''') 3487005Snate@binkert.org 3497005Snate@binkert.org self.query(''' 3507005Snate@binkert.org DELETE formula_ref 3517005Snate@binkert.org FROM formula_ref 3527005Snate@binkert.org LEFT JOIN runs ON fr_run=rn_id 3537005Snate@binkert.org WHERE rn_id IS NULL''') 3547005Snate@binkert.org 3552SN/A self.query(''' 3562SN/A DELETE formulas 3572SN/A FROM formulas 358395SN/A LEFT JOIN formula_ref ON fm_stat=fr_stat 3592SN/A WHERE fr_stat IS NULL''') 3605605Snate@binkert.org 361265SN/A self.query(''' 3622SN/A DELETE stats 3639356Snilay@cs.wisc.edu FROM stats 3642SN/A LEFT JOIN data ON st_id=dt_stat 3652SN/A WHERE dt_stat IS NULL''') 3662SN/A 3672SN/A self.query(''' 3687063Snate@binkert.org DELETE subdata 3697063Snate@binkert.org FROM subdata 3707063Snate@binkert.org LEFT JOIN data ON sd_stat=dt_stat 3712SN/A WHERE dt_stat IS NULL''') 3727063Snate@binkert.org 3732SN/A self.query(''' 374512SN/A DELETE events 375265SN/A FROM events 3762SN/A LEFT JOIN runs ON ev_run=rn_id 3775738Snate@binkert.org WHERE rn_id IS NULL''') 3785738Snate@binkert.org 3795738Snate@binkert.org self.query(''' 3802SN/A DELETE event_names 3815501Snate@binkert.org FROM event_names 3829356Snilay@cs.wisc.edu LEFT JOIN events ON en_id=ev_event 3839356Snilay@cs.wisc.edu WHERE ev_event IS NULL''') 3849356Snilay@cs.wisc.edu