dbinit.py revision 1758
11758Ssaidi@eecs.umich.edu
21758Ssaidi@eecs.umich.edu# Copyright (c) 2003-2004 The Regents of The University of Michigan
31758Ssaidi@eecs.umich.edu# All rights reserved.
41758Ssaidi@eecs.umich.edu#
51758Ssaidi@eecs.umich.edu# Redistribution and use in source and binary forms, with or without
61758Ssaidi@eecs.umich.edu# modification, are permitted provided that the following conditions are
71758Ssaidi@eecs.umich.edu# met: redistributions of source code must retain the above copyright
81758Ssaidi@eecs.umich.edu# notice, this list of conditions and the following disclaimer;
91758Ssaidi@eecs.umich.edu# redistributions in binary form must reproduce the above copyright
101758Ssaidi@eecs.umich.edu# notice, this list of conditions and the following disclaimer in the
111758Ssaidi@eecs.umich.edu# documentation and/or other materials provided with the distribution;
121758Ssaidi@eecs.umich.edu# neither the name of the copyright holders nor the names of its
131758Ssaidi@eecs.umich.edu# contributors may be used to endorse or promote products derived from
141758Ssaidi@eecs.umich.edu# this software without specific prior written permission.
151758Ssaidi@eecs.umich.edu#
161758Ssaidi@eecs.umich.edu# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
171758Ssaidi@eecs.umich.edu# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
181758Ssaidi@eecs.umich.edu# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
191758Ssaidi@eecs.umich.edu# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
201758Ssaidi@eecs.umich.edu# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
211758Ssaidi@eecs.umich.edu# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
221758Ssaidi@eecs.umich.edu# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
231758Ssaidi@eecs.umich.edu# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
241758Ssaidi@eecs.umich.edu# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
251758Ssaidi@eecs.umich.edu# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
261758Ssaidi@eecs.umich.edu# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
271758Ssaidi@eecs.umich.edu
281758Ssaidi@eecs.umich.edu#Permission is granted to use, copy, create derivative works and
291758Ssaidi@eecs.umich.edu#redistribute this software and such derivative works for any purpose,
301758Ssaidi@eecs.umich.edu#so long as the copyright notice above, this grant of permission, and
311758Ssaidi@eecs.umich.edu#the disclaimer below appear in all copies made; and so long as the
321758Ssaidi@eecs.umich.edu#name of The University of Michigan is not used in any advertising or
331758Ssaidi@eecs.umich.edu#publicity pertaining to the use or distribution of this software
341758Ssaidi@eecs.umich.edu#without specific, written prior authorization.
351758Ssaidi@eecs.umich.edu#
361758Ssaidi@eecs.umich.edu#THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION FROM THE
371758Ssaidi@eecs.umich.edu#UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY PURPOSE, AND WITHOUT
381758Ssaidi@eecs.umich.edu#WARRANTY BY THE UNIVERSITY OF MICHIGAN OF ANY KIND, EITHER EXPRESS OR
391758Ssaidi@eecs.umich.edu#IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
401758Ssaidi@eecs.umich.edu#MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE REGENTS OF
411758Ssaidi@eecs.umich.edu#THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE FOR ANY DAMAGES,
421758Ssaidi@eecs.umich.edu#INCLUDING DIRECT, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
431758Ssaidi@eecs.umich.edu#DAMAGES, WITH RESPECT TO ANY CLAIM ARISING OUT OF OR IN CONNECTION
441758Ssaidi@eecs.umich.edu#WITH THE USE OF THE SOFTWARE, EVEN IF IT HAS BEEN OR IS HEREAFTER
451758Ssaidi@eecs.umich.edu#ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
461758Ssaidi@eecs.umich.edu
471049Sbinkertn@umich.eduimport MySQLdb
481049Sbinkertn@umich.edu
491049Sbinkertn@umich.educlass MyDB(object):
501049Sbinkertn@umich.edu    def __init__(self, options):
511049Sbinkertn@umich.edu        self.name = options.db
521049Sbinkertn@umich.edu        self.host = options.host
531049Sbinkertn@umich.edu        self.user = options.user
541049Sbinkertn@umich.edu        self.passwd = options.passwd
551049Sbinkertn@umich.edu        self.mydb = None
561049Sbinkertn@umich.edu        self.cursor = None
571049Sbinkertn@umich.edu
581049Sbinkertn@umich.edu    def admin(self):
591049Sbinkertn@umich.edu        self.close()
601049Sbinkertn@umich.edu        self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
611049Sbinkertn@umich.edu                                    passwd=self.passwd)
621049Sbinkertn@umich.edu        self.cursor = self.mydb.cursor()
631049Sbinkertn@umich.edu
641049Sbinkertn@umich.edu    def connect(self):
651049Sbinkertn@umich.edu        self.close()
661049Sbinkertn@umich.edu        self.mydb = MySQLdb.connect(db=self.name, host=self.host,
671049Sbinkertn@umich.edu                                    user=self.user, passwd=self.passwd)
681049Sbinkertn@umich.edu        self.cursor = self.mydb.cursor()
691049Sbinkertn@umich.edu
701049Sbinkertn@umich.edu    def close(self):
711049Sbinkertn@umich.edu        if self.mydb is not None:
721049Sbinkertn@umich.edu            self.mydb.close()
731049Sbinkertn@umich.edu        self.cursor = None
741049Sbinkertn@umich.edu
751049Sbinkertn@umich.edu    def query(self, sql):
761049Sbinkertn@umich.edu        self.cursor.execute(sql)
771049Sbinkertn@umich.edu
781049Sbinkertn@umich.edu    def drop(self):
791049Sbinkertn@umich.edu        self.query('DROP DATABASE IF EXISTS %s' % self.name)
801049Sbinkertn@umich.edu
811049Sbinkertn@umich.edu    def create(self):
821049Sbinkertn@umich.edu        self.query('CREATE DATABASE %s' % self.name)
831049Sbinkertn@umich.edu
841049Sbinkertn@umich.edu    def populate(self):
851049Sbinkertn@umich.edu        #
861049Sbinkertn@umich.edu        # Each run (or simulation) gets its own entry in the runs table to
871049Sbinkertn@umich.edu        # group stats by where they were generated
881049Sbinkertn@umich.edu        #
891049Sbinkertn@umich.edu        # COLUMNS:
901049Sbinkertn@umich.edu        #   'id' is a unique identifier for each run to be used in other
911049Sbinkertn@umich.edu        #       tables.
921049Sbinkertn@umich.edu        #   'name' is the user designated name for the data generated.  It is
931049Sbinkertn@umich.edu        #       configured in the simulator.
941049Sbinkertn@umich.edu        #   'user' identifies the user that generated the data for the given
951049Sbinkertn@umich.edu        #       run.
961049Sbinkertn@umich.edu        #   'project' another name to identify runs for a specific goal
971049Sbinkertn@umich.edu        #   'date' is a timestamp for when the data was generated.  It can be
981049Sbinkertn@umich.edu        #       used to easily expire data that was generated in the past.
991049Sbinkertn@umich.edu        #   'expire' is a timestamp for when the data should be removed from
1001049Sbinkertn@umich.edu        #       the database so we don't have years worth of junk.
1011049Sbinkertn@umich.edu        #
1021049Sbinkertn@umich.edu        # INDEXES:
1031049Sbinkertn@umich.edu        #   'run' is indexed so you can find out details of a run if the run
1041049Sbinkertn@umich.edu        #       was retreived from the data table.
1051049Sbinkertn@umich.edu        #   'name' is indexed so that two all run names are forced to be unique
1061049Sbinkertn@umich.edu        #
1071049Sbinkertn@umich.edu        self.query('''
1081049Sbinkertn@umich.edu        CREATE TABLE runs(
1091049Sbinkertn@umich.edu            rn_id	SMALLINT UNSIGNED	NOT NULL AUTO_INCREMENT,
1101049Sbinkertn@umich.edu            rn_name	VARCHAR(200)		NOT NULL,
1111049Sbinkertn@umich.edu            rn_sample	VARCHAR(32)		NOT NULL,
1121049Sbinkertn@umich.edu            rn_user	VARCHAR(32)		NOT NULL,
1131049Sbinkertn@umich.edu            rn_project	VARCHAR(100)            NOT NULL,
1141049Sbinkertn@umich.edu            rn_date	TIMESTAMP		NOT NULL,
1151049Sbinkertn@umich.edu            rn_expire	TIMESTAMP               NOT NULL,
1161049Sbinkertn@umich.edu            PRIMARY KEY (rn_id),
1171049Sbinkertn@umich.edu            UNIQUE (rn_name,rn_sample)
1181049Sbinkertn@umich.edu        ) TYPE=InnoDB''')
1191049Sbinkertn@umich.edu
1201049Sbinkertn@umich.edu        #
1211049Sbinkertn@umich.edu        # We keep the bin names separate so that the data table doesn't get
1221049Sbinkertn@umich.edu        # huge since bin names are frequently repeated.
1231049Sbinkertn@umich.edu        #
1241049Sbinkertn@umich.edu        # COLUMNS:
1251049Sbinkertn@umich.edu        #   'id' is the unique bin identifer.
1261049Sbinkertn@umich.edu        #   'name' is the string name for the bin.
1271049Sbinkertn@umich.edu        #
1281049Sbinkertn@umich.edu        # INDEXES:
1291049Sbinkertn@umich.edu        #   'bin' is indexed to get the name of a bin when data is retrieved
1301049Sbinkertn@umich.edu        #       via the data table.
1311049Sbinkertn@umich.edu        #   'name' is indexed to get the bin id for a named bin when you want
1321049Sbinkertn@umich.edu        #       to search the data table based on a specific bin.
1331049Sbinkertn@umich.edu        #
1341049Sbinkertn@umich.edu        self.query('''
1351049Sbinkertn@umich.edu        CREATE TABLE bins(
1361049Sbinkertn@umich.edu            bn_id	SMALLINT UNSIGNED	NOT NULL AUTO_INCREMENT,
1371049Sbinkertn@umich.edu            bn_name	VARCHAR(255)		NOT NULL,
1381049Sbinkertn@umich.edu            PRIMARY KEY(bn_id),
1391049Sbinkertn@umich.edu            UNIQUE (bn_name)
1401049Sbinkertn@umich.edu        ) TYPE=InnoDB''')
1411049Sbinkertn@umich.edu
1421049Sbinkertn@umich.edu        #
1431049Sbinkertn@umich.edu        # The stat table gives us all of the data for a particular stat.
1441049Sbinkertn@umich.edu        #
1451049Sbinkertn@umich.edu        # COLUMNS:
1461049Sbinkertn@umich.edu        #   'stat' is a unique identifier for each stat to be used in other
1471049Sbinkertn@umich.edu        #       tables for references.
1481049Sbinkertn@umich.edu        #   'name' is simply the simulator derived name for a given
1491049Sbinkertn@umich.edu        #       statistic.
1501049Sbinkertn@umich.edu        #   'descr' is the description of the statistic and what it tells
1511049Sbinkertn@umich.edu        #       you.
1521049Sbinkertn@umich.edu        #   'type' defines what the stat tells you.  Types are:
1531049Sbinkertn@umich.edu        #       SCALAR: A simple scalar statistic that holds one value
1541049Sbinkertn@umich.edu        #       VECTOR: An array of statistic values.  Such a something that
1551049Sbinkertn@umich.edu        #           is generated per-thread.  Vectors exist to give averages,
1561049Sbinkertn@umich.edu        #	     pdfs, cdfs, means, standard deviations, etc across the
1571049Sbinkertn@umich.edu        #           stat values.
1581049Sbinkertn@umich.edu        #       DIST: Is a distribution of data.  When the statistic value is
1591049Sbinkertn@umich.edu        #	     sampled, its value is counted in a particular bucket.
1601049Sbinkertn@umich.edu        #           Useful for keeping track of utilization of a resource.
1611049Sbinkertn@umich.edu        #           (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
1621049Sbinkertn@umich.edu        #       VECTORDIST: Can be used when the distribution needs to be
1631049Sbinkertn@umich.edu        #	     factored out into a per-thread distribution of data for
1641049Sbinkertn@umich.edu        #	     example.  It can still be summed across threads to find
1651049Sbinkertn@umich.edu        #           the total distribution.
1661049Sbinkertn@umich.edu        #       VECTOR2D: Can be used when you have a stat that is not only
1671049Sbinkertn@umich.edu        #           per-thread, but it is per-something else.  Like
1681049Sbinkertn@umich.edu        #           per-message type.
1691049Sbinkertn@umich.edu        #       FORMULA: This statistic is a formula, and its data must be
1701049Sbinkertn@umich.edu        #	     looked up in the formula table, for indicating how to
1711049Sbinkertn@umich.edu        #           present its values.
1721049Sbinkertn@umich.edu        #   'subdata' is potentially used by any of the vector types to
1731049Sbinkertn@umich.edu        #       give a specific name to all of the data elements within a
1741049Sbinkertn@umich.edu        #       stat.
1751049Sbinkertn@umich.edu        #   'print' indicates whether this stat should be printed ever.
1761049Sbinkertn@umich.edu        #       (Unnamed stats don't usually get printed)
1771049Sbinkertn@umich.edu        #   'prereq' only print the stat if the prereq is not zero.
1781049Sbinkertn@umich.edu        #   'prec' number of decimal places to print
1791049Sbinkertn@umich.edu        #   'nozero' don't print zero values
1801049Sbinkertn@umich.edu        #   'nonan' don't print NaN values
1811049Sbinkertn@umich.edu        #   'total' for vector type stats, print the total.
1821049Sbinkertn@umich.edu        #   'pdf' for vector type stats, print the pdf.
1831049Sbinkertn@umich.edu        #   'cdf' for vector type stats, print the cdf.
1841049Sbinkertn@umich.edu        #
1851049Sbinkertn@umich.edu        #   The Following are for dist type stats:
1861049Sbinkertn@umich.edu        #   'min' is the minimum bucket value. Anything less is an underflow.
1871049Sbinkertn@umich.edu        #   'max' is the maximum bucket value. Anything more is an overflow.
1881049Sbinkertn@umich.edu        #   'bktsize' is the approximate number of entries in each bucket.
1891049Sbinkertn@umich.edu        #   'size' is the number of buckets. equal to (min/max)/bktsize.
1901049Sbinkertn@umich.edu        #
1911049Sbinkertn@umich.edu        # INDEXES:
1921049Sbinkertn@umich.edu        #   'stat' is indexed so that you can find out details about a stat
1931049Sbinkertn@umich.edu        #       if the stat id was retrieved from the data table.
1941049Sbinkertn@umich.edu        #   'name' is indexed so that you can simply look up data about a
1951049Sbinkertn@umich.edu        #       named stat.
1961049Sbinkertn@umich.edu        #
1971049Sbinkertn@umich.edu        self.query('''
1981049Sbinkertn@umich.edu        CREATE TABLE stats(
1991049Sbinkertn@umich.edu            st_id	SMALLINT UNSIGNED	NOT NULL AUTO_INCREMENT,
2001049Sbinkertn@umich.edu            st_name	VARCHAR(255)		NOT NULL,
2011049Sbinkertn@umich.edu            st_descr	TEXT			NOT NULL,
2021049Sbinkertn@umich.edu            st_type	ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
2031049Sbinkertn@umich.edu                "VECTOR2D", "FORMULA")	NOT NULL,
2041049Sbinkertn@umich.edu            st_print	BOOL			NOT NULL,
2051049Sbinkertn@umich.edu            st_prereq	SMALLINT UNSIGNED	NOT NULL,
2061049Sbinkertn@umich.edu            st_prec	TINYINT			NOT NULL,
2071049Sbinkertn@umich.edu            st_nozero	BOOL			NOT NULL,
2081049Sbinkertn@umich.edu            st_nonan	BOOL			NOT NULL,
2091049Sbinkertn@umich.edu            st_total	BOOL			NOT NULL,
2101049Sbinkertn@umich.edu            st_pdf	BOOL			NOT NULL,
2111049Sbinkertn@umich.edu            st_cdf	BOOL			NOT NULL,
2121049Sbinkertn@umich.edu            st_min	DOUBLE			NOT NULL,
2131049Sbinkertn@umich.edu            st_max	DOUBLE			NOT NULL,
2141049Sbinkertn@umich.edu            st_bktsize	DOUBLE			NOT NULL,
2151049Sbinkertn@umich.edu            st_size	SMALLINT UNSIGNED	NOT NULL,
2161049Sbinkertn@umich.edu            PRIMARY KEY (st_id),
2171049Sbinkertn@umich.edu            UNIQUE (st_name)
2181049Sbinkertn@umich.edu        ) TYPE=InnoDB''')
2191049Sbinkertn@umich.edu
2201049Sbinkertn@umich.edu        #
2211049Sbinkertn@umich.edu        # This is the main table of data for stats.
2221049Sbinkertn@umich.edu        #
2231049Sbinkertn@umich.edu        # COLUMNS:
2241049Sbinkertn@umich.edu        #   'stat' refers to the stat field given in the stat table.
2251049Sbinkertn@umich.edu        #
2261049Sbinkertn@umich.edu        #   'x' referrs to the first dimension of a multi-dimensional stat. For
2271049Sbinkertn@umich.edu        #       a vector, x will start at 0 and increase for each vector
2281049Sbinkertn@umich.edu        #       element.
2291049Sbinkertn@umich.edu        #       For a distribution:
2301049Sbinkertn@umich.edu        #       -1: sum (for calculating standard deviation)
2311049Sbinkertn@umich.edu        #       -2: sum of squares (for calculating standard deviation)
2321049Sbinkertn@umich.edu        #       -3: total number of samples taken (for calculating
2331049Sbinkertn@umich.edu        #           standard deviation)
2341049Sbinkertn@umich.edu        #       -4: minimum value
2351049Sbinkertn@umich.edu        #       -5: maximum value
2361049Sbinkertn@umich.edu        #       -6: underflow
2371049Sbinkertn@umich.edu        #       -7: overflow
2381049Sbinkertn@umich.edu        #   'y' is used by a VECTORDIST and the VECTOR2D to describe the second
2391049Sbinkertn@umich.edu        #       dimension.
2401049Sbinkertn@umich.edu        #   'run' is the run that the data was generated from.  Details up in
2411049Sbinkertn@umich.edu        #       the run table
2421049Sbinkertn@umich.edu        #   'tick' is a timestamp generated by the simulator.
2431049Sbinkertn@umich.edu        #   'bin' is the name of the bin that the data was generated in, if
2441049Sbinkertn@umich.edu        #       any.
2451049Sbinkertn@umich.edu        #   'data' is the actual stat value.
2461049Sbinkertn@umich.edu        #
2471049Sbinkertn@umich.edu        # INDEXES:
2481049Sbinkertn@umich.edu        #   'stat' is indexed so that a user can find all of the data for a
2491049Sbinkertn@umich.edu        #       particular stat. It is not unique, because that specific stat
2501049Sbinkertn@umich.edu        #       can be found in many runs, bins, and samples, in addition to
2511049Sbinkertn@umich.edu        #       having entries for the mulidimensional cases.
2521049Sbinkertn@umich.edu        #   'run' is indexed to allow a user to remove all of the data for a
2531049Sbinkertn@umich.edu        #       particular execution run.  It can also be used to allow the
2541049Sbinkertn@umich.edu        #       user to print out all of the data for a given run.
2551049Sbinkertn@umich.edu        #
2561049Sbinkertn@umich.edu        self.query('''
2571049Sbinkertn@umich.edu        CREATE TABLE data(
2581049Sbinkertn@umich.edu            dt_stat	SMALLINT UNSIGNED	NOT NULL,
2591049Sbinkertn@umich.edu            dt_x	SMALLINT		NOT NULL,
2601049Sbinkertn@umich.edu            dt_y	SMALLINT		NOT NULL,
2611049Sbinkertn@umich.edu            dt_run	SMALLINT UNSIGNED	NOT NULL,
2621049Sbinkertn@umich.edu            dt_tick	BIGINT UNSIGNED		NOT NULL,
2631049Sbinkertn@umich.edu            dt_bin	SMALLINT UNSIGNED	NOT NULL,
2641049Sbinkertn@umich.edu            dt_data	DOUBLE			NOT NULL,
2651049Sbinkertn@umich.edu            INDEX (dt_stat),
2661049Sbinkertn@umich.edu            INDEX (dt_run),
2671049Sbinkertn@umich.edu            UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin)
2681049Sbinkertn@umich.edu        ) TYPE=InnoDB;''')
2691049Sbinkertn@umich.edu
2701049Sbinkertn@umich.edu        #
2711049Sbinkertn@umich.edu        # Names and descriptions for multi-dimensional stats (vectors, etc.)
2721049Sbinkertn@umich.edu        # are stored here instead of having their own entry in the statistics
2731049Sbinkertn@umich.edu        # table. This allows all parts of a single stat to easily share a
2741049Sbinkertn@umich.edu        # single id.
2751049Sbinkertn@umich.edu        #
2761049Sbinkertn@umich.edu        # COLUMNS:
2771049Sbinkertn@umich.edu        #   'stat' is the unique stat identifier from the stat table.
2781049Sbinkertn@umich.edu        #   'x' is the first dimension for multi-dimensional stats
2791049Sbinkertn@umich.edu        #       corresponding to the data table above.
2801049Sbinkertn@umich.edu        #   'y' is the second dimension for multi-dimensional stats
2811049Sbinkertn@umich.edu        #       corresponding to the data table above.
2821049Sbinkertn@umich.edu        #   'name' is the specific subname for the unique stat,x,y combination.
2831049Sbinkertn@umich.edu        #   'descr' is the specific description for the uniqe stat,x,y
2841049Sbinkertn@umich.edu        #        combination.
2851049Sbinkertn@umich.edu        #
2861049Sbinkertn@umich.edu        # INDEXES:
2871049Sbinkertn@umich.edu        #   'stat' is indexed so you can get the subdata for a specific stat.
2881049Sbinkertn@umich.edu        #
2891049Sbinkertn@umich.edu        self.query('''
2901049Sbinkertn@umich.edu        CREATE TABLE subdata(
2911049Sbinkertn@umich.edu            sd_stat	SMALLINT UNSIGNED	NOT NULL,
2921049Sbinkertn@umich.edu            sd_x	SMALLINT		NOT NULL,
2931049Sbinkertn@umich.edu            sd_y	SMALLINT		NOT NULL,
2941049Sbinkertn@umich.edu            sd_name	VARCHAR(255)		NOT NULL,
2951049Sbinkertn@umich.edu            sd_descr	TEXT,
2961049Sbinkertn@umich.edu            UNIQUE (sd_stat,sd_x,sd_y)
2971049Sbinkertn@umich.edu        ) TYPE=InnoDB''')
2981049Sbinkertn@umich.edu
2991049Sbinkertn@umich.edu
3001049Sbinkertn@umich.edu        #
3011049Sbinkertn@umich.edu        # The formula table is maintained separately from the data table
3021049Sbinkertn@umich.edu        # because formula data, unlike other stat data cannot be represented
3031049Sbinkertn@umich.edu        # there.
3041049Sbinkertn@umich.edu        #
3051049Sbinkertn@umich.edu        # COLUMNS:
3061049Sbinkertn@umich.edu        #   'stat' refers to the stat field generated in the stat table.
3071049Sbinkertn@umich.edu        #   'formula' is the actual string representation of the formula
3081049Sbinkertn@umich.edu        #       itself.
3091049Sbinkertn@umich.edu        #
3101049Sbinkertn@umich.edu        # INDEXES:
3111049Sbinkertn@umich.edu        #   'stat' is indexed so that you can just look up a formula.
3121049Sbinkertn@umich.edu        #
3131049Sbinkertn@umich.edu        self.query('''
3141049Sbinkertn@umich.edu        CREATE TABLE formulas(
3151049Sbinkertn@umich.edu            fm_stat	SMALLINT UNSIGNED	NOT NULL,
3161049Sbinkertn@umich.edu            fm_formula	BLOB			NOT NULL,
3171049Sbinkertn@umich.edu            PRIMARY KEY(fm_stat)
3181049Sbinkertn@umich.edu        ) TYPE=InnoDB''')
3191049Sbinkertn@umich.edu
3201049Sbinkertn@umich.edu        #
3211049Sbinkertn@umich.edu        # Each stat used in each formula is kept in this table.  This way, if
3221049Sbinkertn@umich.edu        # you want to print out a particular formula, you can simply find out
3231049Sbinkertn@umich.edu        # which stats you need by looking in this table.  Additionally, when
3241049Sbinkertn@umich.edu        # you remove a stat from the stats table and data table, you remove
3251049Sbinkertn@umich.edu        # any references to the formula in this table.  When a formula is no
3261049Sbinkertn@umich.edu        # longer referred to, you remove its entry.
3271049Sbinkertn@umich.edu        #
3281049Sbinkertn@umich.edu        # COLUMNS:
3291049Sbinkertn@umich.edu        #   'stat' is the stat id from the stat table above.
3301049Sbinkertn@umich.edu        #   'child' is the stat id of a stat that is used for this formula.
3311049Sbinkertn@umich.edu        #       There may be many children for any given 'stat' (formula)
3321049Sbinkertn@umich.edu        #
3331049Sbinkertn@umich.edu        # INDEXES:
3341049Sbinkertn@umich.edu        #   'stat' is indexed so you can look up all of the children for a
3351049Sbinkertn@umich.edu        #       particular stat.
3361049Sbinkertn@umich.edu        #   'child' is indexed so that you can remove an entry when a stat is
3371049Sbinkertn@umich.edu        #       removed.
3381049Sbinkertn@umich.edu        #
3391049Sbinkertn@umich.edu        self.query('''
3401049Sbinkertn@umich.edu        CREATE TABLE formula_ref(
3411049Sbinkertn@umich.edu            fr_stat	SMALLINT UNSIGNED	NOT NULL,
3421049Sbinkertn@umich.edu            fr_run	SMALLINT UNSIGNED	NOT NULL,
3431049Sbinkertn@umich.edu            UNIQUE (fr_stat,fr_run),
3441049Sbinkertn@umich.edu            INDEX (fr_stat),
3451049Sbinkertn@umich.edu            INDEX (fr_run)
3461049Sbinkertn@umich.edu        ) TYPE=InnoDB''')
3471049Sbinkertn@umich.edu
3481049Sbinkertn@umich.edu        # COLUMNS:
3491049Sbinkertn@umich.edu        #   'event' is the unique event id from the event_desc table
3501049Sbinkertn@umich.edu        #   'run' is simulation run id that this event took place in
3511049Sbinkertn@umich.edu        #   'tick' is the tick when the event happened
3521049Sbinkertn@umich.edu        #
3531049Sbinkertn@umich.edu        # INDEXES:
3541049Sbinkertn@umich.edu        #   'event' is indexed so you can look up all occurences of a
3551049Sbinkertn@umich.edu        #       specific event
3561049Sbinkertn@umich.edu        #   'run' is indexed so you can find all events in a run
3571049Sbinkertn@umich.edu        #   'tick' is indexed because we want the unique thing anyway
3581049Sbinkertn@umich.edu        #   'event,run,tick' is unique combination
3591049Sbinkertn@umich.edu        self.query('''
3601049Sbinkertn@umich.edu        CREATE TABLE events(
3611049Sbinkertn@umich.edu            ev_event	SMALLINT UNSIGNED	NOT NULL,
3621049Sbinkertn@umich.edu            ev_run	SMALLINT UNSIGNED	NOT NULL,
3631049Sbinkertn@umich.edu            ev_tick	BIGINT   UNSIGNED       NOT NULL,
3641049Sbinkertn@umich.edu            INDEX(ev_event),
3651049Sbinkertn@umich.edu            INDEX(ev_run),
3661049Sbinkertn@umich.edu            INDEX(ev_tick),
3671049Sbinkertn@umich.edu            UNIQUE(ev_event,ev_run,ev_tick)
3681049Sbinkertn@umich.edu        ) TYPE=InnoDB''')
3691049Sbinkertn@umich.edu
3701049Sbinkertn@umich.edu        # COLUMNS:
3711049Sbinkertn@umich.edu        #   'id' is the unique description id
3721049Sbinkertn@umich.edu        #   'name' is the name of the event that occurred
3731049Sbinkertn@umich.edu        #
3741049Sbinkertn@umich.edu        # INDEXES:
3751049Sbinkertn@umich.edu        #   'id' is indexed because it is the primary key and is what you use
3761049Sbinkertn@umich.edu        #       to look up the descriptions
3771049Sbinkertn@umich.edu        #   'name' is indexed so one can find the event based on name
3781049Sbinkertn@umich.edu        #
3791049Sbinkertn@umich.edu        self.query('''
3801049Sbinkertn@umich.edu        CREATE TABLE event_names(
3811049Sbinkertn@umich.edu            en_id	SMALLINT UNSIGNED	NOT NULL AUTO_INCREMENT,
3821049Sbinkertn@umich.edu            en_name	VARCHAR(255)		NOT NULL,
3831049Sbinkertn@umich.edu            PRIMARY KEY (en_id),
3841049Sbinkertn@umich.edu            UNIQUE (en_name)
3851049Sbinkertn@umich.edu        ) TYPE=InnoDB''')
3861049Sbinkertn@umich.edu
3871049Sbinkertn@umich.edu    def clean(self):
3881049Sbinkertn@umich.edu        self.query('''
3891049Sbinkertn@umich.edu        DELETE data
3901049Sbinkertn@umich.edu        FROM data
3911049Sbinkertn@umich.edu        LEFT JOIN runs ON dt_run=rn_id
3921049Sbinkertn@umich.edu        WHERE rn_id IS NULL''')
3931049Sbinkertn@umich.edu
3941049Sbinkertn@umich.edu        self.query('''
3951049Sbinkertn@umich.edu        DELETE formula_ref
3961049Sbinkertn@umich.edu        FROM formula_ref
3971049Sbinkertn@umich.edu        LEFT JOIN runs ON fr_run=rn_id
3981049Sbinkertn@umich.edu        WHERE rn_id IS NULL''')
3991049Sbinkertn@umich.edu
4001049Sbinkertn@umich.edu        self.query('''
4011049Sbinkertn@umich.edu        DELETE formulas
4021049Sbinkertn@umich.edu        FROM formulas
4031049Sbinkertn@umich.edu        LEFT JOIN formula_ref ON fm_stat=fr_stat
4041049Sbinkertn@umich.edu        WHERE fr_stat IS NULL''')
4051049Sbinkertn@umich.edu
4061049Sbinkertn@umich.edu        self.query('''
4071049Sbinkertn@umich.edu        DELETE stats
4081049Sbinkertn@umich.edu        FROM stats
4091049Sbinkertn@umich.edu        LEFT JOIN data ON st_id=dt_stat
4101049Sbinkertn@umich.edu        WHERE dt_stat IS NULL''')
4111049Sbinkertn@umich.edu
4121049Sbinkertn@umich.edu        self.query('''
4131049Sbinkertn@umich.edu        DELETE subdata
4141049Sbinkertn@umich.edu        FROM subdata
4151049Sbinkertn@umich.edu        LEFT JOIN data ON sd_stat=dt_stat
4161049Sbinkertn@umich.edu        WHERE dt_stat IS NULL''')
4171049Sbinkertn@umich.edu
4181049Sbinkertn@umich.edu        self.query('''
4191049Sbinkertn@umich.edu        DELETE bins
4201049Sbinkertn@umich.edu        FROM bins
4211049Sbinkertn@umich.edu        LEFT JOIN data ON bn_id=dt_bin
4221049Sbinkertn@umich.edu        WHERE dt_bin IS NULL''')
4231049Sbinkertn@umich.edu
4241049Sbinkertn@umich.edu        self.query('''
4251049Sbinkertn@umich.edu        DELETE events
4261049Sbinkertn@umich.edu        FROM events
4271049Sbinkertn@umich.edu        LEFT JOIN runs ON ev_run=rn_id
4281049Sbinkertn@umich.edu        WHERE rn_id IS NULL''')
4291049Sbinkertn@umich.edu
4301049Sbinkertn@umich.edu        self.query('''
4311049Sbinkertn@umich.edu        DELETE event_names
4321049Sbinkertn@umich.edu        FROM event_names
4331049Sbinkertn@umich.edu        LEFT JOIN events ON en_id=ev_event
4341049Sbinkertn@umich.edu        WHERE ev_event IS NULL''')
435