dbinit.py revision 2343
12SN/A# Copyright (c) 2003-2004 The Regents of The University of Michigan
21762SN/A# All rights reserved.
32SN/A#
42SN/A# Redistribution and use in source and binary forms, with or without
52SN/A# modification, are permitted provided that the following conditions are
62SN/A# met: redistributions of source code must retain the above copyright
72SN/A# notice, this list of conditions and the following disclaimer;
82SN/A# redistributions in binary form must reproduce the above copyright
92SN/A# notice, this list of conditions and the following disclaimer in the
102SN/A# documentation and/or other materials provided with the distribution;
112SN/A# neither the name of the copyright holders nor the names of its
122SN/A# contributors may be used to endorse or promote products derived from
132SN/A# this software without specific prior written permission.
142SN/A#
152SN/A# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
162SN/A# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
172SN/A# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
182SN/A# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
192SN/A# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
202SN/A# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
212SN/A# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
222SN/A# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
232SN/A# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
242SN/A# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
252SN/A# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
262SN/A
272665Ssaidi@eecs.umich.eduimport MySQLdb
282665Ssaidi@eecs.umich.edu
292665Ssaidi@eecs.umich.educlass MyDB(object):
302SN/A    def __init__(self, options):
312SN/A        self.name = options.db
322SN/A        self.host = options.host
332SN/A        self.user = options.user
342SN/A        self.passwd = options.passwd
352SN/A        self.mydb = None
361354SN/A        self.cursor = None
371354SN/A
382SN/A    def admin(self):
392SN/A        self.close()
405501Snate@binkert.org        self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
415546Snate@binkert.org                                    passwd=self.passwd)
427004Snate@binkert.org        self.cursor = self.mydb.cursor()
432SN/A
442SN/A    def connect(self):
455769Snate@binkert.org        self.close()
462361SN/A        self.mydb = MySQLdb.connect(db=self.name, host=self.host,
476216Snate@binkert.org                                    user=self.user, passwd=self.passwd)
488232Snate@binkert.org        self.cursor = self.mydb.cursor()
4956SN/A
502SN/A    def close(self):
515543Ssaidi@eecs.umich.edu        if self.mydb is not None:
522SN/A            self.mydb.close()
531354SN/A        self.cursor = None
541354SN/A
552SN/A    def query(self, sql):
562SN/A        self.cursor.execute(sql)
572SN/A
582SN/A    def drop(self):
595501Snate@binkert.org        self.query('DROP DATABASE IF EXISTS %s' % self.name)
605501Snate@binkert.org
612SN/A    def create(self):
629044SAli.Saidi@ARM.com        self.query('CREATE DATABASE %s' % self.name)
632SN/A
642SN/A    def populate(self):
652SN/A        #
665769Snate@binkert.org        # Each run (or simulation) gets its own entry in the runs table to
678902Sandreas.hansson@arm.com        # group stats by where they were generated
685769Snate@binkert.org        #
695769Snate@binkert.org        # COLUMNS:
707059Snate@binkert.org        #   'id' is a unique identifier for each run to be used in other
717059Snate@binkert.org        #       tables.
727059Snate@binkert.org        #   'name' is the user designated name for the data generated.  It is
737059Snate@binkert.org        #       configured in the simulator.
747059Snate@binkert.org        #   'user' identifies the user that generated the data for the given
757059Snate@binkert.org        #       run.
767059Snate@binkert.org        #   'project' another name to identify runs for a specific goal
777059Snate@binkert.org        #   'date' is a timestamp for when the data was generated.  It can be
787059Snate@binkert.org        #       used to easily expire data that was generated in the past.
797059Snate@binkert.org        #   'expire' is a timestamp for when the data should be removed from
807059Snate@binkert.org        #       the database so we don't have years worth of junk.
817059Snate@binkert.org        #
827059Snate@binkert.org        # INDEXES:
837059Snate@binkert.org        #   'run' is indexed so you can find out details of a run if the run
847059Snate@binkert.org        #       was retreived from the data table.
857059Snate@binkert.org        #   'name' is indexed so that two all run names are forced to be unique
865769Snate@binkert.org        #
877058Snate@binkert.org        self.query('''
887058Snate@binkert.org        CREATE TABLE runs(
897058Snate@binkert.org            rn_id	SMALLINT UNSIGNED	NOT NULL AUTO_INCREMENT,
902SN/A            rn_name	VARCHAR(200)		NOT NULL,
915502Snate@binkert.org            rn_sample	VARCHAR(32)		NOT NULL,
925502Snate@binkert.org            rn_user	VARCHAR(32)		NOT NULL,
935502Snate@binkert.org            rn_project	VARCHAR(100)            NOT NULL,
945503Snate@binkert.org            rn_date	TIMESTAMP		NOT NULL,
955503Snate@binkert.org            rn_expire	TIMESTAMP               NOT NULL,
965502Snate@binkert.org            PRIMARY KEY (rn_id),
975502Snate@binkert.org            UNIQUE (rn_name,rn_sample)
985502Snate@binkert.org        ) TYPE=InnoDB''')
995502Snate@binkert.org
1005502Snate@binkert.org        #
1015502Snate@binkert.org        # The stat table gives us all of the data for a particular stat.
1025502Snate@binkert.org        #
1035602Snate@binkert.org        # COLUMNS:
1045602Snate@binkert.org        #   'stat' is a unique identifier for each stat to be used in other
1055501Snate@binkert.org        #       tables for references.
1065543Ssaidi@eecs.umich.edu        #   'name' is simply the simulator derived name for a given
1077058Snate@binkert.org        #       statistic.
1085769Snate@binkert.org        #   'descr' is the description of the statistic and what it tells
1094016Sstever@eecs.umich.edu        #       you.
1104016Sstever@eecs.umich.edu        #   'type' defines what the stat tells you.  Types are:
1114016Sstever@eecs.umich.edu        #       SCALAR: A simple scalar statistic that holds one value
1124016Sstever@eecs.umich.edu        #       VECTOR: An array of statistic values.  Such a something that
1134016Sstever@eecs.umich.edu        #           is generated per-thread.  Vectors exist to give averages,
1144016Sstever@eecs.umich.edu        #	     pdfs, cdfs, means, standard deviations, etc across the
1154016Sstever@eecs.umich.edu        #           stat values.
1164016Sstever@eecs.umich.edu        #       DIST: Is a distribution of data.  When the statistic value is
1174016Sstever@eecs.umich.edu        #	     sampled, its value is counted in a particular bucket.
1185501Snate@binkert.org        #           Useful for keeping track of utilization of a resource.
1195605Snate@binkert.org        #           (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
1205605Snate@binkert.org        #       VECTORDIST: Can be used when the distribution needs to be
1215605Snate@binkert.org        #	     factored out into a per-thread distribution of data for
1225605Snate@binkert.org        #	     example.  It can still be summed across threads to find
1235501Snate@binkert.org        #           the total distribution.
1244016Sstever@eecs.umich.edu        #       VECTOR2D: Can be used when you have a stat that is not only
1255577SSteve.Reinhardt@amd.com        #           per-thread, but it is per-something else.  Like
1265501Snate@binkert.org        #           per-message type.
1275501Snate@binkert.org        #       FORMULA: This statistic is a formula, and its data must be
1285501Snate@binkert.org        #	     looked up in the formula table, for indicating how to
1295502Snate@binkert.org        #           present its values.
1305502Snate@binkert.org        #   'subdata' is potentially used by any of the vector types to
1315605Snate@binkert.org        #       give a specific name to all of the data elements within a
1325502Snate@binkert.org        #       stat.
1335502Snate@binkert.org        #   'print' indicates whether this stat should be printed ever.
1345605Snate@binkert.org        #       (Unnamed stats don't usually get printed)
1355605Snate@binkert.org        #   'prereq' only print the stat if the prereq is not zero.
1365605Snate@binkert.org        #   'prec' number of decimal places to print
1375577SSteve.Reinhardt@amd.com        #   'nozero' don't print zero values
1387823Ssteve.reinhardt@amd.com        #   'nonan' don't print NaN values
1395502Snate@binkert.org        #   'total' for vector type stats, print the total.
1405502Snate@binkert.org        #   'pdf' for vector type stats, print the pdf.
1415502Snate@binkert.org        #   'cdf' for vector type stats, print the cdf.
1422SN/A        #
1435769Snate@binkert.org        #   The Following are for dist type stats:
1445769Snate@binkert.org        #   'min' is the minimum bucket value. Anything less is an underflow.
1455769Snate@binkert.org        #   'max' is the maximum bucket value. Anything more is an overflow.
1465769Snate@binkert.org        #   'bktsize' is the approximate number of entries in each bucket.
1475769Snate@binkert.org        #   'size' is the number of buckets. equal to (min/max)/bktsize.
1485769Snate@binkert.org        #
1492SN/A        # INDEXES:
1508581Ssteve.reinhardt@amd.com        #   'stat' is indexed so that you can find out details about a stat
1518581Ssteve.reinhardt@amd.com        #       if the stat id was retrieved from the data table.
1525769Snate@binkert.org        #   'name' is indexed so that you can simply look up data about a
1537059Snate@binkert.org        #       named stat.
1545769Snate@binkert.org        #
1555769Snate@binkert.org        self.query('''
1562SN/A        CREATE TABLE stats(
1575769Snate@binkert.org            st_id	SMALLINT UNSIGNED	NOT NULL AUTO_INCREMENT,
1585769Snate@binkert.org            st_name	VARCHAR(255)		NOT NULL,
1595769Snate@binkert.org            st_descr	TEXT			NOT NULL,
1605769Snate@binkert.org            st_type	ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
1615769Snate@binkert.org                "VECTOR2D", "FORMULA")	NOT NULL,
1625769Snate@binkert.org            st_print	BOOL			NOT NULL,
1635769Snate@binkert.org            st_prereq	SMALLINT UNSIGNED	NOT NULL,
1645769Snate@binkert.org            st_prec	TINYINT			NOT NULL,
1655769Snate@binkert.org            st_nozero	BOOL			NOT NULL,
1665769Snate@binkert.org            st_nonan	BOOL			NOT NULL,
1675769Snate@binkert.org            st_total	BOOL			NOT NULL,
1685769Snate@binkert.org            st_pdf	BOOL			NOT NULL,
1695769Snate@binkert.org            st_cdf	BOOL			NOT NULL,
1705769Snate@binkert.org            st_min	DOUBLE			NOT NULL,
1715769Snate@binkert.org            st_max	DOUBLE			NOT NULL,
1725769Snate@binkert.org            st_bktsize	DOUBLE			NOT NULL,
1735769Snate@binkert.org            st_size	SMALLINT UNSIGNED	NOT NULL,
1745769Snate@binkert.org            PRIMARY KEY (st_id),
1755769Snate@binkert.org            UNIQUE (st_name)
1765769Snate@binkert.org        ) TYPE=InnoDB''')
1775769Snate@binkert.org
1785501Snate@binkert.org        #
1795543Ssaidi@eecs.umich.edu        # This is the main table of data for stats.
1802SN/A        #
1812SN/A        # COLUMNS:
182396SN/A        #   'stat' refers to the stat field given in the stat table.
183396SN/A        #
184396SN/A        #   'x' referrs to the first dimension of a multi-dimensional stat. For
185396SN/A        #       a vector, x will start at 0 and increase for each vector
1865501Snate@binkert.org        #       element.
1877058Snate@binkert.org        #       For a distribution:
1887058Snate@binkert.org        #       -1: sum (for calculating standard deviation)
1893329Sstever@eecs.umich.edu        #       -2: sum of squares (for calculating standard deviation)
1907058Snate@binkert.org        #       -3: total number of samples taken (for calculating
1917058Snate@binkert.org        #           standard deviation)
1927058Snate@binkert.org        #       -4: minimum value
1937058Snate@binkert.org        #       -5: maximum value
194396SN/A        #       -6: underflow
1957058Snate@binkert.org        #       -7: overflow
1967058Snate@binkert.org        #   'y' is used by a VECTORDIST and the VECTOR2D to describe the second
1977058Snate@binkert.org        #       dimension.
1987058Snate@binkert.org        #   'run' is the run that the data was generated from.  Details up in
1993329Sstever@eecs.umich.edu        #       the run table
2007058Snate@binkert.org        #   'tick' is a timestamp generated by the simulator.
2017058Snate@binkert.org        #   'data' is the actual stat value.
2027058Snate@binkert.org        #
2037058Snate@binkert.org        # INDEXES:
2047058Snate@binkert.org        #   'stat' is indexed so that a user can find all of the data for a
205396SN/A        #       particular stat. It is not unique, because that specific stat
2067058Snate@binkert.org        #       can be found in many runs and samples, in addition to
2077058Snate@binkert.org        #       having entries for the mulidimensional cases.
2087058Snate@binkert.org        #   'run' is indexed to allow a user to remove all of the data for a
2097058Snate@binkert.org        #       particular execution run.  It can also be used to allow the
210396SN/A        #       user to print out all of the data for a given run.
2117058Snate@binkert.org        #
2127058Snate@binkert.org        self.query('''
213396SN/A        CREATE TABLE data(
2147058Snate@binkert.org            dt_stat	SMALLINT UNSIGNED	NOT NULL,
2157058Snate@binkert.org            dt_x	SMALLINT		NOT NULL,
2167058Snate@binkert.org            dt_y	SMALLINT		NOT NULL,
2177058Snate@binkert.org            dt_run	SMALLINT UNSIGNED	NOT NULL,
218396SN/A            dt_tick	BIGINT UNSIGNED		NOT NULL,
2197058Snate@binkert.org            dt_data	DOUBLE			NOT NULL,
2207058Snate@binkert.org            INDEX (dt_stat),
2217058Snate@binkert.org            INDEX (dt_run),
222396SN/A            UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick)
2237058Snate@binkert.org        ) TYPE=InnoDB;''')
2247058Snate@binkert.org
2257058Snate@binkert.org        #
2264075Sbinkertn@umich.edu        # Names and descriptions for multi-dimensional stats (vectors, etc.)
2277058Snate@binkert.org        # are stored here instead of having their own entry in the statistics
2287058Snate@binkert.org        # table. This allows all parts of a single stat to easily share a
2295501Snate@binkert.org        # single id.
2307058Snate@binkert.org        #
2317058Snate@binkert.org        # COLUMNS:
2327058Snate@binkert.org        #   'stat' is the unique stat identifier from the stat table.
2337058Snate@binkert.org        #   'x' is the first dimension for multi-dimensional stats
2347058Snate@binkert.org        #       corresponding to the data table above.
2357058Snate@binkert.org        #   'y' is the second dimension for multi-dimensional stats
236396SN/A        #       corresponding to the data table above.
2372SN/A        #   'name' is the specific subname for the unique stat,x,y combination.
2382SN/A        #   'descr' is the specific description for the uniqe stat,x,y
2392SN/A        #        combination.
2402SN/A        #
2418581Ssteve.reinhardt@amd.com        # INDEXES:
2428581Ssteve.reinhardt@amd.com        #   'stat' is indexed so you can get the subdata for a specific stat.
2438581Ssteve.reinhardt@amd.com        #
244224SN/A        self.query('''
2458581Ssteve.reinhardt@amd.com        CREATE TABLE subdata(
2464016Sstever@eecs.umich.edu            sd_stat	SMALLINT UNSIGNED	NOT NULL,
2475501Snate@binkert.org            sd_x	SMALLINT		NOT NULL,
2485605Snate@binkert.org            sd_y	SMALLINT		NOT NULL,
2495501Snate@binkert.org            sd_name	VARCHAR(255)		NOT NULL,
2505501Snate@binkert.org            sd_descr	TEXT,
2517823Ssteve.reinhardt@amd.com            UNIQUE (sd_stat,sd_x,sd_y)
2525501Snate@binkert.org        ) TYPE=InnoDB''')
2534016Sstever@eecs.umich.edu
254224SN/A
255224SN/A        #
2565768Snate@binkert.org        # The formula table is maintained separately from the data table
2575768Snate@binkert.org        # because formula data, unlike other stat data cannot be represented
258265SN/A        # there.
2595501Snate@binkert.org        #
2605501Snate@binkert.org        # COLUMNS:
2615501Snate@binkert.org        #   'stat' refers to the stat field generated in the stat table.
2625501Snate@binkert.org        #   'formula' is the actual string representation of the formula
2635501Snate@binkert.org        #       itself.
2645501Snate@binkert.org        #
2655501Snate@binkert.org        # INDEXES:
2665501Snate@binkert.org        #   'stat' is indexed so that you can just look up a formula.
2675501Snate@binkert.org        #
2685501Snate@binkert.org        self.query('''
2695501Snate@binkert.org        CREATE TABLE formulas(
2705501Snate@binkert.org            fm_stat	SMALLINT UNSIGNED	NOT NULL,
2715501Snate@binkert.org            fm_formula	BLOB			NOT NULL,
2725501Snate@binkert.org            PRIMARY KEY(fm_stat)
2735501Snate@binkert.org        ) TYPE=InnoDB''')
2745501Snate@binkert.org
2755501Snate@binkert.org        #
2765501Snate@binkert.org        # Each stat used in each formula is kept in this table.  This way, if
2775501Snate@binkert.org        # you want to print out a particular formula, you can simply find out
2785501Snate@binkert.org        # which stats you need by looking in this table.  Additionally, when
2795501Snate@binkert.org        # you remove a stat from the stats table and data table, you remove
2802SN/A        # any references to the formula in this table.  When a formula is no
2815769Snate@binkert.org        # longer referred to, you remove its entry.
2822SN/A        #
2832SN/A        # COLUMNS:
2845769Snate@binkert.org        #   'stat' is the stat id from the stat table above.
2852SN/A        #   'child' is the stat id of a stat that is used for this formula.
2862SN/A        #       There may be many children for any given 'stat' (formula)
2875769Snate@binkert.org        #
2882SN/A        # INDEXES:
2892667Sstever@eecs.umich.edu        #   'stat' is indexed so you can look up all of the children for a
2905769Snate@binkert.org        #       particular stat.
2912667Sstever@eecs.umich.edu        #   'child' is indexed so that you can remove an entry when a stat is
2922SN/A        #       removed.
2932SN/A        #
2942SN/A        self.query('''
2952SN/A        CREATE TABLE formula_ref(
2967058Snate@binkert.org            fr_stat	SMALLINT UNSIGNED	NOT NULL,
2972SN/A            fr_run	SMALLINT UNSIGNED	NOT NULL,
2985605Snate@binkert.org            UNIQUE (fr_stat,fr_run),
2995501Snate@binkert.org            INDEX (fr_stat),
3005501Snate@binkert.org            INDEX (fr_run)
3012SN/A        ) TYPE=InnoDB''')
3025501Snate@binkert.org
3035501Snate@binkert.org        # COLUMNS:
3045501Snate@binkert.org        #   'event' is the unique event id from the event_desc table
3052SN/A        #   'run' is simulation run id that this event took place in
3062SN/A        #   'tick' is the tick when the event happened
3072SN/A        #
308224SN/A        # INDEXES:
309224SN/A        #   'event' is indexed so you can look up all occurences of a
310237SN/A        #       specific event
3115605Snate@binkert.org        #   'run' is indexed so you can find all events in a run
312571SN/A        #   'tick' is indexed because we want the unique thing anyway
313571SN/A        #   'event,run,tick' is unique combination
3147005Snate@binkert.org        self.query('''
3157005Snate@binkert.org        CREATE TABLE events(
3167005Snate@binkert.org            ev_event	SMALLINT UNSIGNED	NOT NULL,
3177005Snate@binkert.org            ev_run	SMALLINT UNSIGNED	NOT NULL,
3187005Snate@binkert.org            ev_tick	BIGINT   UNSIGNED       NOT NULL,
3197005Snate@binkert.org            INDEX(ev_event),
3207005Snate@binkert.org            INDEX(ev_run),
3217005Snate@binkert.org            INDEX(ev_tick),
3227005Snate@binkert.org            UNIQUE(ev_event,ev_run,ev_tick)
3237005Snate@binkert.org        ) TYPE=InnoDB''')
3247005Snate@binkert.org
3257005Snate@binkert.org        # COLUMNS:
3267005Snate@binkert.org        #   'id' is the unique description id
3277005Snate@binkert.org        #   'name' is the name of the event that occurred
3287005Snate@binkert.org        #
3297005Snate@binkert.org        # INDEXES:
3307005Snate@binkert.org        #   'id' is indexed because it is the primary key and is what you use
3317005Snate@binkert.org        #       to look up the descriptions
3327005Snate@binkert.org        #   'name' is indexed so one can find the event based on name
3337005Snate@binkert.org        #
3347005Snate@binkert.org        self.query('''
3357005Snate@binkert.org        CREATE TABLE event_names(
3367005Snate@binkert.org            en_id	SMALLINT UNSIGNED	NOT NULL AUTO_INCREMENT,
3377005Snate@binkert.org            en_name	VARCHAR(255)		NOT NULL,
3387005Snate@binkert.org            PRIMARY KEY (en_id),
3397005Snate@binkert.org            UNIQUE (en_name)
3407005Snate@binkert.org        ) TYPE=InnoDB''')
3417005Snate@binkert.org
3427005Snate@binkert.org    def clean(self):
3437005Snate@binkert.org        self.query('''
3447005Snate@binkert.org        DELETE data
3457005Snate@binkert.org        FROM data
3467005Snate@binkert.org        LEFT JOIN runs ON dt_run=rn_id
3477005Snate@binkert.org        WHERE rn_id IS NULL''')
3487005Snate@binkert.org
3497005Snate@binkert.org        self.query('''
3507005Snate@binkert.org        DELETE formula_ref
3517005Snate@binkert.org        FROM formula_ref
3527005Snate@binkert.org        LEFT JOIN runs ON fr_run=rn_id
3537005Snate@binkert.org        WHERE rn_id IS NULL''')
3547005Snate@binkert.org
3552SN/A        self.query('''
3562SN/A        DELETE formulas
3572SN/A        FROM formulas
358395SN/A        LEFT JOIN formula_ref ON fm_stat=fr_stat
3592SN/A        WHERE fr_stat IS NULL''')
3605605Snate@binkert.org
361265SN/A        self.query('''
3622SN/A        DELETE stats
3639356Snilay@cs.wisc.edu        FROM stats
3642SN/A        LEFT JOIN data ON st_id=dt_stat
3652SN/A        WHERE dt_stat IS NULL''')
3662SN/A
3672SN/A        self.query('''
3687063Snate@binkert.org        DELETE subdata
3697063Snate@binkert.org        FROM subdata
3707063Snate@binkert.org        LEFT JOIN data ON sd_stat=dt_stat
3712SN/A        WHERE dt_stat IS NULL''')
3727063Snate@binkert.org
3732SN/A        self.query('''
374512SN/A        DELETE events
375265SN/A        FROM events
3762SN/A        LEFT JOIN runs ON ev_run=rn_id
3775738Snate@binkert.org        WHERE rn_id IS NULL''')
3785738Snate@binkert.org
3795738Snate@binkert.org        self.query('''
3802SN/A        DELETE event_names
3815501Snate@binkert.org        FROM event_names
3829356Snilay@cs.wisc.edu        LEFT JOIN events ON en_id=ev_event
3839356Snilay@cs.wisc.edu        WHERE ev_event IS NULL''')
3849356Snilay@cs.wisc.edu