db.py revision 1929
18853Sandreas.hansson@arm.com# Copyright (c) 2003-2004 The Regents of The University of Michigan
212532Sandreas.sandberg@arm.com# All rights reserved.
38853Sandreas.hansson@arm.com#
48853Sandreas.hansson@arm.com# Redistribution and use in source and binary forms, with or without
58853Sandreas.hansson@arm.com# modification, are permitted provided that the following conditions are
68853Sandreas.hansson@arm.com# met: redistributions of source code must retain the above copyright
78853Sandreas.hansson@arm.com# notice, this list of conditions and the following disclaimer;
88853Sandreas.hansson@arm.com# redistributions in binary form must reproduce the above copyright
98853Sandreas.hansson@arm.com# notice, this list of conditions and the following disclaimer in the
108853Sandreas.hansson@arm.com# documentation and/or other materials provided with the distribution;
118853Sandreas.hansson@arm.com# neither the name of the copyright holders nor the names of its
128853Sandreas.hansson@arm.com# contributors may be used to endorse or promote products derived from
138853Sandreas.hansson@arm.com# this software without specific prior written permission.
148853Sandreas.hansson@arm.com#
158853Sandreas.hansson@arm.com# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
168853Sandreas.hansson@arm.com# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
178853Sandreas.hansson@arm.com# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
188853Sandreas.hansson@arm.com# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
198853Sandreas.hansson@arm.com# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
208853Sandreas.hansson@arm.com# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
218853Sandreas.hansson@arm.com# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
228853Sandreas.hansson@arm.com# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
238853Sandreas.hansson@arm.com# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
248853Sandreas.hansson@arm.com# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
258853Sandreas.hansson@arm.com# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
268853Sandreas.hansson@arm.com
278853Sandreas.hansson@arm.comimport MySQLdb, re, string
288853Sandreas.hansson@arm.com
298853Sandreas.hansson@arm.comdef statcmp(a, b):
308853Sandreas.hansson@arm.com    v1 = a.split('.')
318853Sandreas.hansson@arm.com    v2 = b.split('.')
328853Sandreas.hansson@arm.com
338853Sandreas.hansson@arm.com    last = min(len(v1), len(v2)) - 1
348853Sandreas.hansson@arm.com    for i,j in zip(v1[0:last], v2[0:last]):
358853Sandreas.hansson@arm.com        if i != j:
368853Sandreas.hansson@arm.com            return cmp(i, j)
378853Sandreas.hansson@arm.com
388853Sandreas.hansson@arm.com    # Special compare for last element.
398853Sandreas.hansson@arm.com    if len(v1) == len(v2):
4011793Sbrandon.potter@amd.com        return cmp(v1[last], v2[last])
4111793Sbrandon.potter@amd.com    else:
428853Sandreas.hansson@arm.com        return cmp(len(v1), len(v2))
438853Sandreas.hansson@arm.com
448853Sandreas.hansson@arm.comclass RunData:
4512532Sandreas.sandberg@arm.com    def __init__(self, row):
4614009Sgabeblack@google.com        self.run = int(row[0])
478853Sandreas.hansson@arm.com        self.name = row[1]
4810564Sandreas.hansson@arm.com        self.user = row[2]
4910564Sandreas.hansson@arm.com        self.project = row[3]
5012749Sgiacomo.travaglini@arm.com
5112749Sgiacomo.travaglini@arm.comclass SubData:
5212749Sgiacomo.travaglini@arm.com    def __init__(self, row):
5312749Sgiacomo.travaglini@arm.com        self.stat = int(row[0])
5412749Sgiacomo.travaglini@arm.com        self.x = int(row[1])
5514009Sgabeblack@google.com        self.y = int(row[2])
5614196Sgabeblack@google.com        self.name = row[3]
5714009Sgabeblack@google.com        self.descr = row[4]
588853Sandreas.hansson@arm.com
598853Sandreas.hansson@arm.comclass Data:
608853Sandreas.hansson@arm.com    def __init__(self, row):
618853Sandreas.hansson@arm.com        if len(row) != 5:
6212532Sandreas.sandberg@arm.com            raise 'stat db error'
6314009Sgabeblack@google.com        self.stat = int(row[0])
6410564Sandreas.hansson@arm.com        self.run = int(row[1])
6510564Sandreas.hansson@arm.com        self.x = int(row[2])
6610564Sandreas.hansson@arm.com        self.y = int(row[3])
6712749Sgiacomo.travaglini@arm.com        self.data = float(row[4])
6812749Sgiacomo.travaglini@arm.com
6912749Sgiacomo.travaglini@arm.com    def __repr__(self):
7012749Sgiacomo.travaglini@arm.com        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
7112749Sgiacomo.travaglini@arm.com            self.run, self.x, self.y, self.data)
7214009Sgabeblack@google.com
7314196Sgabeblack@google.comclass StatData(object):
7414009Sgabeblack@google.com    def __init__(self, row):
7510564Sandreas.hansson@arm.com        self.stat = int(row[0])
7610564Sandreas.hansson@arm.com        self.name = row[1]
7710564Sandreas.hansson@arm.com        self.desc = row[2]
7810564Sandreas.hansson@arm.com        self.type = row[3]
7912532Sandreas.sandberg@arm.com        self.prereq = int(row[5])
8012532Sandreas.sandberg@arm.com        self.precision = int(row[6])
818853Sandreas.hansson@arm.com
828853Sandreas.hansson@arm.com        import flags
838853Sandreas.hansson@arm.com        self.flags = 0
848853Sandreas.hansson@arm.com        if int(row[4]): self.flags |= flags.printable
858853Sandreas.hansson@arm.com        if int(row[7]): self.flags |= flags.nozero
8612532Sandreas.sandberg@arm.com        if int(row[8]): self.flags |= flags.nonan
878853Sandreas.hansson@arm.com        if int(row[9]): self.flags |= flags.total
888853Sandreas.hansson@arm.com        if int(row[10]): self.flags |= flags.pdf
898853Sandreas.hansson@arm.com        if int(row[11]): self.flags |= flags.cdf
9014008Sgabeblack@google.com
9114008Sgabeblack@google.com        if self.type == 'DIST' or self.type == 'VECTORDIST':
9214008Sgabeblack@google.com            self.min = float(row[12])
9314008Sgabeblack@google.com            self.max = float(row[13])
9414008Sgabeblack@google.com            self.bktsize = float(row[14])
9514009Sgabeblack@google.com            self.size = int(row[15])
9614008Sgabeblack@google.com
9714008Sgabeblack@google.com        if self.type == 'FORMULA':
9814008Sgabeblack@google.com            self.formula = self.db.allFormulas[self.stat]
9914008Sgabeblack@google.com
10014008Sgabeblack@google.comclass Node(object):
10114008Sgabeblack@google.com    def __init__(self, name):
10214008Sgabeblack@google.com        self.name = name
10314008Sgabeblack@google.com    def __str__(self):
10414008Sgabeblack@google.com        return self.name
10514008Sgabeblack@google.com
10614008Sgabeblack@google.comclass Result(object):
10714008Sgabeblack@google.com    def __init__(self, x, y):
10814008Sgabeblack@google.com        self.data = {}
10914008Sgabeblack@google.com        self.x = x
11014008Sgabeblack@google.com        self.y = y
11114008Sgabeblack@google.com
11214008Sgabeblack@google.com    def __contains__(self, run):
11314012Sgabeblack@google.com        return run in self.data
11414012Sgabeblack@google.com
11514012Sgabeblack@google.com    def __getitem__(self, run):
11614012Sgabeblack@google.com        if run not in self.data:
11714012Sgabeblack@google.com            self.data[run] = [ [ 0.0 ] * self.y for i in xrange(self.x) ]
11814012Sgabeblack@google.com        return self.data[run]
11914012Sgabeblack@google.com
12014012Sgabeblack@google.comclass Database(object):
12114012Sgabeblack@google.com    def __init__(self):
12214012Sgabeblack@google.com        self.host = 'zizzer.pool'
12314012Sgabeblack@google.com        self.user = ''
12414012Sgabeblack@google.com        self.passwd = ''
12514012Sgabeblack@google.com        self.db = 'm5stats'
12614012Sgabeblack@google.com        self.cursor = None
12714012Sgabeblack@google.com
128        self.allStats = []
129        self.allStatIds = {}
130        self.allStatNames = {}
131
132        self.allSubData = {}
133
134        self.allRuns = []
135        self.allRunIds = {}
136        self.allRunNames = {}
137
138        self.allBins = []
139        self.allBinIds = {}
140        self.allBinNames = {}
141
142        self.allFormulas = {}
143
144        self.stattop = {}
145        self.statdict = {}
146        self.statlist = []
147
148        self.mode = 'sum';
149        self.runs = None
150        self.bins = None
151        self.ticks = None
152        self.method = 'sum'
153        self._method = type(self).sum
154
155    def get(self, job, stat):
156        run = self.allRunNames.get(job.name, None)
157        if run is None:
158            print 'run "%s" not found' % job
159            return None
160
161        from info import scalar, vector, value, total, len
162        stat.system = self[job.system]
163        if scalar(stat):
164            return value(stat, run.run)
165        if vector(stat):
166            return values(stat, run.run)
167
168        return None
169
170    def query(self, sql):
171        self.cursor.execute(sql)
172
173    def update_dict(self, dict):
174        dict.update(self.stattop)
175
176    def append(self, stat):
177        statname = re.sub(':', '__', stat.name)
178        path = string.split(statname, '.')
179        pathtop = path[0]
180        fullname = ''
181
182        x = self
183        while len(path) > 1:
184            name = path.pop(0)
185            if not x.__dict__.has_key(name):
186                x.__dict__[name] = Node(fullname + name)
187            x = x.__dict__[name]
188            fullname = '%s%s.' % (fullname, name)
189
190        name = path.pop(0)
191        x.__dict__[name] = stat
192
193        self.stattop[pathtop] = self.__dict__[pathtop]
194        self.statdict[statname] = stat
195        self.statlist.append(statname)
196
197    def connect(self):
198        # connect
199        self.thedb = MySQLdb.connect(db=self.db,
200                                     host=self.host,
201                                     user=self.user,
202                                     passwd=self.passwd)
203
204        # create a cursor
205        self.cursor = self.thedb.cursor()
206
207        self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
208                   from runs''')
209        for result in self.cursor.fetchall():
210            run = RunData(result);
211            self.allRuns.append(run)
212            self.allRunIds[run.run] = run
213            self.allRunNames[run.name] = run
214
215        self.query('select * from bins')
216        for id,name in self.cursor.fetchall():
217            self.allBinIds[int(id)] = name
218            self.allBinNames[name] = int(id)
219
220        self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
221        for result in self.cursor.fetchall():
222            subdata = SubData(result)
223            if self.allSubData.has_key(subdata.stat):
224                self.allSubData[subdata.stat].append(subdata)
225            else:
226                self.allSubData[subdata.stat] = [ subdata ]
227
228        self.query('select * from formulas')
229        for id,formula in self.cursor.fetchall():
230            self.allFormulas[int(id)] = formula.tostring()
231
232        StatData.db = self
233        self.query('select * from stats')
234        import info
235        for result in self.cursor.fetchall():
236            stat = info.NewStat(self, StatData(result))
237            self.append(stat)
238            self.allStats.append(stat)
239            self.allStatIds[stat.stat] = stat
240            self.allStatNames[stat.name] = stat
241
242    # Name: listbins
243    # Desc: Prints all bins matching regex argument, if no argument
244    #       is given all bins are returned
245    def listBins(self, regex='.*'):
246        print '%-50s %-10s' % ('bin name', 'id')
247        print '-' * 61
248        names = self.allBinNames.keys()
249        names.sort()
250        for name in names:
251            id = self.allBinNames[name]
252            print '%-50s %-10d' % (name, id)
253
254    # Name: listruns
255    # Desc: Prints all runs matching a given user, if no argument
256    #       is given all runs are returned
257    def listRuns(self, user=None):
258        print '%-40s %-10s %-5s' % ('run name', 'user', 'id')
259        print '-' * 62
260        for run in self.allRuns:
261            if user == None or user == run.user:
262                print '%-40s %-10s %-10d' % (run.name, run.user, run.run)
263
264    # Name: listTicks
265    # Desc: Prints all samples for a given run
266    def listTicks(self, runs=None):
267        print "tick"
268        print "----------------------------------------"
269        sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
270        if runs != None:
271            first = True
272            for run in runs:
273               if first:
274            #       sql += ' where'
275                   first = False
276               else:
277                   sql += ' or'
278               sql += ' dt_run=%s' % run.run
279            sql += ')'
280        self.query(sql)
281        for r in self.cursor.fetchall():
282            print r[0]
283
284    # Name: retTicks
285    # Desc: Prints all samples for a given run
286    def retTicks(self, runs=None):
287        sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
288        if runs != None:
289            first = True
290            for run in runs:
291               if first:
292                   first = False
293               else:
294                   sql += ' or'
295               sql += ' dt_run=%s' % run.run
296            sql += ')'
297        self.query(sql)
298        ret = []
299        for r in self.cursor.fetchall():
300            ret.append(r[0])
301        return ret
302
303    # Name: liststats
304    # Desc: Prints all statistics that appear in the database,
305    #         the optional argument is a regular expression that can
306    #         be used to prune the result set
307    def listStats(self, regex=None):
308        print '%-60s %-8s %-10s' % ('stat name', 'id', 'type')
309        print '-' * 80
310
311        rx = None
312        if regex != None:
313            rx = re.compile(regex)
314
315        stats = [ stat.name for stat in self.allStats ]
316        stats.sort(statcmp)
317        for stat in stats:
318            stat = self.allStatNames[stat]
319            if rx == None or rx.match(stat.name):
320                print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type)
321
322    # Name: liststats
323    # Desc: Prints all statistics that appear in the database,
324    #         the optional argument is a regular expression that can
325    #         be used to prune the result set
326    def listFormulas(self, regex=None):
327        print '%-60s %s' % ('formula name', 'formula')
328        print '-' * 80
329
330        rx = None
331        if regex != None:
332            rx = re.compile(regex)
333
334        stats = [ stat.name for stat in self.allStats ]
335        stats.sort(statcmp)
336        for stat in stats:
337            stat = self.allStatNames[stat]
338            if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)):
339                print '%-60s %s' % (stat.name, self.allFormulas[stat.stat])
340
341    def getStat(self, stats):
342        if type(stats) is not list:
343            stats = [ stats ]
344
345        ret = []
346        for stat in stats:
347            if type(stat) is int:
348                ret.append(self.allStatIds[stat])
349
350            if type(stat) is str:
351                rx = re.compile(stat)
352                for stat in self.allStats:
353                    if rx.match(stat.name):
354                        ret.append(stat)
355        return ret
356
357    def getBin(self, bins):
358        if type(bins) is not list:
359            bins = [ bins ]
360
361        ret = []
362        for bin in bins:
363            if type(bin) is int:
364                ret.append(bin)
365            elif type(bin) is str:
366                ret.append(self.allBinNames[bin])
367            else:
368                for name,id in self.allBinNames.items():
369                    if bin.match(name):
370                        ret.append(id)
371
372        return ret
373
374    def getNotBin(self, bin):
375        map = {}
376        for bin in getBin(bin):
377            map[bin] = 1
378
379        ret = []
380        for bin in self.allBinIds.keys():
381            if not map.has_key(bin):
382                ret.append(bin)
383
384        return ret
385
386    #########################################
387    # get the data
388    #
389    def inner(self, op, stat, bins, ticks, group=False):
390        sql = 'select '
391        sql += 'dt_stat as stat, '
392        sql += 'dt_run as run, '
393        sql += 'dt_x as x, '
394        sql += 'dt_y as y, '
395        if group:
396            sql += 'dt_tick as tick, '
397        sql += '%s(dt_data) as data ' % op
398        sql += 'from data '
399        sql += 'where '
400
401        if isinstance(stat, list):
402            val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
403            sql += ' (%s)' % val
404        else:
405            sql += ' dt_stat=%d' % stat.stat
406
407        if self.runs != None and len(self.runs):
408            val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
409            sql += ' and (%s)' % val
410
411        if bins != None and len(bins):
412            val = ' or '.join([ 'dt_bin=%d' % b for b in bins ])
413            sql += ' and (%s)' % val
414
415        if ticks != None and len(ticks):
416            val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
417            sql += ' and (%s)' % val
418
419        sql += ' group by dt_stat,dt_run,dt_x,dt_y'
420        if group:
421            sql += ',dt_tick'
422        return sql
423
424    def outer(self, op_out, op_in, stat, bins, ticks):
425        sql = self.inner(op_in, stat, bins, ticks, True)
426        sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
427        sql += 'group by stat,run,x,y'
428        return sql
429
430    # Name: sum
431    # Desc: given a run, a stat and an array of samples and bins,
432    #        sum all the bins and then get the standard deviation of the
433    #        samples for non-binned runs. This will just return the average
434    #        of samples, however a bin array still must be passed
435    def sum(self, stat, bins, ticks):
436        return self.inner('sum', stat, bins, ticks)
437
438    # Name: avg
439    # Desc: given a run, a stat and an array of samples and bins,
440    #        sum all the bins and then average the samples for non-binned
441    #        runs this will just return the average of samples, however
442    #        a bin array still must be passed
443    def avg(self, stat, bins, ticks):
444        return self.outer('avg', 'sum', stat, bins, ticks)
445
446    # Name: stdev
447    # Desc: given a run, a stat and an array of samples and bins,
448    #        sum all the bins and then get the standard deviation of the
449    #        samples for non-binned runs. This will just return the average
450    #        of samples, however a bin array still must be passed
451    def stdev(self, stat, bins, ticks):
452        return self.outer('stddev', 'sum', stat, bins, ticks)
453
454    def __setattr__(self, attr, value):
455        super(Database, self).__setattr__(attr, value)
456        if attr != 'method':
457            return
458
459        if value == 'sum':
460            self._method = self.sum
461        elif value == 'avg':
462            self._method = self.avg
463        elif value == 'stdev':
464            self._method = self.stdev
465        else:
466            raise AttributeError, "can only set get to: sum | avg | stdev"
467
468    def data(self, stat, bins=None, ticks=None):
469        if bins is None:
470            bins = self.bins
471        if ticks is None:
472            ticks = self.ticks
473        sql = self._method(self, stat, bins, ticks)
474        self.query(sql)
475
476        runs = {}
477        xmax = 0
478        ymax = 0
479        for x in self.cursor.fetchall():
480            data = Data(x)
481            if not runs.has_key(data.run):
482                runs[data.run] = {}
483            if not runs[data.run].has_key(data.x):
484                runs[data.run][data.x] = {}
485
486            xmax = max(xmax, data.x)
487            ymax = max(ymax, data.y)
488            runs[data.run][data.x][data.y] = data.data
489
490        results = Result(xmax + 1, ymax + 1)
491        for run,data in runs.iteritems():
492            result = results[run]
493            for x,ydata in data.iteritems():
494                for y,data in ydata.iteritems():
495                    result[x][y] = data
496        return results
497
498    def __getitem__(self, key):
499        return self.stattop[key]
500