db.py revision 1758:74acd5b23964
1
2# Copyright (c) 2003-2004 The Regents of The University of Michigan
3# All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met: redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer;
9# redistributions in binary form must reproduce the above copyright
10# notice, this list of conditions and the following disclaimer in the
11# documentation and/or other materials provided with the distribution;
12# neither the name of the copyright holders nor the names of its
13# contributors may be used to endorse or promote products derived from
14# this software without specific prior written permission.
15#
16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#Permission is granted to use, copy, create derivative works and
29#redistribute this software and such derivative works for any purpose,
30#so long as the copyright notice above, this grant of permission, and
31#the disclaimer below appear in all copies made; and so long as the
32#name of The University of Michigan is not used in any advertising or
33#publicity pertaining to the use or distribution of this software
34#without specific, written prior authorization.
35#
36#THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION FROM THE
37#UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY PURPOSE, AND WITHOUT
38#WARRANTY BY THE UNIVERSITY OF MICHIGAN OF ANY KIND, EITHER EXPRESS OR
39#IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
40#MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE REGENTS OF
41#THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE FOR ANY DAMAGES,
42#INCLUDING DIRECT, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
43#DAMAGES, WITH RESPECT TO ANY CLAIM ARISING OUT OF OR IN CONNECTION
44#WITH THE USE OF THE SOFTWARE, EVEN IF IT HAS BEEN OR IS HEREAFTER
45#ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
46
47import MySQLdb, re, string
48
49def statcmp(a, b):
50    v1 = a.split('.')
51    v2 = b.split('.')
52
53    last = min(len(v1), len(v2)) - 1
54    for i,j in zip(v1[0:last], v2[0:last]):
55        if i != j:
56            return cmp(i, j)
57
58    # Special compare for last element.
59    if len(v1) == len(v2):
60        return cmp(v1[last], v2[last])
61    else:
62        return cmp(len(v1), len(v2))
63
64class RunData:
65    def __init__(self, row):
66        self.run = int(row[0])
67        self.name = row[1]
68        self.user = row[2]
69        self.project = row[3]
70
71class SubData:
72    def __init__(self, row):
73        self.stat = int(row[0])
74        self.x = int(row[1])
75        self.y = int(row[2])
76        self.name = row[3]
77        self.descr = row[4]
78
79class Data:
80    def __init__(self, row):
81        if len(row) != 5:
82            raise 'stat db error'
83        self.stat = int(row[0])
84        self.run = int(row[1])
85        self.x = int(row[2])
86        self.y = int(row[3])
87        self.data = float(row[4])
88
89    def __repr__(self):
90        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
91            self.run, self.x, self.y, self.data)
92
93class StatData(object):
94    def __init__(self, row):
95        self.stat = int(row[0])
96        self.name = row[1]
97        self.desc = row[2]
98        self.type = row[3]
99        self.prereq = int(row[5])
100        self.precision = int(row[6])
101
102        import flags
103        self.flags = 0
104        if int(row[4]): self.flags |= flags.printable
105        if int(row[7]): self.flags |= flags.nozero
106        if int(row[8]): self.flags |= flags.nonan
107        if int(row[9]): self.flags |= flags.total
108        if int(row[10]): self.flags |= flags.pdf
109        if int(row[11]): self.flags |= flags.cdf
110
111        if self.type == 'DIST' or self.type == 'VECTORDIST':
112            self.min = float(row[12])
113            self.max = float(row[13])
114            self.bktsize = float(row[14])
115            self.size = int(row[15])
116
117        if self.type == 'FORMULA':
118            self.formula = self.db.allFormulas[self.stat]
119
120class Node(object):
121    def __init__(self, name):
122        self.name = name
123    def __str__(self):
124        return name
125
126class Database(object):
127    def __init__(self):
128        self.host = 'zizzer.pool'
129        self.user = ''
130        self.passwd = ''
131        self.db = 'm5stats'
132        self.cursor = None
133
134        self.allStats = []
135        self.allStatIds = {}
136        self.allStatNames = {}
137
138        self.allSubData = {}
139
140        self.allRuns = []
141        self.allRunIds = {}
142        self.allRunNames = {}
143
144        self.allBins = []
145        self.allBinIds = {}
146        self.allBinNames = {}
147
148        self.allFormulas = {}
149
150        self.stattop = {}
151        self.statdict = {}
152        self.statlist = []
153
154        self.mode = 'sum';
155        self.runs = None
156        self.bins = None
157        self.ticks = None
158        self.__dict__['get'] = type(self).sum
159
160    def query(self, sql):
161        self.cursor.execute(sql)
162
163    def update_dict(self, dict):
164        dict.update(self.stattop)
165
166    def append(self, stat):
167        statname = re.sub(':', '__', stat.name)
168        path = string.split(statname, '.')
169        pathtop = path[0]
170        fullname = ''
171
172        x = self
173        while len(path) > 1:
174            name = path.pop(0)
175            if not x.__dict__.has_key(name):
176                x.__dict__[name] = Node(fullname + name)
177            x = x.__dict__[name]
178            fullname = '%s%s.' % (fullname, name)
179
180        name = path.pop(0)
181        x.__dict__[name] = stat
182
183        self.stattop[pathtop] = self.__dict__[pathtop]
184        self.statdict[statname] = stat
185        self.statlist.append(statname)
186
187    def connect(self):
188        # connect
189        self.thedb = MySQLdb.connect(db=self.db,
190                                     host=self.host,
191                                     user=self.user,
192                                     passwd=self.passwd)
193
194        # create a cursor
195        self.cursor = self.thedb.cursor()
196
197        self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
198                   from runs''')
199        for result in self.cursor.fetchall():
200            run = RunData(result);
201            self.allRuns.append(run)
202            self.allRunIds[run.run] = run
203            self.allRunNames[run.name] = run
204
205        self.query('select * from bins')
206        for id,name in self.cursor.fetchall():
207            self.allBinIds[int(id)] = name
208            self.allBinNames[name] = int(id)
209
210        self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
211        for result in self.cursor.fetchall():
212            subdata = SubData(result)
213            if self.allSubData.has_key(subdata.stat):
214                self.allSubData[subdata.stat].append(subdata)
215            else:
216                self.allSubData[subdata.stat] = [ subdata ]
217
218        self.query('select * from formulas')
219        for id,formula in self.cursor.fetchall():
220            self.allFormulas[int(id)] = formula.tostring()
221
222        StatData.db = self
223        self.query('select * from stats')
224        import info
225        for result in self.cursor.fetchall():
226            stat = info.NewStat(StatData(result))
227            self.append(stat)
228            self.allStats.append(stat)
229            self.allStatIds[stat.stat] = stat
230            self.allStatNames[stat.name] = stat
231
232    # Name: listbins
233    # Desc: Prints all bins matching regex argument, if no argument
234    #       is given all bins are returned
235    def listBins(self, regex='.*'):
236        print '%-50s %-10s' % ('bin name', 'id')
237        print '-' * 61
238        names = self.allBinNames.keys()
239        names.sort()
240        for name in names:
241            id = self.allBinNames[name]
242            print '%-50s %-10d' % (name, id)
243
244    # Name: listruns
245    # Desc: Prints all runs matching a given user, if no argument
246    #       is given all runs are returned
247    def listRuns(self, user=None):
248        print '%-40s %-10s %-5s' % ('run name', 'user', 'id')
249        print '-' * 62
250        for run in self.allRuns:
251            if user == None or user == run.user:
252                print '%-40s %-10s %-10d' % (run.name, run.user, run.run)
253
254    # Name: listTicks
255    # Desc: Prints all samples for a given run
256    def listTicks(self, runs=None):
257        print "tick"
258        print "----------------------------------------"
259        sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
260        if runs != None:
261            first = True
262            for run in runs:
263               if first:
264            #       sql += ' where'
265                   first = False
266               else:
267                   sql += ' or'
268               sql += ' dt_run=%s' % run.run
269            sql += ')'
270        self.query(sql)
271        for r in self.cursor.fetchall():
272            print r[0]
273
274    # Name: retTicks
275    # Desc: Prints all samples for a given run
276    def retTicks(self, runs=None):
277        sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
278        if runs != None:
279            first = True
280            for run in runs:
281               if first:
282                   first = False
283               else:
284                   sql += ' or'
285               sql += ' dt_run=%s' % run.run
286            sql += ')'
287        self.query(sql)
288        ret = []
289        for r in self.cursor.fetchall():
290            ret.append(r[0])
291        return ret
292
293    # Name: liststats
294    # Desc: Prints all statistics that appear in the database,
295    #         the optional argument is a regular expression that can
296    #         be used to prune the result set
297    def listStats(self, regex=None):
298        print '%-60s %-8s %-10s' % ('stat name', 'id', 'type')
299        print '-' * 80
300
301        rx = None
302        if regex != None:
303            rx = re.compile(regex)
304
305        stats = [ stat.name for stat in self.allStats ]
306        stats.sort(statcmp)
307        for stat in stats:
308            stat = self.allStatNames[stat]
309            if rx == None or rx.match(stat.name):
310                print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type)
311
312    # Name: liststats
313    # Desc: Prints all statistics that appear in the database,
314    #         the optional argument is a regular expression that can
315    #         be used to prune the result set
316    def listFormulas(self, regex=None):
317        print '%-60s %s' % ('formula name', 'formula')
318        print '-' * 80
319
320        rx = None
321        if regex != None:
322            rx = re.compile(regex)
323
324        stats = [ stat.name for stat in self.allStats ]
325        stats.sort(statcmp)
326        for stat in stats:
327            stat = self.allStatNames[stat]
328            if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)):
329                print '%-60s %s' % (stat.name, self.allFormulas[stat.stat])
330
331    def getStat(self, stats):
332        if type(stats) is not list:
333            stats = [ stats ]
334
335        ret = []
336        for stat in stats:
337            if type(stat) is int:
338                ret.append(self.allStatIds[stat])
339
340            if type(stat) is str:
341                rx = re.compile(stat)
342                for stat in self.allStats:
343                    if rx.match(stat.name):
344                        ret.append(stat)
345        return ret
346
347    def getBin(self, bins):
348        if type(bins) is not list:
349            bins = [ bins ]
350
351        ret = []
352        for bin in bins:
353            if type(bin) is int:
354                ret.append(bin)
355            elif type(bin) is str:
356                ret.append(self.allBinNames[bin])
357            else:
358                for name,id in self.allBinNames.items():
359                    if bin.match(name):
360                        ret.append(id)
361
362        return ret
363
364    def getNotBin(self, bin):
365        map = {}
366        for bin in getBin(bin):
367            map[bin] = 1
368
369        ret = []
370        for bin in self.allBinIds.keys():
371            if not map.has_key(bin):
372                ret.append(bin)
373
374        return ret
375
376    #########################################
377    # get the data
378    #
379    def inner(self, op, stat, bins, ticks, group=False):
380        sql = 'select '
381        sql += 'dt_stat as stat, '
382        sql += 'dt_run as run, '
383        sql += 'dt_x as x, '
384        sql += 'dt_y as y, '
385        if group:
386            sql += 'dt_tick as tick, '
387        sql += '%s(dt_data) as data ' % op
388        sql += 'from data '
389        sql += 'where '
390
391        if isinstance(stat, list):
392            val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
393            sql += ' (%s)' % val
394        else:
395            sql += ' dt_stat=%d' % stat.stat
396
397        if self.runs != None and len(self.runs):
398            val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
399            sql += ' and (%s)' % val
400
401        if bins != None and len(bins):
402            val = ' or '.join([ 'dt_bin=%d' % b for b in bins ])
403            sql += ' and (%s)' % val
404
405        if ticks != None and len(ticks):
406            val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
407            sql += ' and (%s)' % val
408
409        sql += ' group by dt_stat,dt_run,dt_x,dt_y'
410        if group:
411            sql += ',dt_tick'
412        return sql
413
414    def outer(self, op_out, op_in, stat, bins, ticks):
415        sql = self.inner(op_in, stat, bins, ticks, True)
416        sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
417        sql += 'group by stat,run,x,y'
418        return sql
419
420    # Name: sum
421    # Desc: given a run, a stat and an array of samples and bins,
422    #        sum all the bins and then get the standard deviation of the
423    #        samples for non-binned runs. This will just return the average
424    #        of samples, however a bin array still must be passed
425    def sum(self, stat, bins, ticks):
426        return self.inner('sum', stat, bins, ticks)
427
428    # Name: avg
429    # Desc: given a run, a stat and an array of samples and bins,
430    #        sum all the bins and then average the samples for non-binned
431    #        runs this will just return the average of samples, however
432    #        a bin array still must be passed
433    def avg(self, stat, bins, ticks):
434        return self.outer('avg', 'sum', stat, bins, ticks)
435
436    # Name: stdev
437    # Desc: given a run, a stat and an array of samples and bins,
438    #        sum all the bins and then get the standard deviation of the
439    #        samples for non-binned runs. This will just return the average
440    #        of samples, however a bin array still must be passed
441    def stdev(self, stat, bins, ticks):
442        return self.outer('stddev', 'sum', stat, bins, ticks)
443
444    def __getattribute__(self, attr):
445        if attr != 'get':
446            return super(Database, self).__getattribute__(attr)
447
448        if self.__dict__['get'] == type(self).sum:
449            return 'sum'
450        elif self.__dict__['get'] == type(self).avg:
451            return 'avg'
452        elif self.__dict__['get'] == type(self).stdev:
453            return 'stdev'
454        else:
455            return ''
456
457    def __setattr__(self, attr, value):
458        if attr != 'get':
459            super(Database, self).__setattr__(attr, value)
460            return
461
462        if value == 'sum':
463            self.__dict__['get'] = type(self).sum
464        elif value == 'avg':
465            self.__dict__['get'] = type(self).avg
466        elif value == 'stdev':
467            self.__dict__['get'] = type(self).stdev
468        else:
469            raise AttributeError, "can only set get to: sum | avg | stdev"
470
471    def data(self, stat, bins=None, ticks=None):
472        if bins is None:
473            bins = self.bins
474        if ticks is None:
475            ticks = self.ticks
476        sql = self.__dict__['get'](self, stat, bins, ticks)
477        self.query(sql)
478
479        runs = {}
480        for x in self.cursor.fetchall():
481            data = Data(x)
482            if not runs.has_key(data.run):
483                runs[data.run] = {}
484            if not runs[data.run].has_key(data.x):
485                runs[data.run][data.x] = {}
486
487            runs[data.run][data.x][data.y] = data.data
488        return runs
489