db.py revision 2014:7df693ff6fa4
1# Copyright (c) 2003-2004 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27import MySQLdb, re, string
28
29def statcmp(a, b):
30    v1 = a.split('.')
31    v2 = b.split('.')
32
33    last = min(len(v1), len(v2)) - 1
34    for i,j in zip(v1[0:last], v2[0:last]):
35        if i != j:
36            return cmp(i, j)
37
38    # Special compare for last element.
39    if len(v1) == len(v2):
40        return cmp(v1[last], v2[last])
41    else:
42        return cmp(len(v1), len(v2))
43
44class RunData:
45    def __init__(self, row):
46        self.run = int(row[0])
47        self.name = row[1]
48        self.user = row[2]
49        self.project = row[3]
50
51class SubData:
52    def __init__(self, row):
53        self.stat = int(row[0])
54        self.x = int(row[1])
55        self.y = int(row[2])
56        self.name = row[3]
57        self.descr = row[4]
58
59class Data:
60    def __init__(self, row):
61        if len(row) != 5:
62            raise 'stat db error'
63        self.stat = int(row[0])
64        self.run = int(row[1])
65        self.x = int(row[2])
66        self.y = int(row[3])
67        self.data = float(row[4])
68
69    def __repr__(self):
70        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
71            self.run, self.x, self.y, self.data)
72
73class StatData(object):
74    def __init__(self, row):
75        self.stat = int(row[0])
76        self.name = row[1]
77        self.desc = row[2]
78        self.type = row[3]
79        self.prereq = int(row[5])
80        self.precision = int(row[6])
81
82        import flags
83        self.flags = 0
84        if int(row[4]): self.flags |= flags.printable
85        if int(row[7]): self.flags |= flags.nozero
86        if int(row[8]): self.flags |= flags.nonan
87        if int(row[9]): self.flags |= flags.total
88        if int(row[10]): self.flags |= flags.pdf
89        if int(row[11]): self.flags |= flags.cdf
90
91        if self.type == 'DIST' or self.type == 'VECTORDIST':
92            self.min = float(row[12])
93            self.max = float(row[13])
94            self.bktsize = float(row[14])
95            self.size = int(row[15])
96
97        if self.type == 'FORMULA':
98            self.formula = self.db.allFormulas[self.stat]
99
100class Node(object):
101    def __init__(self, name):
102        self.name = name
103    def __str__(self):
104        return self.name
105
106class Result(object):
107    def __init__(self, x, y):
108        self.data = {}
109        self.x = x
110        self.y = y
111
112    def __contains__(self, run):
113        return run in self.data
114
115    def __getitem__(self, run):
116        if run not in self.data:
117            self.data[run] = [ [ 0.0 ] * self.y for i in xrange(self.x) ]
118        return self.data[run]
119
120class Database(object):
121    def __init__(self):
122        self.host = 'zizzer.pool'
123        self.user = ''
124        self.passwd = ''
125        self.db = 'm5stats'
126        self.cursor = None
127
128        self.allStats = []
129        self.allStatIds = {}
130        self.allStatNames = {}
131
132        self.allSubData = {}
133
134        self.allRuns = []
135        self.allRunIds = {}
136        self.allRunNames = {}
137
138        self.allBins = []
139        self.allBinIds = {}
140        self.allBinNames = {}
141
142        self.allFormulas = {}
143
144        self.stattop = {}
145        self.statdict = {}
146        self.statlist = []
147
148        self.mode = 'sum';
149        self.runs = None
150        self.bins = None
151        self.ticks = None
152        self.method = 'sum'
153        self._method = type(self).sum
154
155    def get(self, job, stat, system=None):
156        run = self.allRunNames.get(str(job), None)
157        if run is None:
158            return None
159
160        from info import ProxyError, scalar, vector, value, values, total, len
161        if system is None and hasattr('system', job):
162            system = job.system
163
164        if system is not None:
165            stat.system = self[system]
166        try:
167            if scalar(stat):
168                return value(stat, run.run)
169            if vector(stat):
170                return values(stat, run.run)
171        except ProxyError:
172            return None
173
174        return None
175
176    def query(self, sql):
177        self.cursor.execute(sql)
178
179    def update_dict(self, dict):
180        dict.update(self.stattop)
181
182    def append(self, stat):
183        statname = re.sub(':', '__', stat.name)
184        path = string.split(statname, '.')
185        pathtop = path[0]
186        fullname = ''
187
188        x = self
189        while len(path) > 1:
190            name = path.pop(0)
191            if not x.__dict__.has_key(name):
192                x.__dict__[name] = Node(fullname + name)
193            x = x.__dict__[name]
194            fullname = '%s%s.' % (fullname, name)
195
196        name = path.pop(0)
197        x.__dict__[name] = stat
198
199        self.stattop[pathtop] = self.__dict__[pathtop]
200        self.statdict[statname] = stat
201        self.statlist.append(statname)
202
203    def connect(self):
204        # connect
205        self.thedb = MySQLdb.connect(db=self.db,
206                                     host=self.host,
207                                     user=self.user,
208                                     passwd=self.passwd)
209
210        # create a cursor
211        self.cursor = self.thedb.cursor()
212
213        self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
214                   from runs''')
215        for result in self.cursor.fetchall():
216            run = RunData(result);
217            self.allRuns.append(run)
218            self.allRunIds[run.run] = run
219            self.allRunNames[run.name] = run
220
221        self.query('select * from bins')
222        for id,name in self.cursor.fetchall():
223            self.allBinIds[int(id)] = name
224            self.allBinNames[name] = int(id)
225
226        self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
227        for result in self.cursor.fetchall():
228            subdata = SubData(result)
229            if self.allSubData.has_key(subdata.stat):
230                self.allSubData[subdata.stat].append(subdata)
231            else:
232                self.allSubData[subdata.stat] = [ subdata ]
233
234        self.query('select * from formulas')
235        for id,formula in self.cursor.fetchall():
236            self.allFormulas[int(id)] = formula.tostring()
237
238        StatData.db = self
239        self.query('select * from stats')
240        import info
241        for result in self.cursor.fetchall():
242            stat = info.NewStat(self, StatData(result))
243            self.append(stat)
244            self.allStats.append(stat)
245            self.allStatIds[stat.stat] = stat
246            self.allStatNames[stat.name] = stat
247
248    # Name: listbins
249    # Desc: Prints all bins matching regex argument, if no argument
250    #       is given all bins are returned
251    def listBins(self, regex='.*'):
252        print '%-50s %-10s' % ('bin name', 'id')
253        print '-' * 61
254        names = self.allBinNames.keys()
255        names.sort()
256        for name in names:
257            id = self.allBinNames[name]
258            print '%-50s %-10d' % (name, id)
259
260    # Name: listruns
261    # Desc: Prints all runs matching a given user, if no argument
262    #       is given all runs are returned
263    def listRuns(self, user=None):
264        print '%-40s %-10s %-5s' % ('run name', 'user', 'id')
265        print '-' * 62
266        for run in self.allRuns:
267            if user == None or user == run.user:
268                print '%-40s %-10s %-10d' % (run.name, run.user, run.run)
269
270    # Name: listTicks
271    # Desc: Prints all samples for a given run
272    def listTicks(self, runs=None):
273        print "tick"
274        print "----------------------------------------"
275        sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
276        if runs != None:
277            first = True
278            for run in runs:
279               if first:
280            #       sql += ' where'
281                   first = False
282               else:
283                   sql += ' or'
284               sql += ' dt_run=%s' % run.run
285            sql += ')'
286        self.query(sql)
287        for r in self.cursor.fetchall():
288            print r[0]
289
290    # Name: retTicks
291    # Desc: Prints all samples for a given run
292    def retTicks(self, runs=None):
293        sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
294        if runs != None:
295            first = True
296            for run in runs:
297               if first:
298                   first = False
299               else:
300                   sql += ' or'
301               sql += ' dt_run=%s' % run.run
302            sql += ')'
303        self.query(sql)
304        ret = []
305        for r in self.cursor.fetchall():
306            ret.append(r[0])
307        return ret
308
309    # Name: liststats
310    # Desc: Prints all statistics that appear in the database,
311    #         the optional argument is a regular expression that can
312    #         be used to prune the result set
313    def listStats(self, regex=None):
314        print '%-60s %-8s %-10s' % ('stat name', 'id', 'type')
315        print '-' * 80
316
317        rx = None
318        if regex != None:
319            rx = re.compile(regex)
320
321        stats = [ stat.name for stat in self.allStats ]
322        stats.sort(statcmp)
323        for stat in stats:
324            stat = self.allStatNames[stat]
325            if rx == None or rx.match(stat.name):
326                print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type)
327
328    # Name: liststats
329    # Desc: Prints all statistics that appear in the database,
330    #         the optional argument is a regular expression that can
331    #         be used to prune the result set
332    def listFormulas(self, regex=None):
333        print '%-60s %s' % ('formula name', 'formula')
334        print '-' * 80
335
336        rx = None
337        if regex != None:
338            rx = re.compile(regex)
339
340        stats = [ stat.name for stat in self.allStats ]
341        stats.sort(statcmp)
342        for stat in stats:
343            stat = self.allStatNames[stat]
344            if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)):
345                print '%-60s %s' % (stat.name, self.allFormulas[stat.stat])
346
347    def getStat(self, stats):
348        if type(stats) is not list:
349            stats = [ stats ]
350
351        ret = []
352        for stat in stats:
353            if type(stat) is int:
354                ret.append(self.allStatIds[stat])
355
356            if type(stat) is str:
357                rx = re.compile(stat)
358                for stat in self.allStats:
359                    if rx.match(stat.name):
360                        ret.append(stat)
361        return ret
362
363    def getBin(self, bins):
364        if type(bins) is not list:
365            bins = [ bins ]
366
367        ret = []
368        for bin in bins:
369            if type(bin) is int:
370                ret.append(bin)
371            elif type(bin) is str:
372                ret.append(self.allBinNames[bin])
373            else:
374                for name,id in self.allBinNames.items():
375                    if bin.match(name):
376                        ret.append(id)
377
378        return ret
379
380    def getNotBin(self, bin):
381        map = {}
382        for bin in getBin(bin):
383            map[bin] = 1
384
385        ret = []
386        for bin in self.allBinIds.keys():
387            if not map.has_key(bin):
388                ret.append(bin)
389
390        return ret
391
392    #########################################
393    # get the data
394    #
395    def inner(self, op, stat, bins, ticks, group=False):
396        sql = 'select '
397        sql += 'dt_stat as stat, '
398        sql += 'dt_run as run, '
399        sql += 'dt_x as x, '
400        sql += 'dt_y as y, '
401        if group:
402            sql += 'dt_tick as tick, '
403        sql += '%s(dt_data) as data ' % op
404        sql += 'from data '
405        sql += 'where '
406
407        if isinstance(stat, list):
408            val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
409            sql += ' (%s)' % val
410        else:
411            sql += ' dt_stat=%d' % stat.stat
412
413        if self.runs != None and len(self.runs):
414            val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
415            sql += ' and (%s)' % val
416
417        if bins != None and len(bins):
418            val = ' or '.join([ 'dt_bin=%d' % b for b in bins ])
419            sql += ' and (%s)' % val
420
421        if ticks != None and len(ticks):
422            val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
423            sql += ' and (%s)' % val
424
425        sql += ' group by dt_stat,dt_run,dt_x,dt_y'
426        if group:
427            sql += ',dt_tick'
428        return sql
429
430    def outer(self, op_out, op_in, stat, bins, ticks):
431        sql = self.inner(op_in, stat, bins, ticks, True)
432        sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
433        sql += 'group by stat,run,x,y'
434        return sql
435
436    # Name: sum
437    # Desc: given a run, a stat and an array of samples and bins,
438    #        sum all the bins and then get the standard deviation of the
439    #        samples for non-binned runs. This will just return the average
440    #        of samples, however a bin array still must be passed
441    def sum(self, stat, bins, ticks):
442        return self.inner('sum', stat, bins, ticks)
443
444    # Name: avg
445    # Desc: given a run, a stat and an array of samples and bins,
446    #        sum all the bins and then average the samples for non-binned
447    #        runs this will just return the average of samples, however
448    #        a bin array still must be passed
449    def avg(self, stat, bins, ticks):
450        return self.outer('avg', 'sum', stat, bins, ticks)
451
452    # Name: stdev
453    # Desc: given a run, a stat and an array of samples and bins,
454    #        sum all the bins and then get the standard deviation of the
455    #        samples for non-binned runs. This will just return the average
456    #        of samples, however a bin array still must be passed
457    def stdev(self, stat, bins, ticks):
458        return self.outer('stddev', 'sum', stat, bins, ticks)
459
460    def __setattr__(self, attr, value):
461        super(Database, self).__setattr__(attr, value)
462        if attr != 'method':
463            return
464
465        if value == 'sum':
466            self._method = self.sum
467        elif value == 'avg':
468            self._method = self.avg
469        elif value == 'stdev':
470            self._method = self.stdev
471        else:
472            raise AttributeError, "can only set get to: sum | avg | stdev"
473
474    def data(self, stat, bins=None, ticks=None):
475        if bins is None:
476            bins = self.bins
477        if ticks is None:
478            ticks = self.ticks
479        sql = self._method(self, stat, bins, ticks)
480        self.query(sql)
481
482        runs = {}
483        xmax = 0
484        ymax = 0
485        for x in self.cursor.fetchall():
486            data = Data(x)
487            if not runs.has_key(data.run):
488                runs[data.run] = {}
489            if not runs[data.run].has_key(data.x):
490                runs[data.run][data.x] = {}
491
492            xmax = max(xmax, data.x)
493            ymax = max(ymax, data.y)
494            runs[data.run][data.x][data.y] = data.data
495
496        results = Result(xmax + 1, ymax + 1)
497        for run,data in runs.iteritems():
498            result = results[run]
499            for x,ydata in data.iteritems():
500                for y,data in ydata.iteritems():
501                    result[x][y] = data
502        return results
503
504    def __getitem__(self, key):
505        return self.stattop[key]
506