db.py revision 1309:7daf185ddae8
1import MySQLdb, re, string
2
3def statcmp(a, b):
4    v1 = a.split('.')
5    v2 = b.split('.')
6
7    last = min(len(v1), len(v2)) - 1
8    for i,j in zip(v1[0:last], v2[0:last]):
9        if i != j:
10            return cmp(i, j)
11
12    # Special compare for last element.
13    if len(v1) == len(v2):
14        return cmp(v1[last], v2[last])
15    else:
16        return cmp(len(v1), len(v2))
17
18class RunData:
19    def __init__(self, row):
20        self.run = int(row[0])
21        self.name = row[1]
22        self.user = row[2]
23        self.project = row[3]
24
25class SubData:
26    def __init__(self, row):
27        self.stat = int(row[0])
28        self.x = int(row[1])
29        self.y = int(row[2])
30        self.name = row[3]
31        self.descr = row[4]
32
33class Data:
34    def __init__(self, row):
35        if len(row) != 5:
36            raise 'stat db error'
37        self.stat = int(row[0])
38        self.run = int(row[1])
39        self.x = int(row[2])
40        self.y = int(row[3])
41        self.data = float(row[4])
42
43    def __repr__(self):
44        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
45            self.run, self.x, self.y, self.data)
46
47class StatData(object):
48    def __init__(self, row):
49        self.stat = int(row[0])
50        self.name = row[1]
51        self.desc = row[2]
52        self.type = row[3]
53        self.prereq = int(row[5])
54        self.precision = int(row[6])
55
56        import flags
57        self.flags = 0
58        if int(row[4]): self.flags |= flags.printable
59        if int(row[7]): self.flags |= flags.nozero
60        if int(row[8]): self.flags |= flags.nonan
61        if int(row[9]): self.flags |= flags.total
62        if int(row[10]): self.flags |= flags.pdf
63        if int(row[11]): self.flags |= flags.cdf
64
65        if self.type == 'DIST' or self.type == 'VECTORDIST':
66            self.min = float(row[12])
67            self.max = float(row[13])
68            self.bktsize = float(row[14])
69            self.size = int(row[15])
70
71        if self.type == 'FORMULA':
72            self.formula = self.db.allFormulas[self.stat]
73
74class Node(object):
75    def __init__(self, name):
76        self.name = name
77    def __str__(self):
78        return name
79
80class Database(object):
81    def __init__(self):
82        self.host = 'zizzer.pool'
83        self.user = ''
84        self.passwd = ''
85        self.db = 'm5stats'
86        self.cursor = None
87
88        self.allStats = []
89        self.allStatIds = {}
90        self.allStatNames = {}
91
92        self.allSubData = {}
93
94        self.allRuns = []
95        self.allRunIds = {}
96        self.allRunNames = {}
97
98        self.allBins = []
99        self.allBinIds = {}
100        self.allBinNames = {}
101
102        self.allFormulas = {}
103
104        self.stattop = {}
105        self.statdict = {}
106        self.statlist = []
107
108        self.mode = 'sum';
109        self.runs = None
110        self.bins = None
111        self.ticks = None
112        self.__dict__['get'] = type(self).sum
113
114    def query(self, sql):
115        self.cursor.execute(sql)
116
117    def update_dict(self, dict):
118        dict.update(self.stattop)
119
120    def append(self, stat):
121        statname = re.sub(':', '__', stat.name)
122        path = string.split(statname, '.')
123        pathtop = path[0]
124        fullname = ''
125
126        x = self
127        while len(path) > 1:
128            name = path.pop(0)
129            if not x.__dict__.has_key(name):
130                x.__dict__[name] = Node(fullname + name)
131            x = x.__dict__[name]
132            fullname = '%s%s.' % (fullname, name)
133
134        name = path.pop(0)
135        x.__dict__[name] = stat
136
137        self.stattop[pathtop] = self.__dict__[pathtop]
138        self.statdict[statname] = stat
139        self.statlist.append(statname)
140
141    def connect(self):
142        # connect
143        self.thedb = MySQLdb.connect(db=self.db,
144                                     host=self.host,
145                                     user=self.user,
146                                     passwd=self.passwd)
147
148        # create a cursor
149        self.cursor = self.thedb.cursor()
150
151        self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
152                   from runs''')
153        for result in self.cursor.fetchall():
154            run = RunData(result);
155            self.allRuns.append(run)
156            self.allRunIds[run.run] = run
157            self.allRunNames[run.name] = run
158
159        self.query('select * from bins')
160        for id,name in self.cursor.fetchall():
161            self.allBinIds[int(id)] = name
162            self.allBinNames[name] = int(id)
163
164        self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
165        for result in self.cursor.fetchall():
166            subdata = SubData(result)
167            if self.allSubData.has_key(subdata.stat):
168                self.allSubData[subdata.stat].append(subdata)
169            else:
170                self.allSubData[subdata.stat] = [ subdata ]
171
172        self.query('select * from formulas')
173        for id,formula in self.cursor.fetchall():
174            self.allFormulas[int(id)] = formula.tostring()
175
176        StatData.db = self
177        self.query('select * from stats')
178        import info
179        for result in self.cursor.fetchall():
180            stat = info.NewStat(StatData(result))
181            self.append(stat)
182            self.allStats.append(stat)
183            self.allStatIds[stat.stat] = stat
184            self.allStatNames[stat.name] = stat
185
186    # Name: listbins
187    # Desc: Prints all bins matching regex argument, if no argument
188    #       is given all bins are returned
189    def listBins(self, regex='.*'):
190        print '%-50s %-10s' % ('bin name', 'id')
191        print '-' * 61
192        names = self.allBinNames.keys()
193        names.sort()
194        for name in names:
195            id = self.allBinNames[name]
196            print '%-50s %-10d' % (name, id)
197
198    # Name: listruns
199    # Desc: Prints all runs matching a given user, if no argument
200    #       is given all runs are returned
201    def listRuns(self, user=None):
202        print '%-40s %-10s %-5s' % ('run name', 'user', 'id')
203        print '-' * 62
204        for run in self.allRuns:
205            if user == None or user == run.user:
206                print '%-40s %-10s %-10d' % (run.name, run.user, run.run)
207
208    # Name: listTicks
209    # Desc: Prints all samples for a given run
210    def listTicks(self, runs=None):
211        print "tick"
212        print "----------------------------------------"
213        sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
214        if runs != None:
215            first = True
216            for run in runs:
217               if first:
218            #       sql += ' where'
219                   first = False
220               else:
221                   sql += ' or'
222               sql += ' dt_run=%s' % run.run
223            sql += ')'
224        self.query(sql)
225        for r in self.cursor.fetchall():
226            print r[0]
227
228    # Name: retTicks
229    # Desc: Prints all samples for a given run
230    def retTicks(self, runs=None):
231        sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
232        if runs != None:
233            first = True
234            for run in runs:
235               if first:
236                   first = False
237               else:
238                   sql += ' or'
239               sql += ' dt_run=%s' % run.run
240            sql += ')'
241        self.query(sql)
242        ret = []
243        for r in self.cursor.fetchall():
244            ret.append(r[0])
245        return ret
246
247    # Name: liststats
248    # Desc: Prints all statistics that appear in the database,
249    #         the optional argument is a regular expression that can
250    #         be used to prune the result set
251    def listStats(self, regex=None):
252        print '%-60s %-8s %-10s' % ('stat name', 'id', 'type')
253        print '-' * 80
254
255        rx = None
256        if regex != None:
257            rx = re.compile(regex)
258
259        stats = [ stat.name for stat in self.allStats ]
260        stats.sort(statcmp)
261        for stat in stats:
262            stat = self.allStatNames[stat]
263            if rx == None or rx.match(stat.name):
264                print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type)
265
266    # Name: liststats
267    # Desc: Prints all statistics that appear in the database,
268    #         the optional argument is a regular expression that can
269    #         be used to prune the result set
270    def listFormulas(self, regex=None):
271        print '%-60s %s' % ('formula name', 'formula')
272        print '-' * 80
273
274        rx = None
275        if regex != None:
276            rx = re.compile(regex)
277
278        stats = [ stat.name for stat in self.allStats ]
279        stats.sort(statcmp)
280        for stat in stats:
281            stat = self.allStatNames[stat]
282            if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)):
283                print '%-60s %s' % (stat.name, self.allFormulas[stat.stat])
284
285    def getStat(self, stats):
286        if type(stats) is not list:
287            stats = [ stats ]
288
289        ret = []
290        for stat in stats:
291            if type(stat) is int:
292                ret.append(self.allStatIds[stat])
293
294            if type(stat) is str:
295                rx = re.compile(stat)
296                for stat in self.allStats:
297                    if rx.match(stat.name):
298                        ret.append(stat)
299        return ret
300
301    def getBin(self, bins):
302        if type(bins) is not list:
303            bins = [ bins ]
304
305        ret = []
306        for bin in bins:
307            if type(bin) is int:
308                ret.append(bin)
309            elif type(bin) is str:
310                ret.append(self.allBinNames[bin])
311            else:
312                for name,id in self.allBinNames.items():
313                    if bin.match(name):
314                        ret.append(id)
315
316        return ret
317
318    def getNotBin(self, bin):
319        map = {}
320        for bin in getBin(bin):
321            map[bin] = 1
322
323        ret = []
324        for bin in self.allBinIds.keys():
325            if not map.has_key(bin):
326                ret.append(bin)
327
328        return ret
329
330    #########################################
331    # get the data
332    #
333    def inner(self, op, stat, bins, ticks, group=False):
334        sql = 'select '
335        sql += 'dt_stat as stat, '
336        sql += 'dt_run as run, '
337        sql += 'dt_x as x, '
338        sql += 'dt_y as y, '
339        if group:
340            sql += 'dt_tick as tick, '
341        sql += '%s(dt_data) as data ' % op
342        sql += 'from data '
343        sql += 'where '
344
345        if isinstance(stat, list):
346            val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
347            sql += ' (%s)' % val
348        else:
349            sql += ' dt_stat=%d' % stat.stat
350
351        if self.runs != None and len(self.runs):
352            val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
353            sql += ' and (%s)' % val
354
355        if bins != None and len(bins):
356            val = ' or '.join([ 'dt_bin=%d' % b for b in bins ])
357            sql += ' and (%s)' % val
358
359        if ticks != None and len(ticks):
360            val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
361            sql += ' and (%s)' % val
362
363        sql += ' group by dt_stat,dt_run,dt_x,dt_y'
364        if group:
365            sql += ',dt_tick'
366        return sql
367
368    def outer(self, op_out, op_in, stat, bins, ticks):
369        sql = self.inner(op_in, stat, bins, ticks, True)
370        sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
371        sql += 'group by stat,run,x,y'
372        return sql
373
374    # Name: sum
375    # Desc: given a run, a stat and an array of samples and bins,
376    #        sum all the bins and then get the standard deviation of the
377    #        samples for non-binned runs. This will just return the average
378    #        of samples, however a bin array still must be passed
379    def sum(self, stat, bins, ticks):
380        return self.inner('sum', stat, bins, ticks)
381
382    # Name: avg
383    # Desc: given a run, a stat and an array of samples and bins,
384    #        sum all the bins and then average the samples for non-binned
385    #        runs this will just return the average of samples, however
386    #        a bin array still must be passed
387    def avg(self, stat, bins, ticks):
388        return self.outer('avg', 'sum', stat, bins, ticks)
389
390    # Name: stdev
391    # Desc: given a run, a stat and an array of samples and bins,
392    #        sum all the bins and then get the standard deviation of the
393    #        samples for non-binned runs. This will just return the average
394    #        of samples, however a bin array still must be passed
395    def stdev(self, stat, bins, ticks):
396        return self.outer('stddev', 'sum', stat, bins, ticks)
397
398    def __getattribute__(self, attr):
399        if attr != 'get':
400            return super(Database, self).__getattribute__(attr)
401
402        if self.__dict__['get'] == type(self).sum:
403            return 'sum'
404        elif self.__dict__['get'] == type(self).avg:
405            return 'avg'
406        elif self.__dict__['get'] == type(self).stdev:
407            return 'stdev'
408        else:
409            return ''
410
411    def __setattr__(self, attr, value):
412        if attr != 'get':
413            super(Database, self).__setattr__(attr, value)
414            return
415
416        if value == 'sum':
417            self.__dict__['get'] = type(self).sum
418        elif value == 'avg':
419            self.__dict__['get'] = type(self).avg
420        elif value == 'stdev':
421            self.__dict__['get'] = type(self).stdev
422        else:
423            raise AttributeError, "can only set get to: sum | avg | stdev"
424
425    def data(self, stat, bins=None, ticks=None):
426        if bins is None:
427            bins = self.bins
428        if ticks is None:
429            ticks = self.ticks
430        sql = self.__dict__['get'](self, stat, bins, ticks)
431        self.query(sql)
432
433        runs = {}
434        for x in self.cursor.fetchall():
435            data = Data(x)
436            if not runs.has_key(data.run):
437                runs[data.run] = {}
438            if not runs[data.run].has_key(data.x):
439                runs[data.run][data.x] = {}
440
441            runs[data.run][data.x][data.y] = data.data
442        return runs
443