Deleted Added
sdiff udiff text old ( 1309:7daf185ddae8 ) new ( 1758:74acd5b23964 )
full compact
1import MySQLdb, re, string
2
3def statcmp(a, b):
4 v1 = a.split('.')
5 v2 = b.split('.')
6
7 last = min(len(v1), len(v2)) - 1
8 for i,j in zip(v1[0:last], v2[0:last]):
9 if i != j:
10 return cmp(i, j)
11
12 # Special compare for last element.
13 if len(v1) == len(v2):
14 return cmp(v1[last], v2[last])
15 else:
16 return cmp(len(v1), len(v2))
17
18class RunData:
19 def __init__(self, row):
20 self.run = int(row[0])
21 self.name = row[1]
22 self.user = row[2]
23 self.project = row[3]
24
25class SubData:
26 def __init__(self, row):
27 self.stat = int(row[0])
28 self.x = int(row[1])
29 self.y = int(row[2])
30 self.name = row[3]
31 self.descr = row[4]
32
33class Data:
34 def __init__(self, row):
35 if len(row) != 5:
36 raise 'stat db error'
37 self.stat = int(row[0])
38 self.run = int(row[1])
39 self.x = int(row[2])
40 self.y = int(row[3])
41 self.data = float(row[4])
42
43 def __repr__(self):
44 return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
45 self.run, self.x, self.y, self.data)
46
47class StatData(object):
48 def __init__(self, row):
49 self.stat = int(row[0])
50 self.name = row[1]
51 self.desc = row[2]
52 self.type = row[3]
53 self.prereq = int(row[5])
54 self.precision = int(row[6])
55
56 import flags
57 self.flags = 0
58 if int(row[4]): self.flags |= flags.printable
59 if int(row[7]): self.flags |= flags.nozero
60 if int(row[8]): self.flags |= flags.nonan
61 if int(row[9]): self.flags |= flags.total
62 if int(row[10]): self.flags |= flags.pdf
63 if int(row[11]): self.flags |= flags.cdf
64
65 if self.type == 'DIST' or self.type == 'VECTORDIST':
66 self.min = float(row[12])
67 self.max = float(row[13])
68 self.bktsize = float(row[14])
69 self.size = int(row[15])
70
71 if self.type == 'FORMULA':
72 self.formula = self.db.allFormulas[self.stat]
73
74class Node(object):
75 def __init__(self, name):
76 self.name = name
77 def __str__(self):
78 return name
79
80class Database(object):
81 def __init__(self):
82 self.host = 'zizzer.pool'
83 self.user = ''
84 self.passwd = ''
85 self.db = 'm5stats'
86 self.cursor = None
87
88 self.allStats = []
89 self.allStatIds = {}
90 self.allStatNames = {}
91
92 self.allSubData = {}
93
94 self.allRuns = []
95 self.allRunIds = {}
96 self.allRunNames = {}
97
98 self.allBins = []
99 self.allBinIds = {}
100 self.allBinNames = {}
101
102 self.allFormulas = {}
103
104 self.stattop = {}
105 self.statdict = {}
106 self.statlist = []
107
108 self.mode = 'sum';
109 self.runs = None
110 self.bins = None
111 self.ticks = None
112 self.__dict__['get'] = type(self).sum
113
114 def query(self, sql):
115 self.cursor.execute(sql)
116
117 def update_dict(self, dict):
118 dict.update(self.stattop)
119
120 def append(self, stat):
121 statname = re.sub(':', '__', stat.name)
122 path = string.split(statname, '.')
123 pathtop = path[0]
124 fullname = ''
125
126 x = self
127 while len(path) > 1:
128 name = path.pop(0)
129 if not x.__dict__.has_key(name):
130 x.__dict__[name] = Node(fullname + name)
131 x = x.__dict__[name]
132 fullname = '%s%s.' % (fullname, name)
133
134 name = path.pop(0)
135 x.__dict__[name] = stat
136
137 self.stattop[pathtop] = self.__dict__[pathtop]
138 self.statdict[statname] = stat
139 self.statlist.append(statname)
140
141 def connect(self):
142 # connect
143 self.thedb = MySQLdb.connect(db=self.db,
144 host=self.host,
145 user=self.user,
146 passwd=self.passwd)
147
148 # create a cursor
149 self.cursor = self.thedb.cursor()
150
151 self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
152 from runs''')
153 for result in self.cursor.fetchall():
154 run = RunData(result);
155 self.allRuns.append(run)
156 self.allRunIds[run.run] = run
157 self.allRunNames[run.name] = run
158
159 self.query('select * from bins')
160 for id,name in self.cursor.fetchall():
161 self.allBinIds[int(id)] = name
162 self.allBinNames[name] = int(id)
163
164 self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
165 for result in self.cursor.fetchall():
166 subdata = SubData(result)
167 if self.allSubData.has_key(subdata.stat):
168 self.allSubData[subdata.stat].append(subdata)
169 else:
170 self.allSubData[subdata.stat] = [ subdata ]
171
172 self.query('select * from formulas')
173 for id,formula in self.cursor.fetchall():
174 self.allFormulas[int(id)] = formula.tostring()
175
176 StatData.db = self
177 self.query('select * from stats')
178 import info
179 for result in self.cursor.fetchall():
180 stat = info.NewStat(StatData(result))
181 self.append(stat)
182 self.allStats.append(stat)
183 self.allStatIds[stat.stat] = stat
184 self.allStatNames[stat.name] = stat
185
186 # Name: listbins
187 # Desc: Prints all bins matching regex argument, if no argument
188 # is given all bins are returned
189 def listBins(self, regex='.*'):
190 print '%-50s %-10s' % ('bin name', 'id')
191 print '-' * 61
192 names = self.allBinNames.keys()
193 names.sort()
194 for name in names:
195 id = self.allBinNames[name]
196 print '%-50s %-10d' % (name, id)
197
198 # Name: listruns
199 # Desc: Prints all runs matching a given user, if no argument
200 # is given all runs are returned
201 def listRuns(self, user=None):
202 print '%-40s %-10s %-5s' % ('run name', 'user', 'id')
203 print '-' * 62
204 for run in self.allRuns:
205 if user == None or user == run.user:
206 print '%-40s %-10s %-10d' % (run.name, run.user, run.run)
207
208 # Name: listTicks
209 # Desc: Prints all samples for a given run
210 def listTicks(self, runs=None):
211 print "tick"
212 print "----------------------------------------"
213 sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
214 if runs != None:
215 first = True
216 for run in runs:
217 if first:
218 # sql += ' where'
219 first = False
220 else:
221 sql += ' or'
222 sql += ' dt_run=%s' % run.run
223 sql += ')'
224 self.query(sql)
225 for r in self.cursor.fetchall():
226 print r[0]
227
228 # Name: retTicks
229 # Desc: Prints all samples for a given run
230 def retTicks(self, runs=None):
231 sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
232 if runs != None:
233 first = True
234 for run in runs:
235 if first:
236 first = False
237 else:
238 sql += ' or'
239 sql += ' dt_run=%s' % run.run
240 sql += ')'
241 self.query(sql)
242 ret = []
243 for r in self.cursor.fetchall():
244 ret.append(r[0])
245 return ret
246
247 # Name: liststats
248 # Desc: Prints all statistics that appear in the database,
249 # the optional argument is a regular expression that can
250 # be used to prune the result set
251 def listStats(self, regex=None):
252 print '%-60s %-8s %-10s' % ('stat name', 'id', 'type')
253 print '-' * 80
254
255 rx = None
256 if regex != None:
257 rx = re.compile(regex)
258
259 stats = [ stat.name for stat in self.allStats ]
260 stats.sort(statcmp)
261 for stat in stats:
262 stat = self.allStatNames[stat]
263 if rx == None or rx.match(stat.name):
264 print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type)
265
266 # Name: liststats
267 # Desc: Prints all statistics that appear in the database,
268 # the optional argument is a regular expression that can
269 # be used to prune the result set
270 def listFormulas(self, regex=None):
271 print '%-60s %s' % ('formula name', 'formula')
272 print '-' * 80
273
274 rx = None
275 if regex != None:
276 rx = re.compile(regex)
277
278 stats = [ stat.name for stat in self.allStats ]
279 stats.sort(statcmp)
280 for stat in stats:
281 stat = self.allStatNames[stat]
282 if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)):
283 print '%-60s %s' % (stat.name, self.allFormulas[stat.stat])
284
285 def getStat(self, stats):
286 if type(stats) is not list:
287 stats = [ stats ]
288
289 ret = []
290 for stat in stats:
291 if type(stat) is int:
292 ret.append(self.allStatIds[stat])
293
294 if type(stat) is str:
295 rx = re.compile(stat)
296 for stat in self.allStats:
297 if rx.match(stat.name):
298 ret.append(stat)
299 return ret
300
301 def getBin(self, bins):
302 if type(bins) is not list:
303 bins = [ bins ]
304
305 ret = []
306 for bin in bins:
307 if type(bin) is int:
308 ret.append(bin)
309 elif type(bin) is str:
310 ret.append(self.allBinNames[bin])
311 else:
312 for name,id in self.allBinNames.items():
313 if bin.match(name):
314 ret.append(id)
315
316 return ret
317
318 def getNotBin(self, bin):
319 map = {}
320 for bin in getBin(bin):
321 map[bin] = 1
322
323 ret = []
324 for bin in self.allBinIds.keys():
325 if not map.has_key(bin):
326 ret.append(bin)
327
328 return ret
329
330 #########################################
331 # get the data
332 #
333 def inner(self, op, stat, bins, ticks, group=False):
334 sql = 'select '
335 sql += 'dt_stat as stat, '
336 sql += 'dt_run as run, '
337 sql += 'dt_x as x, '
338 sql += 'dt_y as y, '
339 if group:
340 sql += 'dt_tick as tick, '
341 sql += '%s(dt_data) as data ' % op
342 sql += 'from data '
343 sql += 'where '
344
345 if isinstance(stat, list):
346 val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
347 sql += ' (%s)' % val
348 else:
349 sql += ' dt_stat=%d' % stat.stat
350
351 if self.runs != None and len(self.runs):
352 val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
353 sql += ' and (%s)' % val
354
355 if bins != None and len(bins):
356 val = ' or '.join([ 'dt_bin=%d' % b for b in bins ])
357 sql += ' and (%s)' % val
358
359 if ticks != None and len(ticks):
360 val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
361 sql += ' and (%s)' % val
362
363 sql += ' group by dt_stat,dt_run,dt_x,dt_y'
364 if group:
365 sql += ',dt_tick'
366 return sql
367
368 def outer(self, op_out, op_in, stat, bins, ticks):
369 sql = self.inner(op_in, stat, bins, ticks, True)
370 sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
371 sql += 'group by stat,run,x,y'
372 return sql
373
374 # Name: sum
375 # Desc: given a run, a stat and an array of samples and bins,
376 # sum all the bins and then get the standard deviation of the
377 # samples for non-binned runs. This will just return the average
378 # of samples, however a bin array still must be passed
379 def sum(self, stat, bins, ticks):
380 return self.inner('sum', stat, bins, ticks)
381
382 # Name: avg
383 # Desc: given a run, a stat and an array of samples and bins,
384 # sum all the bins and then average the samples for non-binned
385 # runs this will just return the average of samples, however
386 # a bin array still must be passed
387 def avg(self, stat, bins, ticks):
388 return self.outer('avg', 'sum', stat, bins, ticks)
389
390 # Name: stdev
391 # Desc: given a run, a stat and an array of samples and bins,
392 # sum all the bins and then get the standard deviation of the
393 # samples for non-binned runs. This will just return the average
394 # of samples, however a bin array still must be passed
395 def stdev(self, stat, bins, ticks):
396 return self.outer('stddev', 'sum', stat, bins, ticks)
397
398 def __getattribute__(self, attr):
399 if attr != 'get':
400 return super(Database, self).__getattribute__(attr)
401
402 if self.__dict__['get'] == type(self).sum:
403 return 'sum'
404 elif self.__dict__['get'] == type(self).avg:
405 return 'avg'
406 elif self.__dict__['get'] == type(self).stdev:
407 return 'stdev'
408 else:
409 return ''
410
411 def __setattr__(self, attr, value):
412 if attr != 'get':
413 super(Database, self).__setattr__(attr, value)
414 return
415
416 if value == 'sum':
417 self.__dict__['get'] = type(self).sum
418 elif value == 'avg':
419 self.__dict__['get'] = type(self).avg
420 elif value == 'stdev':
421 self.__dict__['get'] = type(self).stdev
422 else:
423 raise AttributeError, "can only set get to: sum | avg | stdev"
424
425 def data(self, stat, bins=None, ticks=None):
426 if bins is None:
427 bins = self.bins
428 if ticks is None:
429 ticks = self.ticks
430 sql = self.__dict__['get'](self, stat, bins, ticks)
431 self.query(sql)
432
433 runs = {}
434 for x in self.cursor.fetchall():
435 data = Data(x)
436 if not runs.has_key(data.run):
437 runs[data.run] = {}
438 if not runs[data.run].has_key(data.x):
439 runs[data.run][data.x] = {}
440
441 runs[data.run][data.x][data.y] = data.data
442 return runs