Cross Reference: /gem5/util/stats/db.py

Deleted Added

sdiff udiff text old ( 2343:a2b4a6ccee56 ) new ( 2665:a124942bacb8 )

full compact

1# Copyright (c) 2003-2004 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26#
27# Authors: Nathan Binkert
28
29import MySQLdb, re, string
30
def statcmp(a, b):
    """Three-way comparison of two dotted statistic names.

    Both names are split on '.'.  Every component except the last one of
    the shorter name is compared pairwise and the first mismatch decides
    the result.  When those leading components agree, names with the same
    number of components are ordered by their final component; otherwise
    the name with fewer components sorts first.

    Returns -1, 0 or 1 (the convention of Python 2's ``cmp``), so it can be
    used as an old-style comparison function or wrapped with
    ``functools.cmp_to_key``.
    """
    def three_way(x, y):
        # Portable replacement for the cmp() builtin, which no longer
        # exists in Python 3.
        return (x > y) - (x < y)

    v1 = a.split('.')
    v2 = b.split('.')

    last = min(len(v1), len(v2)) - 1
    for i, j in zip(v1[:last], v2[:last]):
        if i != j:
            return three_way(i, j)

    # Special compare for last element.
    if len(v1) == len(v2):
        return three_way(v1[last], v2[last])
    return three_way(len(v1), len(v2))
45
class RunData:
    """One row of the ``runs`` table.

    Two row layouts are accepted:

      * 5 or more columns: (id, name, sample, user, project), which is the
        column order produced by a
        ``select rn_id,rn_name,rn_sample,rn_user,rn_project`` query;
      * fewer than 5 columns: (id, name, user, project).

    Attributes set: ``run`` (int), ``name``, ``sample`` (None for the
    4-column layout), ``user`` and ``project``.
    """
    def __init__(self, row):
        self.run = int(row[0])
        self.name = row[1]
        if len(row) >= 5:
            # The sample column sits between name and user; reading user
            # from index 2 here would pick up the sample instead.
            self.sample = row[2]
            self.user = row[3]
            self.project = row[4]
        else:
            self.sample = None
            self.user = row[2]
            self.project = row[3]
52
class SubData:
    """One row of the ``subdata`` table.

    ``row`` is read positionally as (stat id, x, y, name, description).
    The first three entries are converted with ``int``; the name and the
    description are stored exactly as given.
    """
    def __init__(self, row):
        self.stat, self.x, self.y = int(row[0]), int(row[1]), int(row[2])
        self.name, self.descr = row[3], row[4]
60
class Data:
    """One aggregated data point: (stat, run, x, y, data).

    ``row`` must have exactly five entries.  The first four are converted
    to int and the last one to float.

    Raises:
        ValueError: if ``row`` does not have exactly five entries.
    """
    def __init__(self, row):
        if len(row) != 5:
            # String exceptions (raise 'text') are rejected by the
            # interpreter itself, so raise a real exception type.
            raise ValueError('stat db error')
        self.stat = int(row[0])
        self.run = int(row[1])
        self.x = int(row[2])
        self.y = int(row[3])
        self.data = float(row[4])

    def __repr__(self):
        """Return a ``Data([...])`` expression that rebuilds this point."""
        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % (
            self.stat, self.run, self.x, self.y, self.data)
74
class StatData(object):
    """Decoded row of the ``stats`` table.

    ``row`` is read positionally:

      0      stat id (int)
      1-3    name, description, type
      4      'printable' flag
      5, 6   prereq and precision (int)
      7-11   'nozero', 'nonan', 'total', 'pdf', 'cdf' flags
      12-15  min, max, bucket size (float) and size (int); only read when
             the type is 'DIST' or 'VECTORDIST'

    For 'FORMULA' stats the formula is looked up by stat id in
    ``self.db.allFormulas``; ``db`` is not set here and has to be present
    on the class or the instance beforehand.
    """
    def __init__(self, row):
        self.stat = int(row[0])
        self.name, self.desc, self.type = row[1], row[2], row[3]
        self.prereq = int(row[5])
        self.precision = int(row[6])

        import flags
        # (row column, attribute of the flags module) pairs; a non-zero
        # column value switches the corresponding bit on.
        flag_columns = (
            (4, 'printable'),
            (7, 'nozero'),
            (8, 'nonan'),
            (9, 'total'),
            (10, 'pdf'),
            (11, 'cdf'),
        )
        self.flags = 0
        for column, flag_name in flag_columns:
            if int(row[column]):
                self.flags |= getattr(flags, flag_name)

        if self.type in ('DIST', 'VECTORDIST'):
            self.min = float(row[12])
            self.max = float(row[13])
            self.bktsize = float(row[14])
            self.size = int(row[15])

        if self.type == 'FORMULA':
            self.formula = self.db.allFormulas[self.stat]
101
class Node(object):
    """A bare named object; ``str(node)`` yields the stored name."""

    def __init__(self, name):
        self.name = name

    def __str__(self):
        return self.name
107
class Result(object):
    """Per-run grids of floats, created on first access.

    ``x`` and ``y`` are the grid dimensions.  Indexing with a run that has
    not been seen yet creates an ``x`` by ``y`` grid of 0.0 for it, so
    ``run in result`` only becomes true after the first ``result[run]``.
    """
    def __init__(self, x, y):
        self.data = {}
        self.x = x
        self.y = y

    def __contains__(self, run):
        """Return True if a grid has already been created for ``run``."""
        return run in self.data

    def __getitem__(self, run):
        """Return the grid for ``run``, creating a zero-filled one if new."""
        if run not in self.data:
            # range() instead of the Python-2-only xrange(); each row is a
            # separate list so writes to one row never alias another.
            self.data[run] = [[0.0] * self.y for _ in range(self.x)]
        return self.data[run]
121
class Database(object):
    """Front end to the m5stats MySQL statistics database.

    connect() loads the runs, bins, subdata, formulas and stats tables into
    the ``all*`` lookup structures.  The list*/get* methods inspect that
    metadata, and data() runs an aggregate query over the ``data`` table
    using the SQL builder selected through the ``method`` attribute
    ('sum', 'avg' or 'stdev').
    """

    def __init__(self):
        # Connection settings handed to MySQLdb.connect() by connect().
        self.host = 'zizzer.pool'
        self.user = ''
        self.passwd = ''
        self.db = 'm5stats'
        self.cursor = None

        self.allStats = []
        self.allStatIds = {}
        self.allStatNames = {}

        self.allSubData = {}

        self.allRuns = []
        self.allRunIds = {}
        self.allRunNames = {}

        self.allBins = []
        self.allBinIds = {}
        self.allBinNames = {}

        self.allFormulas = {}

        self.stattop = {}
        self.statdict = {}
        self.statlist = []

        self.mode = 'sum'
        self.runs = None
        self.bins = None
        self.ticks = None
        # Assigning 'method' also installs the matching SQL builder in
        # self._method (see __setattr__).
        self.method = 'sum'

    def get(self, job, stat, system=None):
        """Return the value(s) of ``stat`` for the run named ``str(job)``.

        Returns None when no run of that name is known, when the stat is
        neither scalar nor vector according to the ``info`` module, or when
        evaluating it raises ``info.ProxyError``.  If ``system`` is not
        given and ``job`` has a ``system`` attribute, that one is used; a
        non-None system is looked up with ``self[system]`` and stored on
        ``stat.system`` before evaluation.
        """
        run = self.allRunNames.get(str(job), None)
        if run is None:
            return None

        from info import ProxyError, scalar, vector, value, values
        if system is None and hasattr(job, 'system'):
            system = job.system

        if system is not None:
            stat.system = self[system]
        try:
            if scalar(stat):
                return value(stat, run.run)
            if vector(stat):
                return values(stat, run.run)
        except ProxyError:
            return None

        return None

    def query(self, sql):
        """Execute ``sql`` on the current cursor; results stay on it."""
        self.cursor.execute(sql)

    def update_dict(self, dict):
        """Copy every top-level stat entry (``stattop``) into ``dict``."""
        dict.update(self.stattop)

    def append(self, stat):
        """Register ``stat`` under its dotted name as nested attributes.

        ':' in the name is replaced by '__'.  Every path component except
        the last becomes (or reuses) an attribute holding a Node; the last
        component is bound to ``stat`` itself.  The stat is also recorded
        in ``stattop`` (by first component), ``statdict`` and ``statlist``.
        """
        statname = re.sub(':', '__', stat.name)
        path = statname.split('.')
        pathtop = path[0]
        fullname = ''

        # Walk/create the intermediate nodes.  Writing through __dict__
        # bypasses __setattr__ and works for Node objects as well.
        x = self
        for name in path[:-1]:
            if name not in x.__dict__:
                x.__dict__[name] = Node(fullname + name)
            x = x.__dict__[name]
            fullname = '%s%s.' % (fullname, name)

        x.__dict__[path[-1]] = stat

        self.stattop[pathtop] = self.__dict__[pathtop]
        self.statdict[statname] = stat
        self.statlist.append(statname)

    def connect(self):
        """Open the MySQL connection and load all metadata tables."""
        # connect
        self.thedb = MySQLdb.connect(db=self.db,
                                     host=self.host,
                                     user=self.user,
                                     passwd=self.passwd)

        # create a cursor
        self.cursor = self.thedb.cursor()

        # Select exactly the (id, name, user, project) columns that
        # RunData reads from positions 0..3; also selecting rn_sample in
        # third place would shift user/project by one column.
        self.query('select rn_id,rn_name,rn_user,rn_project from runs')
        for result in self.cursor.fetchall():
            run = RunData(result)
            self.allRuns.append(run)
            self.allRunIds[run.run] = run
            self.allRunNames[run.name] = run

        # Relies on the bins table having exactly two columns: id, name.
        self.query('select * from bins')
        for bin_id, name in self.cursor.fetchall():
            self.allBinIds[int(bin_id)] = name
            self.allBinNames[name] = int(bin_id)

        self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
        for result in self.cursor.fetchall():
            subdata = SubData(result)
            self.allSubData.setdefault(subdata.stat, []).append(subdata)

        # NOTE(review): assumes the driver returns the formula column as an
        # object with a tostring() method -- confirm for the MySQLdb
        # version in use.
        self.query('select * from formulas')
        for formula_id, formula in self.cursor.fetchall():
            self.allFormulas[int(formula_id)] = formula.tostring()

        # StatData reads formulas through its class-level 'db' attribute.
        StatData.db = self
        self.query('select * from stats')
        import info
        for result in self.cursor.fetchall():
            stat = info.NewStat(self, StatData(result))
            self.append(stat)
            self.allStats.append(stat)
            self.allStatIds[stat.stat] = stat
            self.allStatNames[stat.name] = stat

    def listBins(self, regex='.*'):
        """Print name and id of every bin whose name matches ``regex``.

        ``regex`` is applied with ``re.match`` (anchored at the start of
        the name).  The default, or None, lists all bins.
        """
        rx = None
        if regex is not None:
            rx = re.compile(regex)

        print('%-50s %-10s' % ('bin name', 'id'))
        print('-' * 61)
        for name in sorted(self.allBinNames):
            if rx is None or rx.match(name):
                print('%-50s %-10d' % (name, self.allBinNames[name]))

    def listRuns(self, user=None):
        """Print name, user and id of every run, optionally for one user."""
        print('%-40s %-10s %-5s' % ('run name', 'user', 'id'))
        print('-' * 62)
        for run in self.allRuns:
            if user is None or user == run.user:
                print('%-40s %-10s %-10d' % (run.name, run.user, run.run))

    def _tickSql(self, runs):
        """Build the distinct-tick query shared by listTicks and retTicks.

        The run filter is only appended when ``runs`` is non-empty, so a
        missing or empty run list yields valid SQL covering all runs.
        """
        # NOTE(review): 1180 is a hard-coded stat id taken over unchanged;
        # its meaning is not visible in this file.
        sql = 'select distinct dt_tick from data where dt_stat=1180'
        if runs:
            clause = ' or'.join([' dt_run=%s' % run.run for run in runs])
            sql += ' and (%s)' % clause
        return sql

    def listTicks(self, runs=None):
        """Print the distinct ticks recorded for the given runs.

        ``runs`` is an iterable of objects with a ``run`` attribute; None
        or an empty sequence means all runs.
        """
        print("tick")
        print("----------------------------------------")
        self.query(self._tickSql(runs))
        for r in self.cursor.fetchall():
            print(r[0])

    def retTicks(self, runs=None):
        """Return the distinct ticks recorded for the given runs as a list.

        Takes the same ``runs`` argument as listTicks.
        """
        self.query(self._tickSql(runs))
        return [r[0] for r in self.cursor.fetchall()]

    def _sortedStats(self):
        """Return the loaded stats ordered by name using statcmp."""
        from functools import cmp_to_key
        names = [stat.name for stat in self.allStats]
        # list.sort() accepts a comparison function only on Python 2;
        # cmp_to_key gives the same ordering on both major versions.
        names.sort(key=cmp_to_key(statcmp))
        return [self.allStatNames[name] for name in names]

    def listStats(self, regex=None):
        """Print name, id and type of every stat, sorted by name.

        If ``regex`` is given, only stats whose name matches it (with
        ``re.match``) are printed.
        """
        print('%-60s %-8s %-10s' % ('stat name', 'id', 'type'))
        print('-' * 80)

        rx = None
        if regex is not None:
            rx = re.compile(regex)

        for stat in self._sortedStats():
            if rx is None or rx.match(stat.name):
                print('%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type))

    def listFormulas(self, regex=None):
        """Print name and formula text of every FORMULA stat, by name.

        If ``regex`` is given, only stats whose name matches it (with
        ``re.match``) are printed.
        """
        print('%-60s %s' % ('formula name', 'formula'))
        print('-' * 80)

        rx = None
        if regex is not None:
            rx = re.compile(regex)

        for stat in self._sortedStats():
            if stat.type == 'FORMULA' and (rx is None or rx.match(stat.name)):
                print('%-60s %s' % (stat.name, self.allFormulas[stat.stat]))

    def getStat(self, stats):
        """Resolve stat ids and/or name patterns to stat objects.

        ``stats`` is a single item or a list of items.  An int is looked up
        by id (KeyError if unknown); a str is compiled as a regular
        expression and selects every stat whose name it matches.  Items of
        any other type are ignored.
        """
        if not isinstance(stats, list):
            stats = [stats]

        ret = []
        for spec in stats:
            if isinstance(spec, int):
                ret.append(self.allStatIds[spec])
            elif isinstance(spec, str):
                rx = re.compile(spec)
                ret.extend([s for s in self.allStats if rx.match(s.name)])
        return ret

    def getBin(self, bins):
        """Resolve bin ids, names and/or compiled patterns to bin ids.

        ``bins`` is a single item or a list of items.  An int is taken as
        an id as is; a str is an exact bin name (KeyError if unknown);
        anything else must provide ``match(name)`` (for example a compiled
        regular expression) and selects every matching bin.
        """
        if not isinstance(bins, list):
            bins = [bins]

        ret = []
        for spec in bins:
            if isinstance(spec, int):
                ret.append(spec)
            elif isinstance(spec, str):
                ret.append(self.allBinNames[spec])
            else:
                for name, bin_id in self.allBinNames.items():
                    if spec.match(name):
                        ret.append(bin_id)

        return ret

    def getNotBin(self, bin):
        """Return the ids of all known bins NOT selected by ``bin``.

        ``bin`` accepts whatever getBin accepts.
        """
        # getBin is a method; calling it without 'self.' is a NameError.
        excluded = set(self.getBin(bin))
        return [bin_id for bin_id in self.allBinIds.keys()
                if bin_id not in excluded]

    #########################################
    # get the data
    #
    def inner(self, op, stat, bins, ticks, group=False):
        """Build the SQL that applies ``op`` to dt_data per data cell.

        Rows of the ``data`` table are restricted to ``stat`` (one object
        with a ``stat`` id, or a list of them), to ``self.runs``, ``bins``
        and ``ticks`` where those are non-empty, and grouped by
        (stat, run, x, y) -- plus tick when ``group`` is true.
        """
        columns = ['dt_stat as stat', 'dt_run as run',
                   'dt_x as x', 'dt_y as y']
        if group:
            columns.append('dt_tick as tick')
        columns.append('%s(dt_data) as data' % op)
        sql = 'select %s from data where ' % ', '.join(columns)

        if isinstance(stat, list):
            val = ' or '.join(['dt_stat=%d' % s.stat for s in stat])
            sql += ' (%s)' % val
        else:
            sql += ' dt_stat=%d' % stat.stat

        def any_of(column, ids):
            # " and (col=a or col=b ...)"
            val = ' or '.join(['%s=%d' % (column, i) for i in ids])
            return ' and (%s)' % val

        if self.runs:
            sql += any_of('dt_run', self.runs)

        if bins:
            sql += any_of('dt_bin', bins)

        if ticks:
            sql += any_of('dt_tick', ticks)

        sql += ' group by dt_stat,dt_run,dt_x,dt_y'
        if group:
            sql += ',dt_tick'
        return sql

    def outer(self, op_out, op_in, stat, bins, ticks):
        """Build SQL applying ``op_out`` across ticks to per-tick ``op_in``.

        The inner query aggregates with ``op_in`` per (stat, run, x, y,
        tick); the outer one applies ``op_out`` to those per-tick values
        for each (stat, run, x, y).
        """
        sql = self.inner(op_in, stat, bins, ticks, True)
        sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
        sql += 'group by stat,run,x,y'
        return sql

    def sum(self, stat, bins, ticks):
        """Build the SQL that sums the data over the selected bins/ticks."""
        return self.inner('sum', stat, bins, ticks)

    def avg(self, stat, bins, ticks):
        """Build the SQL that sums per tick, then averages over ticks."""
        return self.outer('avg', 'sum', stat, bins, ticks)

    def stdev(self, stat, bins, ticks):
        """Build the SQL that sums per tick, then takes stddev over ticks."""
        return self.outer('stddev', 'sum', stat, bins, ticks)

    def __setattr__(self, attr, value):
        """Set an attribute; assigning ``method`` also selects the builder.

        ``method`` must be 'sum', 'avg' or 'stdev'.  Any other value raises
        AttributeError and leaves both ``method`` and the current builder
        untouched.
        """
        if attr == 'method':
            if value not in ('sum', 'avg', 'stdev'):
                raise AttributeError(
                    "can only set method to: sum | avg | stdev")
            # Store the function taken from the class, not a bound method:
            # data() passes self explicitly, so a bound method would
            # receive self twice.
            builder = getattr(type(self), value)
            super(Database, self).__setattr__('_method', builder)
        super(Database, self).__setattr__(attr, value)

    def data(self, stat, bins=None, ticks=None):
        """Run the aggregate query for ``stat`` and return a Result.

        ``bins`` and ``ticks`` default to ``self.bins`` / ``self.ticks``.
        The Result grid is sized to the largest x and y index seen in the
        returned rows (1 by 1 when there are no rows); cells without a row
        keep the grid's initial 0.0.
        """
        if bins is None:
            bins = self.bins
        if ticks is None:
            ticks = self.ticks
        sql = self._method(self, stat, bins, ticks)
        self.query(sql)

        # First pass: collect the points and find the grid extent, which
        # must be known before any per-run grid is created.
        runs = {}
        xmax = 0
        ymax = 0
        for row in self.cursor.fetchall():
            point = Data(row)
            cells = runs.setdefault(point.run, {}).setdefault(point.x, {})
            cells[point.y] = point.data
            xmax = max(xmax, point.x)
            ymax = max(ymax, point.y)

        # Second pass: copy the points into the per-run grids.
        results = Result(xmax + 1, ymax + 1)
        for run, xdata in runs.items():
            grid = results[run]
            for x, ydata in xdata.items():
                for y, value in ydata.items():
                    grid[x][y] = value
        return results

    def __getitem__(self, key):
        """Return the top-level stat entry registered under ``key``."""
        return self.stattop[key]