db.py revision 1929
1# Copyright (c) 2003-2004 The Regents of The University of Michigan 2# All rights reserved. 3# 4# Redistribution and use in source and binary forms, with or without 5# modification, are permitted provided that the following conditions are 6# met: redistributions of source code must retain the above copyright 7# notice, this list of conditions and the following disclaimer; 8# redistributions in binary form must reproduce the above copyright 9# notice, this list of conditions and the following disclaimer in the 10# documentation and/or other materials provided with the distribution; 11# neither the name of the copyright holders nor the names of its 12# contributors may be used to endorse or promote products derived from 13# this software without specific prior written permission. 14# 15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27import MySQLdb, re, string 28 29def statcmp(a, b): 30 v1 = a.split('.') 31 v2 = b.split('.') 32 33 last = min(len(v1), len(v2)) - 1 34 for i,j in zip(v1[0:last], v2[0:last]): 35 if i != j: 36 return cmp(i, j) 37 38 # Special compare for last element. 39 if len(v1) == len(v2): 40 return cmp(v1[last], v2[last]) 41 else: 42 return cmp(len(v1), len(v2)) 43 44class RunData: 45 def __init__(self, row): 46 self.run = int(row[0]) 47 self.name = row[1] 48 self.user = row[2] 49 self.project = row[3] 50 51class SubData: 52 def __init__(self, row): 53 self.stat = int(row[0]) 54 self.x = int(row[1]) 55 self.y = int(row[2]) 56 self.name = row[3] 57 self.descr = row[4] 58 59class Data: 60 def __init__(self, row): 61 if len(row) != 5: 62 raise 'stat db error' 63 self.stat = int(row[0]) 64 self.run = int(row[1]) 65 self.x = int(row[2]) 66 self.y = int(row[3]) 67 self.data = float(row[4]) 68 69 def __repr__(self): 70 return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat, 71 self.run, self.x, self.y, self.data) 72 73class StatData(object): 74 def __init__(self, row): 75 self.stat = int(row[0]) 76 self.name = row[1] 77 self.desc = row[2] 78 self.type = row[3] 79 self.prereq = int(row[5]) 80 self.precision = int(row[6]) 81 82 import flags 83 self.flags = 0 84 if int(row[4]): self.flags |= flags.printable 85 if int(row[7]): self.flags |= flags.nozero 86 if int(row[8]): self.flags |= flags.nonan 87 if int(row[9]): self.flags |= flags.total 88 if int(row[10]): self.flags |= flags.pdf 89 if int(row[11]): self.flags |= flags.cdf 90 91 if self.type == 'DIST' or self.type == 'VECTORDIST': 92 self.min = float(row[12]) 93 self.max = float(row[13]) 94 self.bktsize = float(row[14]) 95 self.size = int(row[15]) 96 97 if self.type == 'FORMULA': 98 self.formula = self.db.allFormulas[self.stat] 99 100class Node(object): 101 def __init__(self, name): 102 self.name = name 103 def __str__(self): 104 return self.name 105 106class Result(object): 107 def __init__(self, x, y): 108 self.data = {} 109 self.x = x 110 self.y = y 111 112 def __contains__(self, run): 113 return run in self.data 114 115 def __getitem__(self, run): 116 if run not in self.data: 117 self.data[run] = [ [ 0.0 ] * self.y for i in xrange(self.x) ] 118 return self.data[run] 119 120class Database(object): 121 def __init__(self): 122 self.host = 'zizzer.pool' 123 self.user = '' 124 self.passwd = '' 125 self.db = 'm5stats' 126 self.cursor = None 127 128 self.allStats = [] 129 self.allStatIds = {} 130 self.allStatNames = {} 131 132 self.allSubData = {} 133 134 self.allRuns = [] 135 self.allRunIds = {} 136 self.allRunNames = {} 137 138 self.allBins = [] 139 self.allBinIds = {} 140 self.allBinNames = {} 141 142 self.allFormulas = {} 143 144 self.stattop = {} 145 self.statdict = {} 146 self.statlist = [] 147 148 self.mode = 'sum'; 149 self.runs = None 150 self.bins = None 151 self.ticks = None 152 self.method = 'sum' 153 self._method = type(self).sum 154 155 def get(self, job, stat): 156 run = self.allRunNames.get(job.name, None) 157 if run is None: 158 print 'run "%s" not found' % job 159 return None 160 161 from info import scalar, vector, value, total, len 162 stat.system = self[job.system] 163 if scalar(stat): 164 return value(stat, run.run) 165 if vector(stat): 166 return values(stat, run.run) 167 168 return None 169 170 def query(self, sql): 171 self.cursor.execute(sql) 172 173 def update_dict(self, dict): 174 dict.update(self.stattop) 175 176 def append(self, stat): 177 statname = re.sub(':', '__', stat.name) 178 path = string.split(statname, '.') 179 pathtop = path[0] 180 fullname = '' 181 182 x = self 183 while len(path) > 1: 184 name = path.pop(0) 185 if not x.__dict__.has_key(name): 186 x.__dict__[name] = Node(fullname + name) 187 x = x.__dict__[name] 188 fullname = '%s%s.' % (fullname, name) 189 190 name = path.pop(0) 191 x.__dict__[name] = stat 192 193 self.stattop[pathtop] = self.__dict__[pathtop] 194 self.statdict[statname] = stat 195 self.statlist.append(statname) 196 197 def connect(self): 198 # connect 199 self.thedb = MySQLdb.connect(db=self.db, 200 host=self.host, 201 user=self.user, 202 passwd=self.passwd) 203 204 # create a cursor 205 self.cursor = self.thedb.cursor() 206 207 self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project 208 from runs''') 209 for result in self.cursor.fetchall(): 210 run = RunData(result); 211 self.allRuns.append(run) 212 self.allRunIds[run.run] = run 213 self.allRunNames[run.name] = run 214 215 self.query('select * from bins') 216 for id,name in self.cursor.fetchall(): 217 self.allBinIds[int(id)] = name 218 self.allBinNames[name] = int(id) 219 220 self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata') 221 for result in self.cursor.fetchall(): 222 subdata = SubData(result) 223 if self.allSubData.has_key(subdata.stat): 224 self.allSubData[subdata.stat].append(subdata) 225 else: 226 self.allSubData[subdata.stat] = [ subdata ] 227 228 self.query('select * from formulas') 229 for id,formula in self.cursor.fetchall(): 230 self.allFormulas[int(id)] = formula.tostring() 231 232 StatData.db = self 233 self.query('select * from stats') 234 import info 235 for result in self.cursor.fetchall(): 236 stat = info.NewStat(self, StatData(result)) 237 self.append(stat) 238 self.allStats.append(stat) 239 self.allStatIds[stat.stat] = stat 240 self.allStatNames[stat.name] = stat 241 242 # Name: listbins 243 # Desc: Prints all bins matching regex argument, if no argument 244 # is given all bins are returned 245 def listBins(self, regex='.*'): 246 print '%-50s %-10s' % ('bin name', 'id') 247 print '-' * 61 248 names = self.allBinNames.keys() 249 names.sort() 250 for name in names: 251 id = self.allBinNames[name] 252 print '%-50s %-10d' % (name, id) 253 254 # Name: listruns 255 # Desc: Prints all runs matching a given user, if no argument 256 # is given all runs are returned 257 def listRuns(self, user=None): 258 print '%-40s %-10s %-5s' % ('run name', 'user', 'id') 259 print '-' * 62 260 for run in self.allRuns: 261 if user == None or user == run.user: 262 print '%-40s %-10s %-10d' % (run.name, run.user, run.run) 263 264 # Name: listTicks 265 # Desc: Prints all samples for a given run 266 def listTicks(self, runs=None): 267 print "tick" 268 print "----------------------------------------" 269 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 270 if runs != None: 271 first = True 272 for run in runs: 273 if first: 274 # sql += ' where' 275 first = False 276 else: 277 sql += ' or' 278 sql += ' dt_run=%s' % run.run 279 sql += ')' 280 self.query(sql) 281 for r in self.cursor.fetchall(): 282 print r[0] 283 284 # Name: retTicks 285 # Desc: Prints all samples for a given run 286 def retTicks(self, runs=None): 287 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 288 if runs != None: 289 first = True 290 for run in runs: 291 if first: 292 first = False 293 else: 294 sql += ' or' 295 sql += ' dt_run=%s' % run.run 296 sql += ')' 297 self.query(sql) 298 ret = [] 299 for r in self.cursor.fetchall(): 300 ret.append(r[0]) 301 return ret 302 303 # Name: liststats 304 # Desc: Prints all statistics that appear in the database, 305 # the optional argument is a regular expression that can 306 # be used to prune the result set 307 def listStats(self, regex=None): 308 print '%-60s %-8s %-10s' % ('stat name', 'id', 'type') 309 print '-' * 80 310 311 rx = None 312 if regex != None: 313 rx = re.compile(regex) 314 315 stats = [ stat.name for stat in self.allStats ] 316 stats.sort(statcmp) 317 for stat in stats: 318 stat = self.allStatNames[stat] 319 if rx == None or rx.match(stat.name): 320 print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type) 321 322 # Name: liststats 323 # Desc: Prints all statistics that appear in the database, 324 # the optional argument is a regular expression that can 325 # be used to prune the result set 326 def listFormulas(self, regex=None): 327 print '%-60s %s' % ('formula name', 'formula') 328 print '-' * 80 329 330 rx = None 331 if regex != None: 332 rx = re.compile(regex) 333 334 stats = [ stat.name for stat in self.allStats ] 335 stats.sort(statcmp) 336 for stat in stats: 337 stat = self.allStatNames[stat] 338 if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)): 339 print '%-60s %s' % (stat.name, self.allFormulas[stat.stat]) 340 341 def getStat(self, stats): 342 if type(stats) is not list: 343 stats = [ stats ] 344 345 ret = [] 346 for stat in stats: 347 if type(stat) is int: 348 ret.append(self.allStatIds[stat]) 349 350 if type(stat) is str: 351 rx = re.compile(stat) 352 for stat in self.allStats: 353 if rx.match(stat.name): 354 ret.append(stat) 355 return ret 356 357 def getBin(self, bins): 358 if type(bins) is not list: 359 bins = [ bins ] 360 361 ret = [] 362 for bin in bins: 363 if type(bin) is int: 364 ret.append(bin) 365 elif type(bin) is str: 366 ret.append(self.allBinNames[bin]) 367 else: 368 for name,id in self.allBinNames.items(): 369 if bin.match(name): 370 ret.append(id) 371 372 return ret 373 374 def getNotBin(self, bin): 375 map = {} 376 for bin in getBin(bin): 377 map[bin] = 1 378 379 ret = [] 380 for bin in self.allBinIds.keys(): 381 if not map.has_key(bin): 382 ret.append(bin) 383 384 return ret 385 386 ######################################### 387 # get the data 388 # 389 def inner(self, op, stat, bins, ticks, group=False): 390 sql = 'select ' 391 sql += 'dt_stat as stat, ' 392 sql += 'dt_run as run, ' 393 sql += 'dt_x as x, ' 394 sql += 'dt_y as y, ' 395 if group: 396 sql += 'dt_tick as tick, ' 397 sql += '%s(dt_data) as data ' % op 398 sql += 'from data ' 399 sql += 'where ' 400 401 if isinstance(stat, list): 402 val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ]) 403 sql += ' (%s)' % val 404 else: 405 sql += ' dt_stat=%d' % stat.stat 406 407 if self.runs != None and len(self.runs): 408 val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ]) 409 sql += ' and (%s)' % val 410 411 if bins != None and len(bins): 412 val = ' or '.join([ 'dt_bin=%d' % b for b in bins ]) 413 sql += ' and (%s)' % val 414 415 if ticks != None and len(ticks): 416 val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ]) 417 sql += ' and (%s)' % val 418 419 sql += ' group by dt_stat,dt_run,dt_x,dt_y' 420 if group: 421 sql += ',dt_tick' 422 return sql 423 424 def outer(self, op_out, op_in, stat, bins, ticks): 425 sql = self.inner(op_in, stat, bins, ticks, True) 426 sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql) 427 sql += 'group by stat,run,x,y' 428 return sql 429 430 # Name: sum 431 # Desc: given a run, a stat and an array of samples and bins, 432 # sum all the bins and then get the standard deviation of the 433 # samples for non-binned runs. This will just return the average 434 # of samples, however a bin array still must be passed 435 def sum(self, stat, bins, ticks): 436 return self.inner('sum', stat, bins, ticks) 437 438 # Name: avg 439 # Desc: given a run, a stat and an array of samples and bins, 440 # sum all the bins and then average the samples for non-binned 441 # runs this will just return the average of samples, however 442 # a bin array still must be passed 443 def avg(self, stat, bins, ticks): 444 return self.outer('avg', 'sum', stat, bins, ticks) 445 446 # Name: stdev 447 # Desc: given a run, a stat and an array of samples and bins, 448 # sum all the bins and then get the standard deviation of the 449 # samples for non-binned runs. This will just return the average 450 # of samples, however a bin array still must be passed 451 def stdev(self, stat, bins, ticks): 452 return self.outer('stddev', 'sum', stat, bins, ticks) 453 454 def __setattr__(self, attr, value): 455 super(Database, self).__setattr__(attr, value) 456 if attr != 'method': 457 return 458 459 if value == 'sum': 460 self._method = self.sum 461 elif value == 'avg': 462 self._method = self.avg 463 elif value == 'stdev': 464 self._method = self.stdev 465 else: 466 raise AttributeError, "can only set get to: sum | avg | stdev" 467 468 def data(self, stat, bins=None, ticks=None): 469 if bins is None: 470 bins = self.bins 471 if ticks is None: 472 ticks = self.ticks 473 sql = self._method(self, stat, bins, ticks) 474 self.query(sql) 475 476 runs = {} 477 xmax = 0 478 ymax = 0 479 for x in self.cursor.fetchall(): 480 data = Data(x) 481 if not runs.has_key(data.run): 482 runs[data.run] = {} 483 if not runs[data.run].has_key(data.x): 484 runs[data.run][data.x] = {} 485 486 xmax = max(xmax, data.x) 487 ymax = max(ymax, data.y) 488 runs[data.run][data.x][data.y] = data.data 489 490 results = Result(xmax + 1, ymax + 1) 491 for run,data in runs.iteritems(): 492 result = results[run] 493 for x,ydata in data.iteritems(): 494 for y,data in ydata.iteritems(): 495 result[x][y] = data 496 return results 497 498 def __getitem__(self, key): 499 return self.stattop[key] 500