db.py revision 2014
1# Copyright (c) 2003-2004 The Regents of The University of Michigan 2# All rights reserved. 3# 4# Redistribution and use in source and binary forms, with or without 5# modification, are permitted provided that the following conditions are 6# met: redistributions of source code must retain the above copyright 7# notice, this list of conditions and the following disclaimer; 8# redistributions in binary form must reproduce the above copyright 9# notice, this list of conditions and the following disclaimer in the 10# documentation and/or other materials provided with the distribution; 11# neither the name of the copyright holders nor the names of its 12# contributors may be used to endorse or promote products derived from 13# this software without specific prior written permission. 14# 15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27import MySQLdb, re, string 28 29def statcmp(a, b): 30 v1 = a.split('.') 31 v2 = b.split('.') 32 33 last = min(len(v1), len(v2)) - 1 34 for i,j in zip(v1[0:last], v2[0:last]): 35 if i != j: 36 return cmp(i, j) 37 38 # Special compare for last element. 39 if len(v1) == len(v2): 40 return cmp(v1[last], v2[last]) 41 else: 42 return cmp(len(v1), len(v2)) 43 44class RunData: 45 def __init__(self, row): 46 self.run = int(row[0]) 47 self.name = row[1] 48 self.user = row[2] 49 self.project = row[3] 50 51class SubData: 52 def __init__(self, row): 53 self.stat = int(row[0]) 54 self.x = int(row[1]) 55 self.y = int(row[2]) 56 self.name = row[3] 57 self.descr = row[4] 58 59class Data: 60 def __init__(self, row): 61 if len(row) != 5: 62 raise 'stat db error' 63 self.stat = int(row[0]) 64 self.run = int(row[1]) 65 self.x = int(row[2]) 66 self.y = int(row[3]) 67 self.data = float(row[4]) 68 69 def __repr__(self): 70 return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat, 71 self.run, self.x, self.y, self.data) 72 73class StatData(object): 74 def __init__(self, row): 75 self.stat = int(row[0]) 76 self.name = row[1] 77 self.desc = row[2] 78 self.type = row[3] 79 self.prereq = int(row[5]) 80 self.precision = int(row[6]) 81 82 import flags 83 self.flags = 0 84 if int(row[4]): self.flags |= flags.printable 85 if int(row[7]): self.flags |= flags.nozero 86 if int(row[8]): self.flags |= flags.nonan 87 if int(row[9]): self.flags |= flags.total 88 if int(row[10]): self.flags |= flags.pdf 89 if int(row[11]): self.flags |= flags.cdf 90 91 if self.type == 'DIST' or self.type == 'VECTORDIST': 92 self.min = float(row[12]) 93 self.max = float(row[13]) 94 self.bktsize = float(row[14]) 95 self.size = int(row[15]) 96 97 if self.type == 'FORMULA': 98 self.formula = self.db.allFormulas[self.stat] 99 100class Node(object): 101 def __init__(self, name): 102 self.name = name 103 def __str__(self): 104 return self.name 105 106class Result(object): 107 def __init__(self, x, y): 108 self.data = {} 109 self.x = x 110 self.y = y 111 112 def __contains__(self, run): 113 return run in self.data 114 115 def __getitem__(self, run): 116 if run not in self.data: 117 self.data[run] = [ [ 0.0 ] * self.y for i in xrange(self.x) ] 118 return self.data[run] 119 120class Database(object): 121 def __init__(self): 122 self.host = 'zizzer.pool' 123 self.user = '' 124 self.passwd = '' 125 self.db = 'm5stats' 126 self.cursor = None 127 128 self.allStats = [] 129 self.allStatIds = {} 130 self.allStatNames = {} 131 132 self.allSubData = {} 133 134 self.allRuns = [] 135 self.allRunIds = {} 136 self.allRunNames = {} 137 138 self.allBins = [] 139 self.allBinIds = {} 140 self.allBinNames = {} 141 142 self.allFormulas = {} 143 144 self.stattop = {} 145 self.statdict = {} 146 self.statlist = [] 147 148 self.mode = 'sum'; 149 self.runs = None 150 self.bins = None 151 self.ticks = None 152 self.method = 'sum' 153 self._method = type(self).sum 154 155 def get(self, job, stat, system=None): 156 run = self.allRunNames.get(str(job), None) 157 if run is None: 158 return None 159 160 from info import ProxyError, scalar, vector, value, values, total, len 161 if system is None and hasattr('system', job): 162 system = job.system 163 164 if system is not None: 165 stat.system = self[system] 166 try: 167 if scalar(stat): 168 return value(stat, run.run) 169 if vector(stat): 170 return values(stat, run.run) 171 except ProxyError: 172 return None 173 174 return None 175 176 def query(self, sql): 177 self.cursor.execute(sql) 178 179 def update_dict(self, dict): 180 dict.update(self.stattop) 181 182 def append(self, stat): 183 statname = re.sub(':', '__', stat.name) 184 path = string.split(statname, '.') 185 pathtop = path[0] 186 fullname = '' 187 188 x = self 189 while len(path) > 1: 190 name = path.pop(0) 191 if not x.__dict__.has_key(name): 192 x.__dict__[name] = Node(fullname + name) 193 x = x.__dict__[name] 194 fullname = '%s%s.' % (fullname, name) 195 196 name = path.pop(0) 197 x.__dict__[name] = stat 198 199 self.stattop[pathtop] = self.__dict__[pathtop] 200 self.statdict[statname] = stat 201 self.statlist.append(statname) 202 203 def connect(self): 204 # connect 205 self.thedb = MySQLdb.connect(db=self.db, 206 host=self.host, 207 user=self.user, 208 passwd=self.passwd) 209 210 # create a cursor 211 self.cursor = self.thedb.cursor() 212 213 self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project 214 from runs''') 215 for result in self.cursor.fetchall(): 216 run = RunData(result); 217 self.allRuns.append(run) 218 self.allRunIds[run.run] = run 219 self.allRunNames[run.name] = run 220 221 self.query('select * from bins') 222 for id,name in self.cursor.fetchall(): 223 self.allBinIds[int(id)] = name 224 self.allBinNames[name] = int(id) 225 226 self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata') 227 for result in self.cursor.fetchall(): 228 subdata = SubData(result) 229 if self.allSubData.has_key(subdata.stat): 230 self.allSubData[subdata.stat].append(subdata) 231 else: 232 self.allSubData[subdata.stat] = [ subdata ] 233 234 self.query('select * from formulas') 235 for id,formula in self.cursor.fetchall(): 236 self.allFormulas[int(id)] = formula.tostring() 237 238 StatData.db = self 239 self.query('select * from stats') 240 import info 241 for result in self.cursor.fetchall(): 242 stat = info.NewStat(self, StatData(result)) 243 self.append(stat) 244 self.allStats.append(stat) 245 self.allStatIds[stat.stat] = stat 246 self.allStatNames[stat.name] = stat 247 248 # Name: listbins 249 # Desc: Prints all bins matching regex argument, if no argument 250 # is given all bins are returned 251 def listBins(self, regex='.*'): 252 print '%-50s %-10s' % ('bin name', 'id') 253 print '-' * 61 254 names = self.allBinNames.keys() 255 names.sort() 256 for name in names: 257 id = self.allBinNames[name] 258 print '%-50s %-10d' % (name, id) 259 260 # Name: listruns 261 # Desc: Prints all runs matching a given user, if no argument 262 # is given all runs are returned 263 def listRuns(self, user=None): 264 print '%-40s %-10s %-5s' % ('run name', 'user', 'id') 265 print '-' * 62 266 for run in self.allRuns: 267 if user == None or user == run.user: 268 print '%-40s %-10s %-10d' % (run.name, run.user, run.run) 269 270 # Name: listTicks 271 # Desc: Prints all samples for a given run 272 def listTicks(self, runs=None): 273 print "tick" 274 print "----------------------------------------" 275 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 276 if runs != None: 277 first = True 278 for run in runs: 279 if first: 280 # sql += ' where' 281 first = False 282 else: 283 sql += ' or' 284 sql += ' dt_run=%s' % run.run 285 sql += ')' 286 self.query(sql) 287 for r in self.cursor.fetchall(): 288 print r[0] 289 290 # Name: retTicks 291 # Desc: Prints all samples for a given run 292 def retTicks(self, runs=None): 293 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 294 if runs != None: 295 first = True 296 for run in runs: 297 if first: 298 first = False 299 else: 300 sql += ' or' 301 sql += ' dt_run=%s' % run.run 302 sql += ')' 303 self.query(sql) 304 ret = [] 305 for r in self.cursor.fetchall(): 306 ret.append(r[0]) 307 return ret 308 309 # Name: liststats 310 # Desc: Prints all statistics that appear in the database, 311 # the optional argument is a regular expression that can 312 # be used to prune the result set 313 def listStats(self, regex=None): 314 print '%-60s %-8s %-10s' % ('stat name', 'id', 'type') 315 print '-' * 80 316 317 rx = None 318 if regex != None: 319 rx = re.compile(regex) 320 321 stats = [ stat.name for stat in self.allStats ] 322 stats.sort(statcmp) 323 for stat in stats: 324 stat = self.allStatNames[stat] 325 if rx == None or rx.match(stat.name): 326 print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type) 327 328 # Name: liststats 329 # Desc: Prints all statistics that appear in the database, 330 # the optional argument is a regular expression that can 331 # be used to prune the result set 332 def listFormulas(self, regex=None): 333 print '%-60s %s' % ('formula name', 'formula') 334 print '-' * 80 335 336 rx = None 337 if regex != None: 338 rx = re.compile(regex) 339 340 stats = [ stat.name for stat in self.allStats ] 341 stats.sort(statcmp) 342 for stat in stats: 343 stat = self.allStatNames[stat] 344 if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)): 345 print '%-60s %s' % (stat.name, self.allFormulas[stat.stat]) 346 347 def getStat(self, stats): 348 if type(stats) is not list: 349 stats = [ stats ] 350 351 ret = [] 352 for stat in stats: 353 if type(stat) is int: 354 ret.append(self.allStatIds[stat]) 355 356 if type(stat) is str: 357 rx = re.compile(stat) 358 for stat in self.allStats: 359 if rx.match(stat.name): 360 ret.append(stat) 361 return ret 362 363 def getBin(self, bins): 364 if type(bins) is not list: 365 bins = [ bins ] 366 367 ret = [] 368 for bin in bins: 369 if type(bin) is int: 370 ret.append(bin) 371 elif type(bin) is str: 372 ret.append(self.allBinNames[bin]) 373 else: 374 for name,id in self.allBinNames.items(): 375 if bin.match(name): 376 ret.append(id) 377 378 return ret 379 380 def getNotBin(self, bin): 381 map = {} 382 for bin in getBin(bin): 383 map[bin] = 1 384 385 ret = [] 386 for bin in self.allBinIds.keys(): 387 if not map.has_key(bin): 388 ret.append(bin) 389 390 return ret 391 392 ######################################### 393 # get the data 394 # 395 def inner(self, op, stat, bins, ticks, group=False): 396 sql = 'select ' 397 sql += 'dt_stat as stat, ' 398 sql += 'dt_run as run, ' 399 sql += 'dt_x as x, ' 400 sql += 'dt_y as y, ' 401 if group: 402 sql += 'dt_tick as tick, ' 403 sql += '%s(dt_data) as data ' % op 404 sql += 'from data ' 405 sql += 'where ' 406 407 if isinstance(stat, list): 408 val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ]) 409 sql += ' (%s)' % val 410 else: 411 sql += ' dt_stat=%d' % stat.stat 412 413 if self.runs != None and len(self.runs): 414 val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ]) 415 sql += ' and (%s)' % val 416 417 if bins != None and len(bins): 418 val = ' or '.join([ 'dt_bin=%d' % b for b in bins ]) 419 sql += ' and (%s)' % val 420 421 if ticks != None and len(ticks): 422 val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ]) 423 sql += ' and (%s)' % val 424 425 sql += ' group by dt_stat,dt_run,dt_x,dt_y' 426 if group: 427 sql += ',dt_tick' 428 return sql 429 430 def outer(self, op_out, op_in, stat, bins, ticks): 431 sql = self.inner(op_in, stat, bins, ticks, True) 432 sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql) 433 sql += 'group by stat,run,x,y' 434 return sql 435 436 # Name: sum 437 # Desc: given a run, a stat and an array of samples and bins, 438 # sum all the bins and then get the standard deviation of the 439 # samples for non-binned runs. This will just return the average 440 # of samples, however a bin array still must be passed 441 def sum(self, stat, bins, ticks): 442 return self.inner('sum', stat, bins, ticks) 443 444 # Name: avg 445 # Desc: given a run, a stat and an array of samples and bins, 446 # sum all the bins and then average the samples for non-binned 447 # runs this will just return the average of samples, however 448 # a bin array still must be passed 449 def avg(self, stat, bins, ticks): 450 return self.outer('avg', 'sum', stat, bins, ticks) 451 452 # Name: stdev 453 # Desc: given a run, a stat and an array of samples and bins, 454 # sum all the bins and then get the standard deviation of the 455 # samples for non-binned runs. This will just return the average 456 # of samples, however a bin array still must be passed 457 def stdev(self, stat, bins, ticks): 458 return self.outer('stddev', 'sum', stat, bins, ticks) 459 460 def __setattr__(self, attr, value): 461 super(Database, self).__setattr__(attr, value) 462 if attr != 'method': 463 return 464 465 if value == 'sum': 466 self._method = self.sum 467 elif value == 'avg': 468 self._method = self.avg 469 elif value == 'stdev': 470 self._method = self.stdev 471 else: 472 raise AttributeError, "can only set get to: sum | avg | stdev" 473 474 def data(self, stat, bins=None, ticks=None): 475 if bins is None: 476 bins = self.bins 477 if ticks is None: 478 ticks = self.ticks 479 sql = self._method(self, stat, bins, ticks) 480 self.query(sql) 481 482 runs = {} 483 xmax = 0 484 ymax = 0 485 for x in self.cursor.fetchall(): 486 data = Data(x) 487 if not runs.has_key(data.run): 488 runs[data.run] = {} 489 if not runs[data.run].has_key(data.x): 490 runs[data.run][data.x] = {} 491 492 xmax = max(xmax, data.x) 493 ymax = max(ymax, data.y) 494 runs[data.run][data.x][data.y] = data.data 495 496 results = Result(xmax + 1, ymax + 1) 497 for run,data in runs.iteritems(): 498 result = results[run] 499 for x,ydata in data.iteritems(): 500 for y,data in ydata.iteritems(): 501 result[x][y] = data 502 return results 503 504 def __getitem__(self, key): 505 return self.stattop[key] 506