# db.py revision 2665:a124942bacb8
# Copyright (c) 2003-2004 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert

import MySQLdb, re, string

def statcmp(a, b):
    """cmp-style comparison of two dotted stat names.

    Both names are split on '.'.  The components that precede the last
    component of the shorter name are compared pairwise as strings and
    the first difference decides.  If they are all equal, names with the
    same number of components are ordered by their last component;
    otherwise the name with fewer components sorts first.

    Returns a negative, zero or positive integer, suitable for
    list.sort(statcmp).
    """
    v1 = a.split('.')
    v2 = b.split('.')

    last = min(len(v1), len(v2)) - 1
    for i, j in zip(v1[0:last], v2[0:last]):
        if i != j:
            return cmp(i, j)

    # Special compare for last element.
    if len(v1) == len(v2):
        return cmp(v1[last], v2[last])
    return cmp(len(v1), len(v2))

class RunData:
    """One row of the runs query issued by Database.connect()."""
    def __init__(self, row):
        self.run = int(row[0])
        self.name = row[1]
        # NOTE(review): Database.connect() selects
        # rn_id,rn_name,rn_sample,rn_user,rn_project, so row[2] is the
        # rn_sample column and row[3] is rn_user.  The attribute names
        # below therefore look shifted by one column -- confirm against
        # the schema before relying on .user / .project.
        self.user = row[2]
        self.project = row[3]

class SubData:
    """One row of the subdata query (sd_stat, sd_x, sd_y, sd_name,
    sd_descr) issued by Database.connect()."""
    def __init__(self, row):
        self.stat = int(row[0])
        self.x = int(row[1])
        self.y = int(row[2])
        self.name = row[3]
        self.descr = row[4]

class Data:
    """One result row (stat, run, x, y, data) of a query built by
    Database.inner() or Database.outer()."""
    def __init__(self, row):
        # String exceptions (the original "raise 'stat db error'") are
        # not valid exceptions on Python 2.6 and later, so raise a real
        # exception class instead.
        if len(row) != 5:
            raise ValueError('stat db error: expected 5 columns, got %d'
                             % len(row))
        self.stat = int(row[0])
        self.run = int(row[1])
        self.x = int(row[2])
        self.y = int(row[3])
        self.data = float(row[4])

    def __repr__(self):
        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
            self.run, self.x, self.y, self.data)

class StatData(object):
    """One row of the stats table.

    Columns are addressed by position: 0 id, 1 name, 2 description,
    3 type, 4 printable flag, 5 prereq, 6 precision, 7-11 the nozero,
    nonan, total, pdf and cdf flags, and 12-15 the distribution
    parameters (only read for DIST and VECTORDIST stats).

    The class attribute StatData.db must be set before constructing a
    FORMULA stat; Database.connect() assigns it.
    """
    def __init__(self, row):
        self.stat = int(row[0])
        self.name = row[1]
        self.desc = row[2]
        self.type = row[3]
        self.prereq = int(row[5])
        self.precision = int(row[6])

        import flags
        self.flags = 0
        if int(row[4]): self.flags |= flags.printable
        if int(row[7]): self.flags |= flags.nozero
        if int(row[8]): self.flags |= flags.nonan
        if int(row[9]): self.flags |= flags.total
        if int(row[10]): self.flags |= flags.pdf
        if int(row[11]): self.flags |= flags.cdf

        if self.type == 'DIST' or self.type == 'VECTORDIST':
            self.min = float(row[12])
            self.max = float(row[13])
            self.bktsize = float(row[14])
            self.size = int(row[15])

        if self.type == 'FORMULA':
            # self.db resolves to the class attribute StatData.db.
            self.formula = self.db.allFormulas[self.stat]

class Node(object):
    """Named interior node of the stat name hierarchy built by
    Database.append(); children are stored as instance attributes."""
    def __init__(self, name):
        self.name = name
    def __str__(self):
        return self.name

class Result(object):
    """Per-run x-by-y tables of floats, created on first access."""
    def __init__(self, x, y):
        self.data = {}
        self.x = x
        self.y = y

    def __contains__(self, run):
        return run in self.data

    def __getitem__(self, run):
        # Indexing an unknown run creates a zero-filled table for it.
        if run not in self.data:
            self.data[run] = [ [ 0.0 ] * self.y for i in range(self.x) ]
        return self.data[run]

class Database(object):
    """In-memory view of the m5 statistics MySQL database.

    connect() loads runs, bins, subdata, formulas and stats.  Stats are
    also attached as attributes following their dotted names (see
    append()).  data() runs the aggregation query selected by the
    'method' attribute ('sum', 'avg' or 'stdev').
    """
    def __init__(self):
        self.host = 'zizzer.pool'
        self.user = ''
        self.passwd = ''
        self.db = 'm5stats'
        self.cursor = None

        self.allStats = []
        self.allStatIds = {}
        self.allStatNames = {}

        self.allSubData = {}

        self.allRuns = []
        self.allRunIds = {}
        self.allRunNames = {}

        self.allBins = []
        self.allBinIds = {}
        self.allBinNames = {}

        self.allFormulas = {}

        self.stattop = {}
        self.statdict = {}
        self.statlist = []

        self.mode = 'sum'
        self.runs = None
        self.bins = None
        self.ticks = None
        # Assigning 'method' also sets self._method (see __setattr__).
        self.method = 'sum'
        self._method = type(self).sum

    def get(self, job, stat, system=None):
        """Return the value(s) of stat for the run named str(job).

        Returns None when no run has that name, when the stat is neither
        scalar nor vector according to the info module, or when the
        lookup raises info.ProxyError.  If system is None and job has a
        'system' attribute, that attribute is used; when a system is
        known, stat.system is set to self[system] before evaluating.
        """
        run = self.allRunNames.get(str(job), None)
        if run is None:
            return None

        # NOTE: this import rebinds 'len' (and the other names) locally
        # for the remainder of this method.
        from info import ProxyError, scalar, vector, value, values, total, len
        if system is None and hasattr(job, 'system'):
            system = job.system

        if system is not None:
            stat.system = self[system]
        try:
            if scalar(stat):
                return value(stat, run.run)
            if vector(stat):
                return values(stat, run.run)
        except ProxyError:
            return None

        return None

    def query(self, sql):
        """Execute sql on the cursor created by connect()."""
        self.cursor.execute(sql)

    def update_dict(self, dict):
        """Copy the top-level stat nodes into the given dictionary."""
        dict.update(self.stattop)

    def append(self, stat):
        """Register stat under its dotted name.

        ':' in the name is replaced by '__'.  Every component but the
        last becomes a Node (created on demand) stored in the __dict__
        of its parent, starting at this Database; the last component is
        bound to the stat itself.  The top-level entry is recorded in
        stattop, and the stat in statdict and statlist.
        """
        statname = stat.name.replace(':', '__')
        path = statname.split('.')
        pathtop = path[0]
        fullname = ''

        # Entries are written straight into __dict__, so __setattr__ is
        # bypassed and a component name equal to an existing attribute
        # of the parent object replaces that attribute.
        x = self
        for name in path[:-1]:
            if name not in x.__dict__:
                x.__dict__[name] = Node(fullname + name)
            x = x.__dict__[name]
            fullname = '%s%s.' % (fullname, name)

        x.__dict__[path[-1]] = stat

        self.stattop[pathtop] = self.__dict__[pathtop]
        self.statdict[statname] = stat
        self.statlist.append(statname)

    def connect(self):
        """Connect to MySQL and load runs, bins, subdata, formulas and
        stats into the all* tables of this object."""
        # connect
        self.thedb = MySQLdb.connect(db=self.db,
                                     host=self.host,
                                     user=self.user,
                                     passwd=self.passwd)

        # create a cursor
        self.cursor = self.thedb.cursor()

        self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
                      from runs''')
        for result in self.cursor.fetchall():
            run = RunData(result)
            self.allRuns.append(run)
            self.allRunIds[run.run] = run
            self.allRunNames[run.name] = run

        self.query('select * from bins')
        for bin_id, name in self.cursor.fetchall():
            self.allBinIds[int(bin_id)] = name
            self.allBinNames[name] = int(bin_id)

        self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
        for result in self.cursor.fetchall():
            subdata = SubData(result)
            self.allSubData.setdefault(subdata.stat, []).append(subdata)

        self.query('select * from formulas')
        for formula_id, formula in self.cursor.fetchall():
            self.allFormulas[int(formula_id)] = formula.tostring()

        # StatData reads formulas through this class attribute.
        StatData.db = self
        self.query('select * from stats')
        import info
        for result in self.cursor.fetchall():
            stat = info.NewStat(self, StatData(result))
            self.append(stat)
            self.allStats.append(stat)
            self.allStatIds[stat.stat] = stat
            self.allStatNames[stat.name] = stat

    # Name: listbins
    # Desc: Prints all bins matching regex argument, if no argument
    #       is given all bins are returned
    def listBins(self, regex='.*'):
        rx = None
        if regex is not None:
            rx = re.compile(regex)

        print('%-50s %-10s' % ('bin name', 'id'))
        print('-' * 61)
        names = list(self.allBinNames.keys())
        names.sort()
        for name in names:
            if rx is None or rx.match(name):
                print('%-50s %-10d' % (name, self.allBinNames[name]))

    # Name: listruns
    # Desc: Prints all runs matching a given user, if no argument
    #       is given all runs are returned
    def listRuns(self, user=None):
        print('%-40s %-10s %-5s' % ('run name', 'user', 'id'))
        print('-' * 62)
        for run in self.allRuns:
            if user is None or user == run.user:
                print('%-40s %-10s %-10d' % (run.name, run.user, run.run))

    # Builds the query shared by listTicks and retTicks.  The run filter
    # is only added when runs is non-empty, so runs=None (or an empty
    # list) selects the ticks of every run instead of producing the
    # invalid SQL "... and ()".
    def _tickQuery(self, runs):
        # NOTE(review): stat id 1180 is hard-coded; presumably it is a
        # stat that is recorded at every sample -- confirm.
        sql = 'select distinct dt_tick from data where dt_stat=1180'
        if runs:
            clauses = [ ' dt_run=%s' % run.run for run in runs ]
            sql += ' and (' + ' or'.join(clauses) + ')'
        return sql

    # Name: listTicks
    # Desc: Prints all samples for the given runs, if no argument
    #       is given the samples of all runs are printed
    def listTicks(self, runs=None):
        print("tick")
        print("----------------------------------------")
        for tick in self.retTicks(runs):
            print(tick)

    # Name: retTicks
    # Desc: Returns a list of all samples for the given runs, if no
    #       argument is given the samples of all runs are returned
    def retTicks(self, runs=None):
        self.query(self._tickQuery(runs))
        return [ r[0] for r in self.cursor.fetchall() ]

    # Name: liststats
    # Desc: Prints all statistics that appear in the database,
    #       the optional argument is a regular expression that can
    #       be used to prune the result set
    def listStats(self, regex=None):
        print('%-60s %-8s %-10s' % ('stat name', 'id', 'type'))
        print('-' * 80)

        rx = None
        if regex is not None:
            rx = re.compile(regex)

        stats = [ stat.name for stat in self.allStats ]
        stats.sort(statcmp)
        for name in stats:
            stat = self.allStatNames[name]
            if rx is None or rx.match(stat.name):
                print('%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type))

    # Name: listformulas
    # Desc: Prints all statistics of type FORMULA together with their
    #       formula, the optional argument is a regular expression
    #       that can be used to prune the result set
    def listFormulas(self, regex=None):
        print('%-60s %s' % ('formula name', 'formula'))
        print('-' * 80)

        rx = None
        if regex is not None:
            rx = re.compile(regex)

        stats = [ stat.name for stat in self.allStats ]
        stats.sort(statcmp)
        for name in stats:
            stat = self.allStatNames[name]
            if stat.type == 'FORMULA' and (rx is None or rx.match(stat.name)):
                print('%-60s %s' % (stat.name, self.allFormulas[stat.stat]))

    def getStat(self, stats):
        """Resolve stat ids and name patterns to stat objects.

        stats is a single item or a list.  An int is looked up in
        allStatIds (KeyError if unknown); a str is compiled as a regular
        expression and every stat whose name it matches is returned.
        Items of any other type are ignored.
        """
        if type(stats) is not list:
            stats = [ stats ]

        ret = []
        for item in stats:
            if type(item) is int:
                ret.append(self.allStatIds[item])

            if type(item) is str:
                rx = re.compile(item)
                for stat in self.allStats:
                    if rx.match(stat.name):
                        ret.append(stat)
        return ret

    def getBin(self, bins):
        """Resolve bins to a list of bin ids.

        bins is a single item or a list.  An int is taken as an id, a
        str is looked up in allBinNames (KeyError if unknown), and
        anything else must provide match(name) (for example a compiled
        regular expression) and selects every bin whose name it matches.
        """
        if type(bins) is not list:
            bins = [ bins ]

        ret = []
        for bin in bins:
            if type(bin) is int:
                ret.append(bin)
            elif type(bin) is str:
                ret.append(self.allBinNames[bin])
            else:
                for name, bin_id in self.allBinNames.items():
                    if bin.match(name):
                        ret.append(bin_id)

        return ret

    def getNotBin(self, bin):
        """Return the ids of all known bins not selected by bin, which
        is interpreted as in getBin()."""
        excluded = {}
        for bin_id in self.getBin(bin):
            excluded[bin_id] = 1

        return [ bin_id for bin_id in self.allBinIds.keys()
                 if bin_id not in excluded ]

    #########################################
    # get the data
    #
    def inner(self, op, stat, bins, ticks, group=False):
        """Build the SQL that applies aggregate op to dt_data.

        Rows are restricted to stat (one stat object or a list of them),
        to self.runs, bins and ticks (each ignored when None or empty)
        and grouped by stat, run, x and y -- and by tick as well when
        group is true.  Returns the SQL string; nothing is executed.
        """
        sql = 'select '
        sql += 'dt_stat as stat, '
        sql += 'dt_run as run, '
        sql += 'dt_x as x, '
        sql += 'dt_y as y, '
        if group:
            sql += 'dt_tick as tick, '
        sql += '%s(dt_data) as data ' % op
        sql += 'from data '
        sql += 'where '

        if isinstance(stat, list):
            val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
            sql += ' (%s)' % val
        else:
            sql += ' dt_stat=%d' % stat.stat

        if self.runs is not None and len(self.runs):
            val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
            sql += ' and (%s)' % val

        if bins is not None and len(bins):
            val = ' or '.join([ 'dt_bin=%d' % b for b in bins ])
            sql += ' and (%s)' % val

        if ticks is not None and len(ticks):
            val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
            sql += ' and (%s)' % val

        sql += ' group by dt_stat,dt_run,dt_x,dt_y'
        if group:
            sql += ',dt_tick'
        return sql

    def outer(self, op_out, op_in, stat, bins, ticks):
        """Build the SQL that applies op_out across ticks to the
        per-tick results of inner(op_in, ...).  Returns the SQL string."""
        sql = self.inner(op_in, stat, bins, ticks, True)
        sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
        sql += 'group by stat,run,x,y'
        return sql

    # Name: sum
    # Desc: given a stat and arrays of bins and samples, build the
    #       query that sums the data over all selected bins and
    #       samples.  Returns the SQL string.
    def sum(self, stat, bins, ticks):
        return self.inner('sum', stat, bins, ticks)

    # Name: avg
    # Desc: given a stat and arrays of bins and samples, build the
    #       query that sums all the bins of each sample and then
    #       averages the samples.  Returns the SQL string.
    def avg(self, stat, bins, ticks):
        return self.outer('avg', 'sum', stat, bins, ticks)

    # Name: stdev
    # Desc: given a stat and arrays of bins and samples, build the
    #       query that sums all the bins of each sample and then
    #       takes the standard deviation of the samples.  Returns the
    #       SQL string.
    def stdev(self, stat, bins, ticks):
        return self.outer('stddev', 'sum', stat, bins, ticks)

    def __setattr__(self, attr, value):
        """Set an attribute; assigning 'method' also selects the query
        builder used by data().

        Raises AttributeError if 'method' is set to anything other than
        'sum', 'avg' or 'stdev'; the attribute is left unchanged then.
        """
        if attr != 'method':
            super(Database, self).__setattr__(attr, value)
            return

        if value not in ('sum', 'avg', 'stdev'):
            raise AttributeError("can only set method to: sum | avg | stdev")

        super(Database, self).__setattr__(attr, value)
        # data() calls self._method(self, ...), so store the class-level
        # function rather than a bound method (a bound method would
        # receive self twice).
        self._method = getattr(type(self), value)

    def data(self, stat, bins=None, ticks=None):
        """Run the query selected by 'method' and return a Result.

        bins and ticks default to self.bins and self.ticks.  The Result
        is sized by the largest x and y seen in the returned rows, and
        holds one table per run with the returned values filled in;
        cells without a row keep the value 0.0.
        """
        if bins is None:
            bins = self.bins
        if ticks is None:
            ticks = self.ticks
        sql = self._method(self, stat, bins, ticks)
        self.query(sql)

        runs = {}
        xmax = 0
        ymax = 0
        for row in self.cursor.fetchall():
            data = Data(row)
            runs.setdefault(data.run, {}).setdefault(data.x, {})[data.y] = \
                data.data
            xmax = max(xmax, data.x)
            ymax = max(ymax, data.y)

        results = Result(xmax + 1, ymax + 1)
        for run, xdata in runs.items():
            result = results[run]
            for x, ydata in xdata.items():
                for y, value in ydata.items():
                    result[x][y] = value
        return results

    def __getitem__(self, key):
        """Return the top-level stat node registered under key."""
        return self.stattop[key]