1# Copyright (c) 2003-2004 The Regents of The University of Michigan 2# All rights reserved. 3# 4# Redistribution and use in source and binary forms, with or without 5# modification, are permitted provided that the following conditions are 6# met: redistributions of source code must retain the above copyright 7# notice, this list of conditions and the following disclaimer; 8# redistributions in binary form must reproduce the above copyright 9# notice, this list of conditions and the following disclaimer in the 10# documentation and/or other materials provided with the distribution; 11# neither the name of the copyright holders nor the names of its 12# contributors may be used to endorse or promote products derived from 13# this software without specific prior written permission. 14# 15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27import MySQLdb, re, string 28 29def statcmp(a, b): 30 v1 = a.split('.') 31 v2 = b.split('.') 32 33 last = min(len(v1), len(v2)) - 1 34 for i,j in zip(v1[0:last], v2[0:last]): 35 if i != j: 36 return cmp(i, j) 37 38 # Special compare for last element. 39 if len(v1) == len(v2): 40 return cmp(v1[last], v2[last]) 41 else: 42 return cmp(len(v1), len(v2)) 43 44class RunData: 45 def __init__(self, row): 46 self.run = int(row[0]) 47 self.name = row[1] 48 self.user = row[2] 49 self.project = row[3] 50 51class SubData: 52 def __init__(self, row): 53 self.stat = int(row[0]) 54 self.x = int(row[1]) 55 self.y = int(row[2]) 56 self.name = row[3] 57 self.descr = row[4] 58 59class Data: 60 def __init__(self, row): 61 if len(row) != 5: 62 raise 'stat db error' 63 self.stat = int(row[0]) 64 self.run = int(row[1]) 65 self.x = int(row[2]) 66 self.y = int(row[3]) 67 self.data = float(row[4]) 68 69 def __repr__(self): 70 return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat, 71 self.run, self.x, self.y, self.data) 72 73class StatData(object): 74 def __init__(self, row): 75 self.stat = int(row[0]) 76 self.name = row[1] 77 self.desc = row[2] 78 self.type = row[3] 79 self.prereq = int(row[5]) 80 self.precision = int(row[6]) 81 82 import flags 83 self.flags = 0 84 if int(row[4]): self.flags |= flags.printable 85 if int(row[7]): self.flags |= flags.nozero 86 if int(row[8]): self.flags |= flags.nonan 87 if int(row[9]): self.flags |= flags.total 88 if int(row[10]): self.flags |= flags.pdf 89 if int(row[11]): self.flags |= flags.cdf 90 91 if self.type == 'DIST' or self.type == 'VECTORDIST': 92 self.min = float(row[12]) 93 self.max = float(row[13]) 94 self.bktsize = float(row[14]) 95 self.size = int(row[15]) 96 97 if self.type == 'FORMULA': 98 self.formula = self.db.allFormulas[self.stat] 99 100class Node(object): 101 def __init__(self, name): 102 self.name = name 103 def __str__(self): 104 return self.name 105 106class Result(object): 107 def __init__(self, x, y): 108 self.data = {} 109 self.x = x 110 self.y = y 111 112 def __contains__(self, run): 113 return run in self.data 114 115 def __getitem__(self, run): 116 if run not in self.data: 117 self.data[run] = [ [ 0.0 ] * self.y for i in xrange(self.x) ] 118 return self.data[run] 119 120class Database(object): 121 def __init__(self): 122 self.host = 'zizzer.pool' 123 self.user = '' 124 self.passwd = '' 125 self.db = 'm5stats' 126 self.cursor = None 127 128 self.allStats = [] 129 self.allStatIds = {} 130 self.allStatNames = {} 131 132 self.allSubData = {} 133 134 self.allRuns = [] 135 self.allRunIds = {} 136 self.allRunNames = {} 137 138 self.allBins = [] 139 self.allBinIds = {} 140 self.allBinNames = {} 141 142 self.allFormulas = {} 143 144 self.stattop = {} 145 self.statdict = {} 146 self.statlist = [] 147 148 self.mode = 'sum'; 149 self.runs = None 150 self.bins = None 151 self.ticks = None 152 self.method = 'sum' 153 self._method = type(self).sum 154 155 def get(self, job, stat): 156 run = self.allRunNames.get(job.name, None) 157 if run is None: |
158 return None 159 160 from info import scalar, vector, value, values, total, len 161 stat.system = self[job.system] 162 if scalar(stat): 163 return value(stat, run.run) 164 if vector(stat): 165 return values(stat, run.run) 166 167 return None 168 169 def query(self, sql): 170 self.cursor.execute(sql) 171 172 def update_dict(self, dict): 173 dict.update(self.stattop) 174 175 def append(self, stat): 176 statname = re.sub(':', '__', stat.name) 177 path = string.split(statname, '.') 178 pathtop = path[0] 179 fullname = '' 180 181 x = self 182 while len(path) > 1: 183 name = path.pop(0) 184 if not x.__dict__.has_key(name): 185 x.__dict__[name] = Node(fullname + name) 186 x = x.__dict__[name] 187 fullname = '%s%s.' % (fullname, name) 188 189 name = path.pop(0) 190 x.__dict__[name] = stat 191 192 self.stattop[pathtop] = self.__dict__[pathtop] 193 self.statdict[statname] = stat 194 self.statlist.append(statname) 195 196 def connect(self): 197 # connect 198 self.thedb = MySQLdb.connect(db=self.db, 199 host=self.host, 200 user=self.user, 201 passwd=self.passwd) 202 203 # create a cursor 204 self.cursor = self.thedb.cursor() 205 206 self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project 207 from runs''') 208 for result in self.cursor.fetchall(): 209 run = RunData(result); 210 self.allRuns.append(run) 211 self.allRunIds[run.run] = run 212 self.allRunNames[run.name] = run 213 214 self.query('select * from bins') 215 for id,name in self.cursor.fetchall(): 216 self.allBinIds[int(id)] = name 217 self.allBinNames[name] = int(id) 218 219 self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata') 220 for result in self.cursor.fetchall(): 221 subdata = SubData(result) 222 if self.allSubData.has_key(subdata.stat): 223 self.allSubData[subdata.stat].append(subdata) 224 else: 225 self.allSubData[subdata.stat] = [ subdata ] 226 227 self.query('select * from formulas') 228 for id,formula in self.cursor.fetchall(): 229 self.allFormulas[int(id)] = formula.tostring() 230 231 StatData.db = self 232 self.query('select * from stats') 233 import info 234 for result in self.cursor.fetchall(): 235 stat = info.NewStat(self, StatData(result)) 236 self.append(stat) 237 self.allStats.append(stat) 238 self.allStatIds[stat.stat] = stat 239 self.allStatNames[stat.name] = stat 240 241 # Name: listbins 242 # Desc: Prints all bins matching regex argument, if no argument 243 # is given all bins are returned 244 def listBins(self, regex='.*'): 245 print '%-50s %-10s' % ('bin name', 'id') 246 print '-' * 61 247 names = self.allBinNames.keys() 248 names.sort() 249 for name in names: 250 id = self.allBinNames[name] 251 print '%-50s %-10d' % (name, id) 252 253 # Name: listruns 254 # Desc: Prints all runs matching a given user, if no argument 255 # is given all runs are returned 256 def listRuns(self, user=None): 257 print '%-40s %-10s %-5s' % ('run name', 'user', 'id') 258 print '-' * 62 259 for run in self.allRuns: 260 if user == None or user == run.user: 261 print '%-40s %-10s %-10d' % (run.name, run.user, run.run) 262 263 # Name: listTicks 264 # Desc: Prints all samples for a given run 265 def listTicks(self, runs=None): 266 print "tick" 267 print "----------------------------------------" 268 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 269 if runs != None: 270 first = True 271 for run in runs: 272 if first: 273 # sql += ' where' 274 first = False 275 else: 276 sql += ' or' 277 sql += ' dt_run=%s' % run.run 278 sql += ')' 279 self.query(sql) 280 for r in self.cursor.fetchall(): 281 print r[0] 282 283 # Name: retTicks 284 # Desc: Prints all samples for a given run 285 def retTicks(self, runs=None): 286 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 287 if runs != None: 288 first = True 289 for run in runs: 290 if first: 291 first = False 292 else: 293 sql += ' or' 294 sql += ' dt_run=%s' % run.run 295 sql += ')' 296 self.query(sql) 297 ret = [] 298 for r in self.cursor.fetchall(): 299 ret.append(r[0]) 300 return ret 301 302 # Name: liststats 303 # Desc: Prints all statistics that appear in the database, 304 # the optional argument is a regular expression that can 305 # be used to prune the result set 306 def listStats(self, regex=None): 307 print '%-60s %-8s %-10s' % ('stat name', 'id', 'type') 308 print '-' * 80 309 310 rx = None 311 if regex != None: 312 rx = re.compile(regex) 313 314 stats = [ stat.name for stat in self.allStats ] 315 stats.sort(statcmp) 316 for stat in stats: 317 stat = self.allStatNames[stat] 318 if rx == None or rx.match(stat.name): 319 print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type) 320 321 # Name: liststats 322 # Desc: Prints all statistics that appear in the database, 323 # the optional argument is a regular expression that can 324 # be used to prune the result set 325 def listFormulas(self, regex=None): 326 print '%-60s %s' % ('formula name', 'formula') 327 print '-' * 80 328 329 rx = None 330 if regex != None: 331 rx = re.compile(regex) 332 333 stats = [ stat.name for stat in self.allStats ] 334 stats.sort(statcmp) 335 for stat in stats: 336 stat = self.allStatNames[stat] 337 if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)): 338 print '%-60s %s' % (stat.name, self.allFormulas[stat.stat]) 339 340 def getStat(self, stats): 341 if type(stats) is not list: 342 stats = [ stats ] 343 344 ret = [] 345 for stat in stats: 346 if type(stat) is int: 347 ret.append(self.allStatIds[stat]) 348 349 if type(stat) is str: 350 rx = re.compile(stat) 351 for stat in self.allStats: 352 if rx.match(stat.name): 353 ret.append(stat) 354 return ret 355 356 def getBin(self, bins): 357 if type(bins) is not list: 358 bins = [ bins ] 359 360 ret = [] 361 for bin in bins: 362 if type(bin) is int: 363 ret.append(bin) 364 elif type(bin) is str: 365 ret.append(self.allBinNames[bin]) 366 else: 367 for name,id in self.allBinNames.items(): 368 if bin.match(name): 369 ret.append(id) 370 371 return ret 372 373 def getNotBin(self, bin): 374 map = {} 375 for bin in getBin(bin): 376 map[bin] = 1 377 378 ret = [] 379 for bin in self.allBinIds.keys(): 380 if not map.has_key(bin): 381 ret.append(bin) 382 383 return ret 384 385 ######################################### 386 # get the data 387 # 388 def inner(self, op, stat, bins, ticks, group=False): 389 sql = 'select ' 390 sql += 'dt_stat as stat, ' 391 sql += 'dt_run as run, ' 392 sql += 'dt_x as x, ' 393 sql += 'dt_y as y, ' 394 if group: 395 sql += 'dt_tick as tick, ' 396 sql += '%s(dt_data) as data ' % op 397 sql += 'from data ' 398 sql += 'where ' 399 400 if isinstance(stat, list): 401 val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ]) 402 sql += ' (%s)' % val 403 else: 404 sql += ' dt_stat=%d' % stat.stat 405 406 if self.runs != None and len(self.runs): 407 val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ]) 408 sql += ' and (%s)' % val 409 410 if bins != None and len(bins): 411 val = ' or '.join([ 'dt_bin=%d' % b for b in bins ]) 412 sql += ' and (%s)' % val 413 414 if ticks != None and len(ticks): 415 val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ]) 416 sql += ' and (%s)' % val 417 418 sql += ' group by dt_stat,dt_run,dt_x,dt_y' 419 if group: 420 sql += ',dt_tick' 421 return sql 422 423 def outer(self, op_out, op_in, stat, bins, ticks): 424 sql = self.inner(op_in, stat, bins, ticks, True) 425 sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql) 426 sql += 'group by stat,run,x,y' 427 return sql 428 429 # Name: sum 430 # Desc: given a run, a stat and an array of samples and bins, 431 # sum all the bins and then get the standard deviation of the 432 # samples for non-binned runs. This will just return the average 433 # of samples, however a bin array still must be passed 434 def sum(self, stat, bins, ticks): 435 return self.inner('sum', stat, bins, ticks) 436 437 # Name: avg 438 # Desc: given a run, a stat and an array of samples and bins, 439 # sum all the bins and then average the samples for non-binned 440 # runs this will just return the average of samples, however 441 # a bin array still must be passed 442 def avg(self, stat, bins, ticks): 443 return self.outer('avg', 'sum', stat, bins, ticks) 444 445 # Name: stdev 446 # Desc: given a run, a stat and an array of samples and bins, 447 # sum all the bins and then get the standard deviation of the 448 # samples for non-binned runs. This will just return the average 449 # of samples, however a bin array still must be passed 450 def stdev(self, stat, bins, ticks): 451 return self.outer('stddev', 'sum', stat, bins, ticks) 452 453 def __setattr__(self, attr, value): 454 super(Database, self).__setattr__(attr, value) 455 if attr != 'method': 456 return 457 458 if value == 'sum': 459 self._method = self.sum 460 elif value == 'avg': 461 self._method = self.avg 462 elif value == 'stdev': 463 self._method = self.stdev 464 else: 465 raise AttributeError, "can only set get to: sum | avg | stdev" 466 467 def data(self, stat, bins=None, ticks=None): 468 if bins is None: 469 bins = self.bins 470 if ticks is None: 471 ticks = self.ticks 472 sql = self._method(self, stat, bins, ticks) 473 self.query(sql) 474 475 runs = {} 476 xmax = 0 477 ymax = 0 478 for x in self.cursor.fetchall(): 479 data = Data(x) 480 if not runs.has_key(data.run): 481 runs[data.run] = {} 482 if not runs[data.run].has_key(data.x): 483 runs[data.run][data.x] = {} 484 485 xmax = max(xmax, data.x) 486 ymax = max(ymax, data.y) 487 runs[data.run][data.x][data.y] = data.data 488 489 results = Result(xmax + 1, ymax + 1) 490 for run,data in runs.iteritems(): 491 result = results[run] 492 for x,ydata in data.iteritems(): 493 for y,data in ydata.iteritems(): 494 result[x][y] = data 495 return results 496 497 def __getitem__(self, key): 498 return self.stattop[key] |