db.py revision 1881
1# Copyright (c) 2003-2004 The Regents of The University of Michigan 2# All rights reserved. 3# 4# Redistribution and use in source and binary forms, with or without 5# modification, are permitted provided that the following conditions are 6# met: redistributions of source code must retain the above copyright 7# notice, this list of conditions and the following disclaimer; 8# redistributions in binary form must reproduce the above copyright 9# notice, this list of conditions and the following disclaimer in the 10# documentation and/or other materials provided with the distribution; 11# neither the name of the copyright holders nor the names of its 12# contributors may be used to endorse or promote products derived from 13# this software without specific prior written permission. 14# 15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27import MySQLdb, re, string 28 29def statcmp(a, b): 30 v1 = a.split('.') 31 v2 = b.split('.') 32 33 last = min(len(v1), len(v2)) - 1 34 for i,j in zip(v1[0:last], v2[0:last]): 35 if i != j: 36 return cmp(i, j) 37 38 # Special compare for last element. 39 if len(v1) == len(v2): 40 return cmp(v1[last], v2[last]) 41 else: 42 return cmp(len(v1), len(v2)) 43 44class RunData: 45 def __init__(self, row): 46 self.run = int(row[0]) 47 self.name = row[1] 48 self.user = row[2] 49 self.project = row[3] 50 51class SubData: 52 def __init__(self, row): 53 self.stat = int(row[0]) 54 self.x = int(row[1]) 55 self.y = int(row[2]) 56 self.name = row[3] 57 self.descr = row[4] 58 59class Data: 60 def __init__(self, row): 61 if len(row) != 5: 62 raise 'stat db error' 63 self.stat = int(row[0]) 64 self.run = int(row[1]) 65 self.x = int(row[2]) 66 self.y = int(row[3]) 67 self.data = float(row[4]) 68 69 def __repr__(self): 70 return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat, 71 self.run, self.x, self.y, self.data) 72 73class StatData(object): 74 def __init__(self, row): 75 self.stat = int(row[0]) 76 self.name = row[1] 77 self.desc = row[2] 78 self.type = row[3] 79 self.prereq = int(row[5]) 80 self.precision = int(row[6]) 81 82 import flags 83 self.flags = 0 84 if int(row[4]): self.flags |= flags.printable 85 if int(row[7]): self.flags |= flags.nozero 86 if int(row[8]): self.flags |= flags.nonan 87 if int(row[9]): self.flags |= flags.total 88 if int(row[10]): self.flags |= flags.pdf 89 if int(row[11]): self.flags |= flags.cdf 90 91 if self.type == 'DIST' or self.type == 'VECTORDIST': 92 self.min = float(row[12]) 93 self.max = float(row[13]) 94 self.bktsize = float(row[14]) 95 self.size = int(row[15]) 96 97 if self.type == 'FORMULA': 98 self.formula = self.db.allFormulas[self.stat] 99 100class Node(object): 101 def __init__(self, name): 102 self.name = name 103 def __str__(self): 104 return self.name 105 106class Database(object): 107 def __init__(self): 108 self.host = 'zizzer.pool' 109 self.user = '' 110 self.passwd = '' 111 self.db = 'm5stats' 112 self.cursor = None 113 114 self.allStats = [] 115 self.allStatIds = {} 116 self.allStatNames = {} 117 118 self.allSubData = {} 119 120 self.allRuns = [] 121 self.allRunIds = {} 122 self.allRunNames = {} 123 124 self.allBins = [] 125 self.allBinIds = {} 126 self.allBinNames = {} 127 128 self.allFormulas = {} 129 130 self.stattop = {} 131 self.statdict = {} 132 self.statlist = [] 133 134 self.mode = 'sum'; 135 self.runs = None 136 self.bins = None 137 self.ticks = None 138 self.__dict__['get'] = type(self).sum 139 140 def query(self, sql): 141 self.cursor.execute(sql) 142 143 def update_dict(self, dict): 144 dict.update(self.stattop) 145 146 def append(self, stat): 147 statname = re.sub(':', '__', stat.name) 148 path = string.split(statname, '.') 149 pathtop = path[0] 150 fullname = '' 151 152 x = self 153 while len(path) > 1: 154 name = path.pop(0) 155 if not x.__dict__.has_key(name): 156 x.__dict__[name] = Node(fullname + name) 157 x = x.__dict__[name] 158 fullname = '%s%s.' % (fullname, name) 159 160 name = path.pop(0) 161 x.__dict__[name] = stat 162 163 self.stattop[pathtop] = self.__dict__[pathtop] 164 self.statdict[statname] = stat 165 self.statlist.append(statname) 166 167 def connect(self): 168 # connect 169 self.thedb = MySQLdb.connect(db=self.db, 170 host=self.host, 171 user=self.user, 172 passwd=self.passwd) 173 174 # create a cursor 175 self.cursor = self.thedb.cursor() 176 177 self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project 178 from runs''') 179 for result in self.cursor.fetchall(): 180 run = RunData(result); 181 self.allRuns.append(run) 182 self.allRunIds[run.run] = run 183 self.allRunNames[run.name] = run 184 185 self.query('select * from bins') 186 for id,name in self.cursor.fetchall(): 187 self.allBinIds[int(id)] = name 188 self.allBinNames[name] = int(id) 189 190 self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata') 191 for result in self.cursor.fetchall(): 192 subdata = SubData(result) 193 if self.allSubData.has_key(subdata.stat): 194 self.allSubData[subdata.stat].append(subdata) 195 else: 196 self.allSubData[subdata.stat] = [ subdata ] 197 198 self.query('select * from formulas') 199 for id,formula in self.cursor.fetchall(): 200 self.allFormulas[int(id)] = formula.tostring() 201 202 StatData.db = self 203 self.query('select * from stats') 204 import info 205 for result in self.cursor.fetchall(): 206 stat = info.NewStat(StatData(result)) 207 self.append(stat) 208 self.allStats.append(stat) 209 self.allStatIds[stat.stat] = stat 210 self.allStatNames[stat.name] = stat 211 212 # Name: listbins 213 # Desc: Prints all bins matching regex argument, if no argument 214 # is given all bins are returned 215 def listBins(self, regex='.*'): 216 print '%-50s %-10s' % ('bin name', 'id') 217 print '-' * 61 218 names = self.allBinNames.keys() 219 names.sort() 220 for name in names: 221 id = self.allBinNames[name] 222 print '%-50s %-10d' % (name, id) 223 224 # Name: listruns 225 # Desc: Prints all runs matching a given user, if no argument 226 # is given all runs are returned 227 def listRuns(self, user=None): 228 print '%-40s %-10s %-5s' % ('run name', 'user', 'id') 229 print '-' * 62 230 for run in self.allRuns: 231 if user == None or user == run.user: 232 print '%-40s %-10s %-10d' % (run.name, run.user, run.run) 233 234 # Name: listTicks 235 # Desc: Prints all samples for a given run 236 def listTicks(self, runs=None): 237 print "tick" 238 print "----------------------------------------" 239 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 240 if runs != None: 241 first = True 242 for run in runs: 243 if first: 244 # sql += ' where' 245 first = False 246 else: 247 sql += ' or' 248 sql += ' dt_run=%s' % run.run 249 sql += ')' 250 self.query(sql) 251 for r in self.cursor.fetchall(): 252 print r[0] 253 254 # Name: retTicks 255 # Desc: Prints all samples for a given run 256 def retTicks(self, runs=None): 257 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 258 if runs != None: 259 first = True 260 for run in runs: 261 if first: 262 first = False 263 else: 264 sql += ' or' 265 sql += ' dt_run=%s' % run.run 266 sql += ')' 267 self.query(sql) 268 ret = [] 269 for r in self.cursor.fetchall(): 270 ret.append(r[0]) 271 return ret 272 273 # Name: liststats 274 # Desc: Prints all statistics that appear in the database, 275 # the optional argument is a regular expression that can 276 # be used to prune the result set 277 def listStats(self, regex=None): 278 print '%-60s %-8s %-10s' % ('stat name', 'id', 'type') 279 print '-' * 80 280 281 rx = None 282 if regex != None: 283 rx = re.compile(regex) 284 285 stats = [ stat.name for stat in self.allStats ] 286 stats.sort(statcmp) 287 for stat in stats: 288 stat = self.allStatNames[stat] 289 if rx == None or rx.match(stat.name): 290 print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type) 291 292 # Name: liststats 293 # Desc: Prints all statistics that appear in the database, 294 # the optional argument is a regular expression that can 295 # be used to prune the result set 296 def listFormulas(self, regex=None): 297 print '%-60s %s' % ('formula name', 'formula') 298 print '-' * 80 299 300 rx = None 301 if regex != None: 302 rx = re.compile(regex) 303 304 stats = [ stat.name for stat in self.allStats ] 305 stats.sort(statcmp) 306 for stat in stats: 307 stat = self.allStatNames[stat] 308 if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)): 309 print '%-60s %s' % (stat.name, self.allFormulas[stat.stat]) 310 311 def getStat(self, stats): 312 if type(stats) is not list: 313 stats = [ stats ] 314 315 ret = [] 316 for stat in stats: 317 if type(stat) is int: 318 ret.append(self.allStatIds[stat]) 319 320 if type(stat) is str: 321 rx = re.compile(stat) 322 for stat in self.allStats: 323 if rx.match(stat.name): 324 ret.append(stat) 325 return ret 326 327 def getBin(self, bins): 328 if type(bins) is not list: 329 bins = [ bins ] 330 331 ret = [] 332 for bin in bins: 333 if type(bin) is int: 334 ret.append(bin) 335 elif type(bin) is str: 336 ret.append(self.allBinNames[bin]) 337 else: 338 for name,id in self.allBinNames.items(): 339 if bin.match(name): 340 ret.append(id) 341 342 return ret 343 344 def getNotBin(self, bin): 345 map = {} 346 for bin in getBin(bin): 347 map[bin] = 1 348 349 ret = [] 350 for bin in self.allBinIds.keys(): 351 if not map.has_key(bin): 352 ret.append(bin) 353 354 return ret 355 356 ######################################### 357 # get the data 358 # 359 def inner(self, op, stat, bins, ticks, group=False): 360 sql = 'select ' 361 sql += 'dt_stat as stat, ' 362 sql += 'dt_run as run, ' 363 sql += 'dt_x as x, ' 364 sql += 'dt_y as y, ' 365 if group: 366 sql += 'dt_tick as tick, ' 367 sql += '%s(dt_data) as data ' % op 368 sql += 'from data ' 369 sql += 'where ' 370 371 if isinstance(stat, list): 372 val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ]) 373 sql += ' (%s)' % val 374 else: 375 sql += ' dt_stat=%d' % stat.stat 376 377 if self.runs != None and len(self.runs): 378 val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ]) 379 sql += ' and (%s)' % val 380 381 if bins != None and len(bins): 382 val = ' or '.join([ 'dt_bin=%d' % b for b in bins ]) 383 sql += ' and (%s)' % val 384 385 if ticks != None and len(ticks): 386 val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ]) 387 sql += ' and (%s)' % val 388 389 sql += ' group by dt_stat,dt_run,dt_x,dt_y' 390 if group: 391 sql += ',dt_tick' 392 return sql 393 394 def outer(self, op_out, op_in, stat, bins, ticks): 395 sql = self.inner(op_in, stat, bins, ticks, True) 396 sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql) 397 sql += 'group by stat,run,x,y' 398 return sql 399 400 # Name: sum 401 # Desc: given a run, a stat and an array of samples and bins, 402 # sum all the bins and then get the standard deviation of the 403 # samples for non-binned runs. This will just return the average 404 # of samples, however a bin array still must be passed 405 def sum(self, stat, bins, ticks): 406 return self.inner('sum', stat, bins, ticks) 407 408 # Name: avg 409 # Desc: given a run, a stat and an array of samples and bins, 410 # sum all the bins and then average the samples for non-binned 411 # runs this will just return the average of samples, however 412 # a bin array still must be passed 413 def avg(self, stat, bins, ticks): 414 return self.outer('avg', 'sum', stat, bins, ticks) 415 416 # Name: stdev 417 # Desc: given a run, a stat and an array of samples and bins, 418 # sum all the bins and then get the standard deviation of the 419 # samples for non-binned runs. This will just return the average 420 # of samples, however a bin array still must be passed 421 def stdev(self, stat, bins, ticks): 422 return self.outer('stddev', 'sum', stat, bins, ticks) 423 424 def __getattribute__(self, attr): 425 if attr != 'get': 426 return super(Database, self).__getattribute__(attr) 427 428 if self.__dict__['get'] == type(self).sum: 429 return 'sum' 430 elif self.__dict__['get'] == type(self).avg: 431 return 'avg' 432 elif self.__dict__['get'] == type(self).stdev: 433 return 'stdev' 434 else: 435 return '' 436 437 def __setattr__(self, attr, value): 438 if attr != 'get': 439 super(Database, self).__setattr__(attr, value) 440 return 441 442 if value == 'sum': 443 self.__dict__['get'] = type(self).sum 444 elif value == 'avg': 445 self.__dict__['get'] = type(self).avg 446 elif value == 'stdev': 447 self.__dict__['get'] = type(self).stdev 448 else: 449 raise AttributeError, "can only set get to: sum | avg | stdev" 450 451 def data(self, stat, bins=None, ticks=None): 452 if bins is None: 453 bins = self.bins 454 if ticks is None: 455 ticks = self.ticks 456 sql = self.__dict__['get'](self, stat, bins, ticks) 457 self.query(sql) 458 459 runs = {} 460 for x in self.cursor.fetchall(): 461 data = Data(x) 462 if not runs.has_key(data.run): 463 runs[data.run] = {} 464 if not runs[data.run].has_key(data.x): 465 runs[data.run][data.x] = {} 466 467 runs[data.run][data.x][data.y] = data.data 468 return runs 469 470 def __getitem__(self, key): 471 return self.stattop[key] 472