db.py revision 1758
1 2# Copyright (c) 2003-2004 The Regents of The University of Michigan 3# All rights reserved. 4# 5# Redistribution and use in source and binary forms, with or without 6# modification, are permitted provided that the following conditions are 7# met: redistributions of source code must retain the above copyright 8# notice, this list of conditions and the following disclaimer; 9# redistributions in binary form must reproduce the above copyright 10# notice, this list of conditions and the following disclaimer in the 11# documentation and/or other materials provided with the distribution; 12# neither the name of the copyright holders nor the names of its 13# contributors may be used to endorse or promote products derived from 14# this software without specific prior written permission. 15# 16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28#Permission is granted to use, copy, create derivative works and 29#redistribute this software and such derivative works for any purpose, 30#so long as the copyright notice above, this grant of permission, and 31#the disclaimer below appear in all copies made; and so long as the 32#name of The University of Michigan is not used in any advertising or 33#publicity pertaining to the use or distribution of this software 34#without specific, written prior authorization. 35# 36#THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION FROM THE 37#UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY PURPOSE, AND WITHOUT 38#WARRANTY BY THE UNIVERSITY OF MICHIGAN OF ANY KIND, EITHER EXPRESS OR 39#IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF 40#MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE REGENTS OF 41#THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE FOR ANY DAMAGES, 42#INCLUDING DIRECT, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL 43#DAMAGES, WITH RESPECT TO ANY CLAIM ARISING OUT OF OR IN CONNECTION 44#WITH THE USE OF THE SOFTWARE, EVEN IF IT HAS BEEN OR IS HEREAFTER 45#ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 46 47import MySQLdb, re, string 48 49def statcmp(a, b): 50 v1 = a.split('.') 51 v2 = b.split('.') 52 53 last = min(len(v1), len(v2)) - 1 54 for i,j in zip(v1[0:last], v2[0:last]): 55 if i != j: 56 return cmp(i, j) 57 58 # Special compare for last element. 59 if len(v1) == len(v2): 60 return cmp(v1[last], v2[last]) 61 else: 62 return cmp(len(v1), len(v2)) 63 64class RunData: 65 def __init__(self, row): 66 self.run = int(row[0]) 67 self.name = row[1] 68 self.user = row[2] 69 self.project = row[3] 70 71class SubData: 72 def __init__(self, row): 73 self.stat = int(row[0]) 74 self.x = int(row[1]) 75 self.y = int(row[2]) 76 self.name = row[3] 77 self.descr = row[4] 78 79class Data: 80 def __init__(self, row): 81 if len(row) != 5: 82 raise 'stat db error' 83 self.stat = int(row[0]) 84 self.run = int(row[1]) 85 self.x = int(row[2]) 86 self.y = int(row[3]) 87 self.data = float(row[4]) 88 89 def __repr__(self): 90 return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat, 91 self.run, self.x, self.y, self.data) 92 93class StatData(object): 94 def __init__(self, row): 95 self.stat = int(row[0]) 96 self.name = row[1] 97 self.desc = row[2] 98 self.type = row[3] 99 self.prereq = int(row[5]) 100 self.precision = int(row[6]) 101 102 import flags 103 self.flags = 0 104 if int(row[4]): self.flags |= flags.printable 105 if int(row[7]): self.flags |= flags.nozero 106 if int(row[8]): self.flags |= flags.nonan 107 if int(row[9]): self.flags |= flags.total 108 if int(row[10]): self.flags |= flags.pdf 109 if int(row[11]): self.flags |= flags.cdf 110 111 if self.type == 'DIST' or self.type == 'VECTORDIST': 112 self.min = float(row[12]) 113 self.max = float(row[13]) 114 self.bktsize = float(row[14]) 115 self.size = int(row[15]) 116 117 if self.type == 'FORMULA': 118 self.formula = self.db.allFormulas[self.stat] 119 120class Node(object): 121 def __init__(self, name): 122 self.name = name 123 def __str__(self): 124 return name 125 126class Database(object): 127 def __init__(self): 128 self.host = 'zizzer.pool' 129 self.user = '' 130 self.passwd = '' 131 self.db = 'm5stats' 132 self.cursor = None 133 134 self.allStats = [] 135 self.allStatIds = {} 136 self.allStatNames = {} 137 138 self.allSubData = {} 139 140 self.allRuns = [] 141 self.allRunIds = {} 142 self.allRunNames = {} 143 144 self.allBins = [] 145 self.allBinIds = {} 146 self.allBinNames = {} 147 148 self.allFormulas = {} 149 150 self.stattop = {} 151 self.statdict = {} 152 self.statlist = [] 153 154 self.mode = 'sum'; 155 self.runs = None 156 self.bins = None 157 self.ticks = None 158 self.__dict__['get'] = type(self).sum 159 160 def query(self, sql): 161 self.cursor.execute(sql) 162 163 def update_dict(self, dict): 164 dict.update(self.stattop) 165 166 def append(self, stat): 167 statname = re.sub(':', '__', stat.name) 168 path = string.split(statname, '.') 169 pathtop = path[0] 170 fullname = '' 171 172 x = self 173 while len(path) > 1: 174 name = path.pop(0) 175 if not x.__dict__.has_key(name): 176 x.__dict__[name] = Node(fullname + name) 177 x = x.__dict__[name] 178 fullname = '%s%s.' % (fullname, name) 179 180 name = path.pop(0) 181 x.__dict__[name] = stat 182 183 self.stattop[pathtop] = self.__dict__[pathtop] 184 self.statdict[statname] = stat 185 self.statlist.append(statname) 186 187 def connect(self): 188 # connect 189 self.thedb = MySQLdb.connect(db=self.db, 190 host=self.host, 191 user=self.user, 192 passwd=self.passwd) 193 194 # create a cursor 195 self.cursor = self.thedb.cursor() 196 197 self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project 198 from runs''') 199 for result in self.cursor.fetchall(): 200 run = RunData(result); 201 self.allRuns.append(run) 202 self.allRunIds[run.run] = run 203 self.allRunNames[run.name] = run 204 205 self.query('select * from bins') 206 for id,name in self.cursor.fetchall(): 207 self.allBinIds[int(id)] = name 208 self.allBinNames[name] = int(id) 209 210 self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata') 211 for result in self.cursor.fetchall(): 212 subdata = SubData(result) 213 if self.allSubData.has_key(subdata.stat): 214 self.allSubData[subdata.stat].append(subdata) 215 else: 216 self.allSubData[subdata.stat] = [ subdata ] 217 218 self.query('select * from formulas') 219 for id,formula in self.cursor.fetchall(): 220 self.allFormulas[int(id)] = formula.tostring() 221 222 StatData.db = self 223 self.query('select * from stats') 224 import info 225 for result in self.cursor.fetchall(): 226 stat = info.NewStat(StatData(result)) 227 self.append(stat) 228 self.allStats.append(stat) 229 self.allStatIds[stat.stat] = stat 230 self.allStatNames[stat.name] = stat 231 232 # Name: listbins 233 # Desc: Prints all bins matching regex argument, if no argument 234 # is given all bins are returned 235 def listBins(self, regex='.*'): 236 print '%-50s %-10s' % ('bin name', 'id') 237 print '-' * 61 238 names = self.allBinNames.keys() 239 names.sort() 240 for name in names: 241 id = self.allBinNames[name] 242 print '%-50s %-10d' % (name, id) 243 244 # Name: listruns 245 # Desc: Prints all runs matching a given user, if no argument 246 # is given all runs are returned 247 def listRuns(self, user=None): 248 print '%-40s %-10s %-5s' % ('run name', 'user', 'id') 249 print '-' * 62 250 for run in self.allRuns: 251 if user == None or user == run.user: 252 print '%-40s %-10s %-10d' % (run.name, run.user, run.run) 253 254 # Name: listTicks 255 # Desc: Prints all samples for a given run 256 def listTicks(self, runs=None): 257 print "tick" 258 print "----------------------------------------" 259 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 260 if runs != None: 261 first = True 262 for run in runs: 263 if first: 264 # sql += ' where' 265 first = False 266 else: 267 sql += ' or' 268 sql += ' dt_run=%s' % run.run 269 sql += ')' 270 self.query(sql) 271 for r in self.cursor.fetchall(): 272 print r[0] 273 274 # Name: retTicks 275 # Desc: Prints all samples for a given run 276 def retTicks(self, runs=None): 277 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 278 if runs != None: 279 first = True 280 for run in runs: 281 if first: 282 first = False 283 else: 284 sql += ' or' 285 sql += ' dt_run=%s' % run.run 286 sql += ')' 287 self.query(sql) 288 ret = [] 289 for r in self.cursor.fetchall(): 290 ret.append(r[0]) 291 return ret 292 293 # Name: liststats 294 # Desc: Prints all statistics that appear in the database, 295 # the optional argument is a regular expression that can 296 # be used to prune the result set 297 def listStats(self, regex=None): 298 print '%-60s %-8s %-10s' % ('stat name', 'id', 'type') 299 print '-' * 80 300 301 rx = None 302 if regex != None: 303 rx = re.compile(regex) 304 305 stats = [ stat.name for stat in self.allStats ] 306 stats.sort(statcmp) 307 for stat in stats: 308 stat = self.allStatNames[stat] 309 if rx == None or rx.match(stat.name): 310 print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type) 311 312 # Name: liststats 313 # Desc: Prints all statistics that appear in the database, 314 # the optional argument is a regular expression that can 315 # be used to prune the result set 316 def listFormulas(self, regex=None): 317 print '%-60s %s' % ('formula name', 'formula') 318 print '-' * 80 319 320 rx = None 321 if regex != None: 322 rx = re.compile(regex) 323 324 stats = [ stat.name for stat in self.allStats ] 325 stats.sort(statcmp) 326 for stat in stats: 327 stat = self.allStatNames[stat] 328 if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)): 329 print '%-60s %s' % (stat.name, self.allFormulas[stat.stat]) 330 331 def getStat(self, stats): 332 if type(stats) is not list: 333 stats = [ stats ] 334 335 ret = [] 336 for stat in stats: 337 if type(stat) is int: 338 ret.append(self.allStatIds[stat]) 339 340 if type(stat) is str: 341 rx = re.compile(stat) 342 for stat in self.allStats: 343 if rx.match(stat.name): 344 ret.append(stat) 345 return ret 346 347 def getBin(self, bins): 348 if type(bins) is not list: 349 bins = [ bins ] 350 351 ret = [] 352 for bin in bins: 353 if type(bin) is int: 354 ret.append(bin) 355 elif type(bin) is str: 356 ret.append(self.allBinNames[bin]) 357 else: 358 for name,id in self.allBinNames.items(): 359 if bin.match(name): 360 ret.append(id) 361 362 return ret 363 364 def getNotBin(self, bin): 365 map = {} 366 for bin in getBin(bin): 367 map[bin] = 1 368 369 ret = [] 370 for bin in self.allBinIds.keys(): 371 if not map.has_key(bin): 372 ret.append(bin) 373 374 return ret 375 376 ######################################### 377 # get the data 378 # 379 def inner(self, op, stat, bins, ticks, group=False): 380 sql = 'select ' 381 sql += 'dt_stat as stat, ' 382 sql += 'dt_run as run, ' 383 sql += 'dt_x as x, ' 384 sql += 'dt_y as y, ' 385 if group: 386 sql += 'dt_tick as tick, ' 387 sql += '%s(dt_data) as data ' % op 388 sql += 'from data ' 389 sql += 'where ' 390 391 if isinstance(stat, list): 392 val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ]) 393 sql += ' (%s)' % val 394 else: 395 sql += ' dt_stat=%d' % stat.stat 396 397 if self.runs != None and len(self.runs): 398 val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ]) 399 sql += ' and (%s)' % val 400 401 if bins != None and len(bins): 402 val = ' or '.join([ 'dt_bin=%d' % b for b in bins ]) 403 sql += ' and (%s)' % val 404 405 if ticks != None and len(ticks): 406 val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ]) 407 sql += ' and (%s)' % val 408 409 sql += ' group by dt_stat,dt_run,dt_x,dt_y' 410 if group: 411 sql += ',dt_tick' 412 return sql 413 414 def outer(self, op_out, op_in, stat, bins, ticks): 415 sql = self.inner(op_in, stat, bins, ticks, True) 416 sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql) 417 sql += 'group by stat,run,x,y' 418 return sql 419 420 # Name: sum 421 # Desc: given a run, a stat and an array of samples and bins, 422 # sum all the bins and then get the standard deviation of the 423 # samples for non-binned runs. This will just return the average 424 # of samples, however a bin array still must be passed 425 def sum(self, stat, bins, ticks): 426 return self.inner('sum', stat, bins, ticks) 427 428 # Name: avg 429 # Desc: given a run, a stat and an array of samples and bins, 430 # sum all the bins and then average the samples for non-binned 431 # runs this will just return the average of samples, however 432 # a bin array still must be passed 433 def avg(self, stat, bins, ticks): 434 return self.outer('avg', 'sum', stat, bins, ticks) 435 436 # Name: stdev 437 # Desc: given a run, a stat and an array of samples and bins, 438 # sum all the bins and then get the standard deviation of the 439 # samples for non-binned runs. This will just return the average 440 # of samples, however a bin array still must be passed 441 def stdev(self, stat, bins, ticks): 442 return self.outer('stddev', 'sum', stat, bins, ticks) 443 444 def __getattribute__(self, attr): 445 if attr != 'get': 446 return super(Database, self).__getattribute__(attr) 447 448 if self.__dict__['get'] == type(self).sum: 449 return 'sum' 450 elif self.__dict__['get'] == type(self).avg: 451 return 'avg' 452 elif self.__dict__['get'] == type(self).stdev: 453 return 'stdev' 454 else: 455 return '' 456 457 def __setattr__(self, attr, value): 458 if attr != 'get': 459 super(Database, self).__setattr__(attr, value) 460 return 461 462 if value == 'sum': 463 self.__dict__['get'] = type(self).sum 464 elif value == 'avg': 465 self.__dict__['get'] = type(self).avg 466 elif value == 'stdev': 467 self.__dict__['get'] = type(self).stdev 468 else: 469 raise AttributeError, "can only set get to: sum | avg | stdev" 470 471 def data(self, stat, bins=None, ticks=None): 472 if bins is None: 473 bins = self.bins 474 if ticks is None: 475 ticks = self.ticks 476 sql = self.__dict__['get'](self, stat, bins, ticks) 477 self.query(sql) 478 479 runs = {} 480 for x in self.cursor.fetchall(): 481 data = Data(x) 482 if not runs.has_key(data.run): 483 runs[data.run] = {} 484 if not runs[data.run].has_key(data.x): 485 runs[data.run][data.x] = {} 486 487 runs[data.run][data.x][data.y] = data.data 488 return runs 489