# db.py revision 2343
# Copyright (c) 2003-2004 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import MySQLdb
import re
import string


def _cmp(a, b):
    """Three-way compare returning -1, 0 or 1.

    Stand-in for the ``cmp`` builtin, which no longer exists on Python 3.
    """
    return (a > b) - (a < b)


def statcmp(a, b):
    """Three-way comparison of two dotted stat names.

    The names are split on '.'.  Every component before the last component
    of the shorter name is compared in order (as strings); the first
    difference decides the result.  If those all match, names with the same
    number of components are ordered by their last component, otherwise the
    name with fewer components sorts first.

    Returns a negative, zero or positive integer, so the function can be
    used as an old-style comparison function (or through
    ``functools.cmp_to_key``).
    """
    v1 = a.split('.')
    v2 = b.split('.')

    last = min(len(v1), len(v2)) - 1
    for i, j in zip(v1[:last], v2[:last]):
        if i != j:
            return _cmp(i, j)

    # Special compare for last element.
    if len(v1) == len(v2):
        return _cmp(v1[last], v2[last])
    return _cmp(len(v1), len(v2))


class RunData:
    """One run record, built from a 4-column row: id, name, user, project."""

    def __init__(self, row):
        self.run = int(row[0])
        self.name = row[1]
        self.user = row[2]
        self.project = row[3]


class SubData:
    """One sub-data record, built from a row: stat id, x, y, name, descr."""

    def __init__(self, row):
        self.stat = int(row[0])
        self.x = int(row[1])
        self.y = int(row[2])
        self.name = row[3]
        self.descr = row[4]


class Data:
    """One data point, built from a 5-column row: stat, run, x, y, data."""

    def __init__(self, row):
        """Convert the row's columns to ints (stat, run, x, y) and a float.

        Raises:
            ValueError: if the row does not have exactly five columns.
        """
        if len(row) != 5:
            # The original raised a bare string, which is itself a TypeError
            # on Python >= 2.6; raise a real exception with the same text.
            raise ValueError('stat db error')
        self.stat = int(row[0])
        self.run = int(row[1])
        self.x = int(row[2])
        self.y = int(row[3])
        self.data = float(row[4])

    def __repr__(self):
        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % (
            self.stat, self.run, self.x, self.y, self.data)


class StatData(object):
    """Description of one stat, built from a full row of the stats table.

    Columns read: 0 stat id, 1 name, 2 description, 3 type, 5 prereq,
    6 precision; columns 4 and 7-11 are booleans folded into ``flags``.
    For 'DIST' and 'VECTORDIST' stats, columns 12-15 hold min, max, bucket
    size and size.  For 'FORMULA' stats the formula text is looked up in
    ``self.db.allFormulas``; ``db`` is a class attribute that must be
    assigned (Database.connect does so) before such a row is constructed.
    """

    def __init__(self, row):
        self.stat = int(row[0])
        self.name = row[1]
        self.desc = row[2]
        self.type = row[3]
        self.prereq = int(row[5])
        self.precision = int(row[6])

        # Project-local module; imported here as in the original code.
        import flags
        flag_columns = (
            (4, flags.printable),
            (7, flags.nozero),
            (8, flags.nonan),
            (9, flags.total),
            (10, flags.pdf),
            (11, flags.cdf),
        )
        self.flags = 0
        for column, flag in flag_columns:
            if int(row[column]):
                self.flags |= flag

        if self.type in ('DIST', 'VECTORDIST'):
            self.min = float(row[12])
            self.max = float(row[13])
            self.bktsize = float(row[14])
            self.size = int(row[15])

        if self.type == 'FORMULA':
            self.formula = self.db.allFormulas[self.stat]


class Node(object):
    """Named placeholder for an interior level of the dotted stat tree."""

    def __init__(self, name):
        self.name = name

    def __str__(self):
        return self.name


class Result(object):
    """Per-run x-by-y grids of floats, created on first access."""

    def __init__(self, x, y):
        self.data = {}
        self.x = x
        self.y = y

    def __contains__(self, run):
        return run in self.data

    def __getitem__(self, run):
        """Return the grid for *run*, creating a zero-filled one if needed."""
        if run not in self.data:
            # Build each row separately so the rows do not alias each other.
            self.data[run] = [[0.0] * self.y for _ in range(self.x)]
        return self.data[run]

class Database(object):
    """Connection to a MySQL stats database plus in-memory indexes of it.

    connect() loads the runs, subdata, formulas and stats tables into the
    ``all*`` lists and dictionaries and hangs every stat off this object
    as nested attributes named after the stat's dotted path.  data() then
    aggregates rows of the ``data`` table using the aggregate chosen
    through the ``method`` attribute ('sum', 'avg' or 'stdev').
    """

    def __init__(self):
        self.host = 'zizzer.pool'
        self.user = ''
        self.passwd = ''
        self.db = 'm5stats'
        self.cursor = None

        self.allStats = []
        self.allStatIds = {}
        self.allStatNames = {}

        self.allSubData = {}

        self.allRuns = []
        self.allRunIds = {}
        self.allRunNames = {}

        self.allFormulas = {}

        self.stattop = {}
        self.statdict = {}
        self.statlist = []

        self.mode = 'sum'
        self.runs = None
        self.ticks = None
        # Assigning ``method`` also binds ``_method``; see __setattr__.
        self.method = 'sum'

    def get(self, job, stat, system=None):
        """Return the value(s) of *stat* for the run named ``str(job)``.

        Returns None when no such run is loaded, when the stat is neither
        scalar nor vector, or when evaluation raises ``info.ProxyError``.
        If *system* is not given, ``job.system`` is used when present; a
        non-None system is resolved via ``self[system]`` and stored on
        ``stat.system`` before evaluation.
        """
        run = self.allRunNames.get(str(job), None)
        if run is None:
            return None

        # Only the names actually used are imported; the original also
        # imported a ``len`` that shadowed the builtin inside this method.
        from info import ProxyError, scalar, vector, value, values
        if system is None and hasattr(job, 'system'):
            system = job.system

        if system is not None:
            stat.system = self[system]
        try:
            if scalar(stat):
                return value(stat, run.run)
            if vector(stat):
                return values(stat, run.run)
        except ProxyError:
            return None

        return None

    def _execute(self, sql):
        """Run *sql* on the open cursor; results are read from the cursor."""
        self.cursor.execute(sql)

    @staticmethod
    def _any_of(column, values):
        """Return '(column=v1 or column=v2 ...)' for integer *values*."""
        terms = ['%s=%d' % (column, value) for value in values]
        return '(%s)' % ' or '.join(terms)

    def query(self, op, stat=None, ticks=None, group=False):
        """Execute raw SQL, or build an aggregate select statement.

        The class used to define query() twice: ``query(sql)``, which
        executed a statement, and ``query(op, stat, ticks, group=False)``,
        which returned SQL text.  The second definition replaced the first,
        so every one-argument call failed.  Both call forms are supported
        here:

        * ``query(sql)`` executes *sql* on the cursor and returns None.
        * ``query(op, stat, ticks, group=False)`` returns the SQL text that
          applies the aggregate *op* to the selected data.
        """
        if stat is None:
            self._execute(op)
            return None
        return self._aggregate_sql(op, stat, ticks, group)

    def update_dict(self, dict):
        """Copy the top-level stat tree entries into the mapping *dict*."""
        dict.update(self.stattop)

    def append(self, stat):
        """Register *stat* under its dotted name.

        ':' in the name is replaced by '__'.  Every path component but the
        last becomes a Node attribute (created on demand) and the stat
        itself is stored as the final attribute, starting from this object.
        """
        statname = stat.name.replace(':', '__')
        path = statname.split('.')
        pathtop = path[0]

        # Attributes are written through __dict__ directly, which bypasses
        # __setattr__.
        # NOTE(review): a path component equal to an existing attribute name
        # (e.g. 'runs') would be reused as if it were a Node -- confirm stat
        # names cannot collide with attribute names.
        node = self
        prefix = ''
        for name in path[:-1]:
            if name not in node.__dict__:
                node.__dict__[name] = Node(prefix + name)
            node = node.__dict__[name]
            prefix = '%s%s.' % (prefix, name)
        node.__dict__[path[-1]] = stat

        self.stattop[pathtop] = self.__dict__[pathtop]
        self.statdict[statname] = stat
        self.statlist.append(statname)

    def connect(self):
        """Open the database connection and load all metadata tables."""
        self.thedb = MySQLdb.connect(db=self.db,
                                     host=self.host,
                                     user=self.user,
                                     passwd=self.passwd)
        self.cursor = self.thedb.cursor()

        # RunData reads exactly four columns (id, name, user, project).  The
        # original select also returned rn_sample in third position, which
        # shifted ``user`` and ``project`` by one column.
        self._execute('select rn_id,rn_name,rn_user,rn_project from runs')
        for row in self.cursor.fetchall():
            run = RunData(row)
            self.allRuns.append(run)
            self.allRunIds[run.run] = run
            self.allRunNames[run.name] = run

        self._execute('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
        for row in self.cursor.fetchall():
            subdata = SubData(row)
            self.allSubData.setdefault(subdata.stat, []).append(subdata)

        self._execute('select * from formulas')
        for stat_id, formula in self.cursor.fetchall():
            # NOTE(review): assumes the driver returns the formula column as
            # an object with tostring() (e.g. array.array) -- confirm.
            self.allFormulas[int(stat_id)] = formula.tostring()

        # StatData looks formulas up through this class attribute.
        StatData.db = self
        self._execute('select * from stats')
        import info
        for row in self.cursor.fetchall():
            stat = info.NewStat(self, StatData(row))
            self.append(stat)
            self.allStats.append(stat)
            self.allStatIds[stat.stat] = stat
            self.allStatNames[stat.name] = stat

    def listRuns(self, user=None):
        """Print every loaded run, or only those belonging to *user*."""
        print('%-40s %-10s %-5s' % ('run name', 'user', 'id'))
        print('-' * 62)
        for run in self.allRuns:
            if user is None or user == run.user:
                print('%-40s %-10s %-10d' % (run.name, run.user, run.run))

    def _tick_sql(self, runs):
        """Build the select for the distinct ticks of *runs* (all if empty)."""
        # NOTE(review): stat id 1180 is hard-coded in the original queries;
        # which stat it identifies is not visible in this file.
        sql = 'select distinct dt_tick from data where dt_stat=1180'
        if runs:
            # Only add the run filter when there is something to filter on;
            # the original emitted 'and ()' for runs=None, which is invalid.
            sql += ' and ' + self._any_of('dt_run', [run.run for run in runs])
        return sql

    def listTicks(self, runs=None):
        """Print the distinct ticks recorded for *runs* (all runs if None)."""
        print("tick")
        print("----------------------------------------")
        for tick in self.retTicks(runs):
            print(tick)

    def retTicks(self, runs=None):
        """Return the distinct ticks recorded for *runs* (all runs if None).

        *runs* is an iterable of objects with an integer ``run`` attribute.
        """
        self._execute(self._tick_sql(runs))
        return [row[0] for row in self.cursor.fetchall()]

    def _sorted_stats(self, regex=None):
        """Return loaded stats in statcmp order, filtered by *regex*.

        A stat is kept when *regex* is None or matches the start of its
        name (``re.match`` semantics).
        """
        import functools
        rx = None
        if regex is not None:
            rx = re.compile(regex)

        names = sorted([stat.name for stat in self.allStats],
                       key=functools.cmp_to_key(statcmp))
        stats = [self.allStatNames[name] for name in names]
        return [stat for stat in stats
                if rx is None or rx.match(stat.name)]

    def listStats(self, regex=None):
        """Print all stats; *regex*, if given, prunes the list by name."""
        print('%-60s %-8s %-10s' % ('stat name', 'id', 'type'))
        print('-' * 80)
        for stat in self._sorted_stats(regex):
            print('%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type))

    def listFormulas(self, regex=None):
        """Print all FORMULA stats with their formula text.

        *regex*, if given, prunes the list by stat name.
        """
        print('%-60s %s' % ('formula name', 'formula'))
        print('-' * 80)
        for stat in self._sorted_stats(regex):
            if stat.type == 'FORMULA':
                print('%-60s %s' % (stat.name, self.allFormulas[stat.stat]))

    def getStat(self, stats):
        """Resolve stat ids and name patterns to stat objects.

        *stats* is a single item or a list of items.  An int is looked up
        as a stat id (KeyError if unknown); a str is compiled as a regular
        expression and selects every stat whose name it matches from the
        start.  Items of any other type are ignored.
        """
        if not isinstance(stats, list):
            stats = [stats]

        found = []
        for spec in stats:
            if isinstance(spec, int):
                found.append(self.allStatIds[spec])
            elif isinstance(spec, str):
                rx = re.compile(spec)
                found.extend([stat for stat in self.allStats
                              if rx.match(stat.name)])
        return found

    #########################################
    # get the data
    #
    def _aggregate_sql(self, op, stat, ticks, group=False):
        """Return SQL applying the aggregate *op* to the selected data.

        *stat* is one stat object or a list of them (anything with an
        integer ``stat`` attribute).  Rows are restricted to ``self.runs``
        and to *ticks* when those are non-empty.  With *group* true the
        tick is also selected and added to the grouping.
        """
        columns = ['dt_stat as stat', 'dt_run as run',
                   'dt_x as x', 'dt_y as y']
        if group:
            columns.append('dt_tick as tick')
        columns.append('%s(dt_data) as data' % op)

        if isinstance(stat, list):
            stat_ids = [s.stat for s in stat]
        else:
            stat_ids = [stat.stat]

        conditions = [self._any_of('dt_stat', stat_ids)]
        if self.runs:
            conditions.append(self._any_of('dt_run', self.runs))
        if ticks:
            conditions.append(self._any_of('dt_tick', ticks))

        grouping = 'dt_stat,dt_run,dt_x,dt_y'
        if group:
            grouping += ',dt_tick'

        return 'select %s from data where %s group by %s' % (
            ', '.join(columns), ' and '.join(conditions), grouping)

    def sum(self, *args, **kwargs):
        """Return SQL that totals the samples; arguments as for query()."""
        return self._aggregate_sql('sum', *args, **kwargs)

    def avg(self, stat, ticks):
        """Return SQL that averages the samples of *stat* over *ticks*."""
        return self._aggregate_sql('avg', stat, ticks)

    def stdev(self, stat, ticks):
        """Return SQL for the standard deviation of *stat* over *ticks*."""
        return self._aggregate_sql('stddev', stat, ticks)

    def __setattr__(self, attr, value):
        """Set an attribute; assigning ``method`` also selects the aggregate.

        Raises:
            AttributeError: if ``method`` is set to anything other than
                'sum', 'avg' or 'stdev'.  The old value is kept.
        """
        if attr == 'method':
            # Validate before storing so a bad value never sticks.
            if value not in ('sum', 'avg', 'stdev'):
                raise AttributeError(
                    "can only set method to: sum | avg | stdev")
            # Always a bound method, so data() calls it without passing self.
            super(Database, self).__setattr__('_method',
                                              getattr(self, value))
        super(Database, self).__setattr__(attr, value)

    def data(self, stat, ticks=None):
        """Fetch aggregated data for *stat* and return it as a Result.

        *ticks* defaults to ``self.ticks``.  The aggregate is the one
        selected through ``method``.  Every run's grid in the Result is
        sized to cover the largest x and y index seen across all rows.
        """
        if ticks is None:
            ticks = self.ticks
        self._execute(self._method(stat, ticks))

        points = [Data(row) for row in self.cursor.fetchall()]
        xmax = max([0] + [point.x for point in points])
        ymax = max([0] + [point.y for point in points])

        results = Result(xmax + 1, ymax + 1)
        for point in points:
            results[point.run][point.x][point.y] = point.data
        return results

    def __getitem__(self, key):
        """Return the top-level stat tree entry named *key*."""
        return self.stattop[key]