db.py revision 1929
18853Sandreas.hansson@arm.com# Copyright (c) 2003-2004 The Regents of The University of Michigan 212532Sandreas.sandberg@arm.com# All rights reserved. 38853Sandreas.hansson@arm.com# 48853Sandreas.hansson@arm.com# Redistribution and use in source and binary forms, with or without 58853Sandreas.hansson@arm.com# modification, are permitted provided that the following conditions are 68853Sandreas.hansson@arm.com# met: redistributions of source code must retain the above copyright 78853Sandreas.hansson@arm.com# notice, this list of conditions and the following disclaimer; 88853Sandreas.hansson@arm.com# redistributions in binary form must reproduce the above copyright 98853Sandreas.hansson@arm.com# notice, this list of conditions and the following disclaimer in the 108853Sandreas.hansson@arm.com# documentation and/or other materials provided with the distribution; 118853Sandreas.hansson@arm.com# neither the name of the copyright holders nor the names of its 128853Sandreas.hansson@arm.com# contributors may be used to endorse or promote products derived from 138853Sandreas.hansson@arm.com# this software without specific prior written permission. 148853Sandreas.hansson@arm.com# 158853Sandreas.hansson@arm.com# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 168853Sandreas.hansson@arm.com# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 178853Sandreas.hansson@arm.com# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 188853Sandreas.hansson@arm.com# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 198853Sandreas.hansson@arm.com# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 208853Sandreas.hansson@arm.com# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 218853Sandreas.hansson@arm.com# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 228853Sandreas.hansson@arm.com# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 238853Sandreas.hansson@arm.com# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 248853Sandreas.hansson@arm.com# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 258853Sandreas.hansson@arm.com# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 268853Sandreas.hansson@arm.com 278853Sandreas.hansson@arm.comimport MySQLdb, re, string 288853Sandreas.hansson@arm.com 298853Sandreas.hansson@arm.comdef statcmp(a, b): 308853Sandreas.hansson@arm.com v1 = a.split('.') 318853Sandreas.hansson@arm.com v2 = b.split('.') 328853Sandreas.hansson@arm.com 338853Sandreas.hansson@arm.com last = min(len(v1), len(v2)) - 1 348853Sandreas.hansson@arm.com for i,j in zip(v1[0:last], v2[0:last]): 358853Sandreas.hansson@arm.com if i != j: 368853Sandreas.hansson@arm.com return cmp(i, j) 378853Sandreas.hansson@arm.com 388853Sandreas.hansson@arm.com # Special compare for last element. 398853Sandreas.hansson@arm.com if len(v1) == len(v2): 4011793Sbrandon.potter@amd.com return cmp(v1[last], v2[last]) 4111793Sbrandon.potter@amd.com else: 428853Sandreas.hansson@arm.com return cmp(len(v1), len(v2)) 438853Sandreas.hansson@arm.com 448853Sandreas.hansson@arm.comclass RunData: 4512532Sandreas.sandberg@arm.com def __init__(self, row): 4614009Sgabeblack@google.com self.run = int(row[0]) 478853Sandreas.hansson@arm.com self.name = row[1] 4810564Sandreas.hansson@arm.com self.user = row[2] 4910564Sandreas.hansson@arm.com self.project = row[3] 5012749Sgiacomo.travaglini@arm.com 5112749Sgiacomo.travaglini@arm.comclass SubData: 5212749Sgiacomo.travaglini@arm.com def __init__(self, row): 5312749Sgiacomo.travaglini@arm.com self.stat = int(row[0]) 5412749Sgiacomo.travaglini@arm.com self.x = int(row[1]) 5514009Sgabeblack@google.com self.y = int(row[2]) 5614196Sgabeblack@google.com self.name = row[3] 5714009Sgabeblack@google.com self.descr = row[4] 588853Sandreas.hansson@arm.com 598853Sandreas.hansson@arm.comclass Data: 608853Sandreas.hansson@arm.com def __init__(self, row): 618853Sandreas.hansson@arm.com if len(row) != 5: 6212532Sandreas.sandberg@arm.com raise 'stat db error' 6314009Sgabeblack@google.com self.stat = int(row[0]) 6410564Sandreas.hansson@arm.com self.run = int(row[1]) 6510564Sandreas.hansson@arm.com self.x = int(row[2]) 6610564Sandreas.hansson@arm.com self.y = int(row[3]) 6712749Sgiacomo.travaglini@arm.com self.data = float(row[4]) 6812749Sgiacomo.travaglini@arm.com 6912749Sgiacomo.travaglini@arm.com def __repr__(self): 7012749Sgiacomo.travaglini@arm.com return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat, 7112749Sgiacomo.travaglini@arm.com self.run, self.x, self.y, self.data) 7214009Sgabeblack@google.com 7314196Sgabeblack@google.comclass StatData(object): 7414009Sgabeblack@google.com def __init__(self, row): 7510564Sandreas.hansson@arm.com self.stat = int(row[0]) 7610564Sandreas.hansson@arm.com self.name = row[1] 7710564Sandreas.hansson@arm.com self.desc = row[2] 7810564Sandreas.hansson@arm.com self.type = row[3] 7912532Sandreas.sandberg@arm.com self.prereq = int(row[5]) 8012532Sandreas.sandberg@arm.com self.precision = int(row[6]) 818853Sandreas.hansson@arm.com 828853Sandreas.hansson@arm.com import flags 838853Sandreas.hansson@arm.com self.flags = 0 848853Sandreas.hansson@arm.com if int(row[4]): self.flags |= flags.printable 858853Sandreas.hansson@arm.com if int(row[7]): self.flags |= flags.nozero 8612532Sandreas.sandberg@arm.com if int(row[8]): self.flags |= flags.nonan 878853Sandreas.hansson@arm.com if int(row[9]): self.flags |= flags.total 888853Sandreas.hansson@arm.com if int(row[10]): self.flags |= flags.pdf 898853Sandreas.hansson@arm.com if int(row[11]): self.flags |= flags.cdf 9014008Sgabeblack@google.com 9114008Sgabeblack@google.com if self.type == 'DIST' or self.type == 'VECTORDIST': 9214008Sgabeblack@google.com self.min = float(row[12]) 9314008Sgabeblack@google.com self.max = float(row[13]) 9414008Sgabeblack@google.com self.bktsize = float(row[14]) 9514009Sgabeblack@google.com self.size = int(row[15]) 9614008Sgabeblack@google.com 9714008Sgabeblack@google.com if self.type == 'FORMULA': 9814008Sgabeblack@google.com self.formula = self.db.allFormulas[self.stat] 9914008Sgabeblack@google.com 10014008Sgabeblack@google.comclass Node(object): 10114008Sgabeblack@google.com def __init__(self, name): 10214008Sgabeblack@google.com self.name = name 10314008Sgabeblack@google.com def __str__(self): 10414008Sgabeblack@google.com return self.name 10514008Sgabeblack@google.com 10614008Sgabeblack@google.comclass Result(object): 10714008Sgabeblack@google.com def __init__(self, x, y): 10814008Sgabeblack@google.com self.data = {} 10914008Sgabeblack@google.com self.x = x 11014008Sgabeblack@google.com self.y = y 11114008Sgabeblack@google.com 11214008Sgabeblack@google.com def __contains__(self, run): 11314012Sgabeblack@google.com return run in self.data 11414012Sgabeblack@google.com 11514012Sgabeblack@google.com def __getitem__(self, run): 11614012Sgabeblack@google.com if run not in self.data: 11714012Sgabeblack@google.com self.data[run] = [ [ 0.0 ] * self.y for i in xrange(self.x) ] 11814012Sgabeblack@google.com return self.data[run] 11914012Sgabeblack@google.com 12014012Sgabeblack@google.comclass Database(object): 12114012Sgabeblack@google.com def __init__(self): 12214012Sgabeblack@google.com self.host = 'zizzer.pool' 12314012Sgabeblack@google.com self.user = '' 12414012Sgabeblack@google.com self.passwd = '' 12514012Sgabeblack@google.com self.db = 'm5stats' 12614012Sgabeblack@google.com self.cursor = None 12714012Sgabeblack@google.com 128 self.allStats = [] 129 self.allStatIds = {} 130 self.allStatNames = {} 131 132 self.allSubData = {} 133 134 self.allRuns = [] 135 self.allRunIds = {} 136 self.allRunNames = {} 137 138 self.allBins = [] 139 self.allBinIds = {} 140 self.allBinNames = {} 141 142 self.allFormulas = {} 143 144 self.stattop = {} 145 self.statdict = {} 146 self.statlist = [] 147 148 self.mode = 'sum'; 149 self.runs = None 150 self.bins = None 151 self.ticks = None 152 self.method = 'sum' 153 self._method = type(self).sum 154 155 def get(self, job, stat): 156 run = self.allRunNames.get(job.name, None) 157 if run is None: 158 print 'run "%s" not found' % job 159 return None 160 161 from info import scalar, vector, value, total, len 162 stat.system = self[job.system] 163 if scalar(stat): 164 return value(stat, run.run) 165 if vector(stat): 166 return values(stat, run.run) 167 168 return None 169 170 def query(self, sql): 171 self.cursor.execute(sql) 172 173 def update_dict(self, dict): 174 dict.update(self.stattop) 175 176 def append(self, stat): 177 statname = re.sub(':', '__', stat.name) 178 path = string.split(statname, '.') 179 pathtop = path[0] 180 fullname = '' 181 182 x = self 183 while len(path) > 1: 184 name = path.pop(0) 185 if not x.__dict__.has_key(name): 186 x.__dict__[name] = Node(fullname + name) 187 x = x.__dict__[name] 188 fullname = '%s%s.' % (fullname, name) 189 190 name = path.pop(0) 191 x.__dict__[name] = stat 192 193 self.stattop[pathtop] = self.__dict__[pathtop] 194 self.statdict[statname] = stat 195 self.statlist.append(statname) 196 197 def connect(self): 198 # connect 199 self.thedb = MySQLdb.connect(db=self.db, 200 host=self.host, 201 user=self.user, 202 passwd=self.passwd) 203 204 # create a cursor 205 self.cursor = self.thedb.cursor() 206 207 self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project 208 from runs''') 209 for result in self.cursor.fetchall(): 210 run = RunData(result); 211 self.allRuns.append(run) 212 self.allRunIds[run.run] = run 213 self.allRunNames[run.name] = run 214 215 self.query('select * from bins') 216 for id,name in self.cursor.fetchall(): 217 self.allBinIds[int(id)] = name 218 self.allBinNames[name] = int(id) 219 220 self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata') 221 for result in self.cursor.fetchall(): 222 subdata = SubData(result) 223 if self.allSubData.has_key(subdata.stat): 224 self.allSubData[subdata.stat].append(subdata) 225 else: 226 self.allSubData[subdata.stat] = [ subdata ] 227 228 self.query('select * from formulas') 229 for id,formula in self.cursor.fetchall(): 230 self.allFormulas[int(id)] = formula.tostring() 231 232 StatData.db = self 233 self.query('select * from stats') 234 import info 235 for result in self.cursor.fetchall(): 236 stat = info.NewStat(self, StatData(result)) 237 self.append(stat) 238 self.allStats.append(stat) 239 self.allStatIds[stat.stat] = stat 240 self.allStatNames[stat.name] = stat 241 242 # Name: listbins 243 # Desc: Prints all bins matching regex argument, if no argument 244 # is given all bins are returned 245 def listBins(self, regex='.*'): 246 print '%-50s %-10s' % ('bin name', 'id') 247 print '-' * 61 248 names = self.allBinNames.keys() 249 names.sort() 250 for name in names: 251 id = self.allBinNames[name] 252 print '%-50s %-10d' % (name, id) 253 254 # Name: listruns 255 # Desc: Prints all runs matching a given user, if no argument 256 # is given all runs are returned 257 def listRuns(self, user=None): 258 print '%-40s %-10s %-5s' % ('run name', 'user', 'id') 259 print '-' * 62 260 for run in self.allRuns: 261 if user == None or user == run.user: 262 print '%-40s %-10s %-10d' % (run.name, run.user, run.run) 263 264 # Name: listTicks 265 # Desc: Prints all samples for a given run 266 def listTicks(self, runs=None): 267 print "tick" 268 print "----------------------------------------" 269 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 270 if runs != None: 271 first = True 272 for run in runs: 273 if first: 274 # sql += ' where' 275 first = False 276 else: 277 sql += ' or' 278 sql += ' dt_run=%s' % run.run 279 sql += ')' 280 self.query(sql) 281 for r in self.cursor.fetchall(): 282 print r[0] 283 284 # Name: retTicks 285 # Desc: Prints all samples for a given run 286 def retTicks(self, runs=None): 287 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 288 if runs != None: 289 first = True 290 for run in runs: 291 if first: 292 first = False 293 else: 294 sql += ' or' 295 sql += ' dt_run=%s' % run.run 296 sql += ')' 297 self.query(sql) 298 ret = [] 299 for r in self.cursor.fetchall(): 300 ret.append(r[0]) 301 return ret 302 303 # Name: liststats 304 # Desc: Prints all statistics that appear in the database, 305 # the optional argument is a regular expression that can 306 # be used to prune the result set 307 def listStats(self, regex=None): 308 print '%-60s %-8s %-10s' % ('stat name', 'id', 'type') 309 print '-' * 80 310 311 rx = None 312 if regex != None: 313 rx = re.compile(regex) 314 315 stats = [ stat.name for stat in self.allStats ] 316 stats.sort(statcmp) 317 for stat in stats: 318 stat = self.allStatNames[stat] 319 if rx == None or rx.match(stat.name): 320 print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type) 321 322 # Name: liststats 323 # Desc: Prints all statistics that appear in the database, 324 # the optional argument is a regular expression that can 325 # be used to prune the result set 326 def listFormulas(self, regex=None): 327 print '%-60s %s' % ('formula name', 'formula') 328 print '-' * 80 329 330 rx = None 331 if regex != None: 332 rx = re.compile(regex) 333 334 stats = [ stat.name for stat in self.allStats ] 335 stats.sort(statcmp) 336 for stat in stats: 337 stat = self.allStatNames[stat] 338 if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)): 339 print '%-60s %s' % (stat.name, self.allFormulas[stat.stat]) 340 341 def getStat(self, stats): 342 if type(stats) is not list: 343 stats = [ stats ] 344 345 ret = [] 346 for stat in stats: 347 if type(stat) is int: 348 ret.append(self.allStatIds[stat]) 349 350 if type(stat) is str: 351 rx = re.compile(stat) 352 for stat in self.allStats: 353 if rx.match(stat.name): 354 ret.append(stat) 355 return ret 356 357 def getBin(self, bins): 358 if type(bins) is not list: 359 bins = [ bins ] 360 361 ret = [] 362 for bin in bins: 363 if type(bin) is int: 364 ret.append(bin) 365 elif type(bin) is str: 366 ret.append(self.allBinNames[bin]) 367 else: 368 for name,id in self.allBinNames.items(): 369 if bin.match(name): 370 ret.append(id) 371 372 return ret 373 374 def getNotBin(self, bin): 375 map = {} 376 for bin in getBin(bin): 377 map[bin] = 1 378 379 ret = [] 380 for bin in self.allBinIds.keys(): 381 if not map.has_key(bin): 382 ret.append(bin) 383 384 return ret 385 386 ######################################### 387 # get the data 388 # 389 def inner(self, op, stat, bins, ticks, group=False): 390 sql = 'select ' 391 sql += 'dt_stat as stat, ' 392 sql += 'dt_run as run, ' 393 sql += 'dt_x as x, ' 394 sql += 'dt_y as y, ' 395 if group: 396 sql += 'dt_tick as tick, ' 397 sql += '%s(dt_data) as data ' % op 398 sql += 'from data ' 399 sql += 'where ' 400 401 if isinstance(stat, list): 402 val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ]) 403 sql += ' (%s)' % val 404 else: 405 sql += ' dt_stat=%d' % stat.stat 406 407 if self.runs != None and len(self.runs): 408 val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ]) 409 sql += ' and (%s)' % val 410 411 if bins != None and len(bins): 412 val = ' or '.join([ 'dt_bin=%d' % b for b in bins ]) 413 sql += ' and (%s)' % val 414 415 if ticks != None and len(ticks): 416 val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ]) 417 sql += ' and (%s)' % val 418 419 sql += ' group by dt_stat,dt_run,dt_x,dt_y' 420 if group: 421 sql += ',dt_tick' 422 return sql 423 424 def outer(self, op_out, op_in, stat, bins, ticks): 425 sql = self.inner(op_in, stat, bins, ticks, True) 426 sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql) 427 sql += 'group by stat,run,x,y' 428 return sql 429 430 # Name: sum 431 # Desc: given a run, a stat and an array of samples and bins, 432 # sum all the bins and then get the standard deviation of the 433 # samples for non-binned runs. This will just return the average 434 # of samples, however a bin array still must be passed 435 def sum(self, stat, bins, ticks): 436 return self.inner('sum', stat, bins, ticks) 437 438 # Name: avg 439 # Desc: given a run, a stat and an array of samples and bins, 440 # sum all the bins and then average the samples for non-binned 441 # runs this will just return the average of samples, however 442 # a bin array still must be passed 443 def avg(self, stat, bins, ticks): 444 return self.outer('avg', 'sum', stat, bins, ticks) 445 446 # Name: stdev 447 # Desc: given a run, a stat and an array of samples and bins, 448 # sum all the bins and then get the standard deviation of the 449 # samples for non-binned runs. This will just return the average 450 # of samples, however a bin array still must be passed 451 def stdev(self, stat, bins, ticks): 452 return self.outer('stddev', 'sum', stat, bins, ticks) 453 454 def __setattr__(self, attr, value): 455 super(Database, self).__setattr__(attr, value) 456 if attr != 'method': 457 return 458 459 if value == 'sum': 460 self._method = self.sum 461 elif value == 'avg': 462 self._method = self.avg 463 elif value == 'stdev': 464 self._method = self.stdev 465 else: 466 raise AttributeError, "can only set get to: sum | avg | stdev" 467 468 def data(self, stat, bins=None, ticks=None): 469 if bins is None: 470 bins = self.bins 471 if ticks is None: 472 ticks = self.ticks 473 sql = self._method(self, stat, bins, ticks) 474 self.query(sql) 475 476 runs = {} 477 xmax = 0 478 ymax = 0 479 for x in self.cursor.fetchall(): 480 data = Data(x) 481 if not runs.has_key(data.run): 482 runs[data.run] = {} 483 if not runs[data.run].has_key(data.x): 484 runs[data.run][data.x] = {} 485 486 xmax = max(xmax, data.x) 487 ymax = max(ymax, data.y) 488 runs[data.run][data.x][data.y] = data.data 489 490 results = Result(xmax + 1, ymax + 1) 491 for run,data in runs.iteritems(): 492 result = results[run] 493 for x,ydata in data.iteritems(): 494 for y,data in ydata.iteritems(): 495 result[x][y] = data 496 return results 497 498 def __getitem__(self, key): 499 return self.stattop[key] 500