db.py revision 1309:7daf185ddae8
1import MySQLdb, re, string 2 3def statcmp(a, b): 4 v1 = a.split('.') 5 v2 = b.split('.') 6 7 last = min(len(v1), len(v2)) - 1 8 for i,j in zip(v1[0:last], v2[0:last]): 9 if i != j: 10 return cmp(i, j) 11 12 # Special compare for last element. 13 if len(v1) == len(v2): 14 return cmp(v1[last], v2[last]) 15 else: 16 return cmp(len(v1), len(v2)) 17 18class RunData: 19 def __init__(self, row): 20 self.run = int(row[0]) 21 self.name = row[1] 22 self.user = row[2] 23 self.project = row[3] 24 25class SubData: 26 def __init__(self, row): 27 self.stat = int(row[0]) 28 self.x = int(row[1]) 29 self.y = int(row[2]) 30 self.name = row[3] 31 self.descr = row[4] 32 33class Data: 34 def __init__(self, row): 35 if len(row) != 5: 36 raise 'stat db error' 37 self.stat = int(row[0]) 38 self.run = int(row[1]) 39 self.x = int(row[2]) 40 self.y = int(row[3]) 41 self.data = float(row[4]) 42 43 def __repr__(self): 44 return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat, 45 self.run, self.x, self.y, self.data) 46 47class StatData(object): 48 def __init__(self, row): 49 self.stat = int(row[0]) 50 self.name = row[1] 51 self.desc = row[2] 52 self.type = row[3] 53 self.prereq = int(row[5]) 54 self.precision = int(row[6]) 55 56 import flags 57 self.flags = 0 58 if int(row[4]): self.flags |= flags.printable 59 if int(row[7]): self.flags |= flags.nozero 60 if int(row[8]): self.flags |= flags.nonan 61 if int(row[9]): self.flags |= flags.total 62 if int(row[10]): self.flags |= flags.pdf 63 if int(row[11]): self.flags |= flags.cdf 64 65 if self.type == 'DIST' or self.type == 'VECTORDIST': 66 self.min = float(row[12]) 67 self.max = float(row[13]) 68 self.bktsize = float(row[14]) 69 self.size = int(row[15]) 70 71 if self.type == 'FORMULA': 72 self.formula = self.db.allFormulas[self.stat] 73 74class Node(object): 75 def __init__(self, name): 76 self.name = name 77 def __str__(self): 78 return name 79 80class Database(object): 81 def __init__(self): 82 self.host = 'zizzer.pool' 83 self.user = '' 84 self.passwd = '' 85 self.db = 'm5stats' 86 self.cursor = None 87 88 self.allStats = [] 89 self.allStatIds = {} 90 self.allStatNames = {} 91 92 self.allSubData = {} 93 94 self.allRuns = [] 95 self.allRunIds = {} 96 self.allRunNames = {} 97 98 self.allBins = [] 99 self.allBinIds = {} 100 self.allBinNames = {} 101 102 self.allFormulas = {} 103 104 self.stattop = {} 105 self.statdict = {} 106 self.statlist = [] 107 108 self.mode = 'sum'; 109 self.runs = None 110 self.bins = None 111 self.ticks = None 112 self.__dict__['get'] = type(self).sum 113 114 def query(self, sql): 115 self.cursor.execute(sql) 116 117 def update_dict(self, dict): 118 dict.update(self.stattop) 119 120 def append(self, stat): 121 statname = re.sub(':', '__', stat.name) 122 path = string.split(statname, '.') 123 pathtop = path[0] 124 fullname = '' 125 126 x = self 127 while len(path) > 1: 128 name = path.pop(0) 129 if not x.__dict__.has_key(name): 130 x.__dict__[name] = Node(fullname + name) 131 x = x.__dict__[name] 132 fullname = '%s%s.' % (fullname, name) 133 134 name = path.pop(0) 135 x.__dict__[name] = stat 136 137 self.stattop[pathtop] = self.__dict__[pathtop] 138 self.statdict[statname] = stat 139 self.statlist.append(statname) 140 141 def connect(self): 142 # connect 143 self.thedb = MySQLdb.connect(db=self.db, 144 host=self.host, 145 user=self.user, 146 passwd=self.passwd) 147 148 # create a cursor 149 self.cursor = self.thedb.cursor() 150 151 self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project 152 from runs''') 153 for result in self.cursor.fetchall(): 154 run = RunData(result); 155 self.allRuns.append(run) 156 self.allRunIds[run.run] = run 157 self.allRunNames[run.name] = run 158 159 self.query('select * from bins') 160 for id,name in self.cursor.fetchall(): 161 self.allBinIds[int(id)] = name 162 self.allBinNames[name] = int(id) 163 164 self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata') 165 for result in self.cursor.fetchall(): 166 subdata = SubData(result) 167 if self.allSubData.has_key(subdata.stat): 168 self.allSubData[subdata.stat].append(subdata) 169 else: 170 self.allSubData[subdata.stat] = [ subdata ] 171 172 self.query('select * from formulas') 173 for id,formula in self.cursor.fetchall(): 174 self.allFormulas[int(id)] = formula.tostring() 175 176 StatData.db = self 177 self.query('select * from stats') 178 import info 179 for result in self.cursor.fetchall(): 180 stat = info.NewStat(StatData(result)) 181 self.append(stat) 182 self.allStats.append(stat) 183 self.allStatIds[stat.stat] = stat 184 self.allStatNames[stat.name] = stat 185 186 # Name: listbins 187 # Desc: Prints all bins matching regex argument, if no argument 188 # is given all bins are returned 189 def listBins(self, regex='.*'): 190 print '%-50s %-10s' % ('bin name', 'id') 191 print '-' * 61 192 names = self.allBinNames.keys() 193 names.sort() 194 for name in names: 195 id = self.allBinNames[name] 196 print '%-50s %-10d' % (name, id) 197 198 # Name: listruns 199 # Desc: Prints all runs matching a given user, if no argument 200 # is given all runs are returned 201 def listRuns(self, user=None): 202 print '%-40s %-10s %-5s' % ('run name', 'user', 'id') 203 print '-' * 62 204 for run in self.allRuns: 205 if user == None or user == run.user: 206 print '%-40s %-10s %-10d' % (run.name, run.user, run.run) 207 208 # Name: listTicks 209 # Desc: Prints all samples for a given run 210 def listTicks(self, runs=None): 211 print "tick" 212 print "----------------------------------------" 213 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 214 if runs != None: 215 first = True 216 for run in runs: 217 if first: 218 # sql += ' where' 219 first = False 220 else: 221 sql += ' or' 222 sql += ' dt_run=%s' % run.run 223 sql += ')' 224 self.query(sql) 225 for r in self.cursor.fetchall(): 226 print r[0] 227 228 # Name: retTicks 229 # Desc: Prints all samples for a given run 230 def retTicks(self, runs=None): 231 sql = 'select distinct dt_tick from data where dt_stat=1180 and (' 232 if runs != None: 233 first = True 234 for run in runs: 235 if first: 236 first = False 237 else: 238 sql += ' or' 239 sql += ' dt_run=%s' % run.run 240 sql += ')' 241 self.query(sql) 242 ret = [] 243 for r in self.cursor.fetchall(): 244 ret.append(r[0]) 245 return ret 246 247 # Name: liststats 248 # Desc: Prints all statistics that appear in the database, 249 # the optional argument is a regular expression that can 250 # be used to prune the result set 251 def listStats(self, regex=None): 252 print '%-60s %-8s %-10s' % ('stat name', 'id', 'type') 253 print '-' * 80 254 255 rx = None 256 if regex != None: 257 rx = re.compile(regex) 258 259 stats = [ stat.name for stat in self.allStats ] 260 stats.sort(statcmp) 261 for stat in stats: 262 stat = self.allStatNames[stat] 263 if rx == None or rx.match(stat.name): 264 print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type) 265 266 # Name: liststats 267 # Desc: Prints all statistics that appear in the database, 268 # the optional argument is a regular expression that can 269 # be used to prune the result set 270 def listFormulas(self, regex=None): 271 print '%-60s %s' % ('formula name', 'formula') 272 print '-' * 80 273 274 rx = None 275 if regex != None: 276 rx = re.compile(regex) 277 278 stats = [ stat.name for stat in self.allStats ] 279 stats.sort(statcmp) 280 for stat in stats: 281 stat = self.allStatNames[stat] 282 if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)): 283 print '%-60s %s' % (stat.name, self.allFormulas[stat.stat]) 284 285 def getStat(self, stats): 286 if type(stats) is not list: 287 stats = [ stats ] 288 289 ret = [] 290 for stat in stats: 291 if type(stat) is int: 292 ret.append(self.allStatIds[stat]) 293 294 if type(stat) is str: 295 rx = re.compile(stat) 296 for stat in self.allStats: 297 if rx.match(stat.name): 298 ret.append(stat) 299 return ret 300 301 def getBin(self, bins): 302 if type(bins) is not list: 303 bins = [ bins ] 304 305 ret = [] 306 for bin in bins: 307 if type(bin) is int: 308 ret.append(bin) 309 elif type(bin) is str: 310 ret.append(self.allBinNames[bin]) 311 else: 312 for name,id in self.allBinNames.items(): 313 if bin.match(name): 314 ret.append(id) 315 316 return ret 317 318 def getNotBin(self, bin): 319 map = {} 320 for bin in getBin(bin): 321 map[bin] = 1 322 323 ret = [] 324 for bin in self.allBinIds.keys(): 325 if not map.has_key(bin): 326 ret.append(bin) 327 328 return ret 329 330 ######################################### 331 # get the data 332 # 333 def inner(self, op, stat, bins, ticks, group=False): 334 sql = 'select ' 335 sql += 'dt_stat as stat, ' 336 sql += 'dt_run as run, ' 337 sql += 'dt_x as x, ' 338 sql += 'dt_y as y, ' 339 if group: 340 sql += 'dt_tick as tick, ' 341 sql += '%s(dt_data) as data ' % op 342 sql += 'from data ' 343 sql += 'where ' 344 345 if isinstance(stat, list): 346 val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ]) 347 sql += ' (%s)' % val 348 else: 349 sql += ' dt_stat=%d' % stat.stat 350 351 if self.runs != None and len(self.runs): 352 val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ]) 353 sql += ' and (%s)' % val 354 355 if bins != None and len(bins): 356 val = ' or '.join([ 'dt_bin=%d' % b for b in bins ]) 357 sql += ' and (%s)' % val 358 359 if ticks != None and len(ticks): 360 val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ]) 361 sql += ' and (%s)' % val 362 363 sql += ' group by dt_stat,dt_run,dt_x,dt_y' 364 if group: 365 sql += ',dt_tick' 366 return sql 367 368 def outer(self, op_out, op_in, stat, bins, ticks): 369 sql = self.inner(op_in, stat, bins, ticks, True) 370 sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql) 371 sql += 'group by stat,run,x,y' 372 return sql 373 374 # Name: sum 375 # Desc: given a run, a stat and an array of samples and bins, 376 # sum all the bins and then get the standard deviation of the 377 # samples for non-binned runs. This will just return the average 378 # of samples, however a bin array still must be passed 379 def sum(self, stat, bins, ticks): 380 return self.inner('sum', stat, bins, ticks) 381 382 # Name: avg 383 # Desc: given a run, a stat and an array of samples and bins, 384 # sum all the bins and then average the samples for non-binned 385 # runs this will just return the average of samples, however 386 # a bin array still must be passed 387 def avg(self, stat, bins, ticks): 388 return self.outer('avg', 'sum', stat, bins, ticks) 389 390 # Name: stdev 391 # Desc: given a run, a stat and an array of samples and bins, 392 # sum all the bins and then get the standard deviation of the 393 # samples for non-binned runs. This will just return the average 394 # of samples, however a bin array still must be passed 395 def stdev(self, stat, bins, ticks): 396 return self.outer('stddev', 'sum', stat, bins, ticks) 397 398 def __getattribute__(self, attr): 399 if attr != 'get': 400 return super(Database, self).__getattribute__(attr) 401 402 if self.__dict__['get'] == type(self).sum: 403 return 'sum' 404 elif self.__dict__['get'] == type(self).avg: 405 return 'avg' 406 elif self.__dict__['get'] == type(self).stdev: 407 return 'stdev' 408 else: 409 return '' 410 411 def __setattr__(self, attr, value): 412 if attr != 'get': 413 super(Database, self).__setattr__(attr, value) 414 return 415 416 if value == 'sum': 417 self.__dict__['get'] = type(self).sum 418 elif value == 'avg': 419 self.__dict__['get'] = type(self).avg 420 elif value == 'stdev': 421 self.__dict__['get'] = type(self).stdev 422 else: 423 raise AttributeError, "can only set get to: sum | avg | stdev" 424 425 def data(self, stat, bins=None, ticks=None): 426 if bins is None: 427 bins = self.bins 428 if ticks is None: 429 ticks = self.ticks 430 sql = self.__dict__['get'](self, stat, bins, ticks) 431 self.query(sql) 432 433 runs = {} 434 for x in self.cursor.fetchall(): 435 data = Data(x) 436 if not runs.has_key(data.run): 437 runs[data.run] = {} 438 if not runs[data.run].has_key(data.x): 439 runs[data.run][data.x] = {} 440 441 runs[data.run][data.x][data.y] = data.data 442 return runs 443