db.py revision 1049
1import MySQLdb, re, string 2 3def statcmp(a, b): 4 v1 = a.split('.') 5 v2 = b.split('.') 6 7 last = min(len(v1), len(v2)) - 1 8 for i,j in zip(v1[0:last], v2[0:last]): 9 if i != j: 10 return cmp(i, j) 11 12 # Special compare for last element. 13 if len(v1) == len(v2): 14 return cmp(v1[last], v2[last]) 15 else: 16 return cmp(len(v1), len(v2)) 17 18class RunData: 19 def __init__(self, row): 20 self.run = int(row[0]) 21 self.name = row[1] 22 self.user = row[2] 23 self.project = row[3] 24 25class SubData: 26 def __init__(self, row): 27 self.stat = int(row[0]) 28 self.x = int(row[1]) 29 self.y = int(row[2]) 30 self.name = row[3] 31 self.descr = row[4] 32 33class Data: 34 def __init__(self, row): 35 if len(row) != 5: 36 raise 'stat db error' 37 self.stat = int(row[0]) 38 self.run = int(row[1]) 39 self.x = int(row[2]) 40 self.y = int(row[3]) 41 self.data = float(row[4]) 42 43 def __repr__(self): 44 return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat, 45 self.run, self.x, self.y, self.data) 46 47class StatData(object): 48 def __init__(self, row): 49 self.stat = int(row[0]) 50 self.name = row[1] 51 self.desc = row[2] 52 self.type = row[3] 53 self.prereq = int(row[5]) 54 self.precision = int(row[6]) 55 56 import flags 57 self.flags = 0 58 if int(row[4]): self.flags |= flags.printable 59 if int(row[7]): self.flags |= flags.nozero 60 if int(row[8]): self.flags |= flags.nonan 61 if int(row[9]): self.flags |= flags.total 62 if int(row[10]): self.flags |= flags.pdf 63 if int(row[11]): self.flags |= flags.cdf 64 65 if self.type == 'DIST' or self.type == 'VECTORDIST': 66 self.min = float(row[12]) 67 self.max = float(row[13]) 68 self.bktsize = float(row[14]) 69 self.size = int(row[15]) 70 71 if self.type == 'FORMULA': 72 self.formula = self.db.allFormulas[self.stat] 73 74class Node(object): 75 def __init__(self, name): 76 self.name = name 77 def __str__(self): 78 return name 79 80class Database(object): 81 def __init__(self): 82 self.host = 'zizzer.pool' 83 self.user = '' 84 self.passwd = '' 85 self.db = 'm5stats' 86 self.cursor = None 87 88 self.allStats = [] 89 self.allStatIds = {} 90 self.allStatNames = {} 91 92 self.allSubData = {} 93 94 self.allRuns = [] 95 self.allRunIds = {} 96 self.allRunNames = {} 97 98 self.allBins = [] 99 self.allBinIds = {} 100 self.allBinNames = {} 101 102 self.allFormulas = {} 103 104 self.stattop = {} 105 self.statdict = {} 106 self.statlist = [] 107 108 self.mode = 'sum'; 109 self.runs = None 110 self.bins = None 111 self.ticks = None 112 self.__dict__['get'] = type(self).sum 113 114 def query(self, sql): 115 self.cursor.execute(sql) 116 117 def update_dict(self, dict): 118 dict.update(self.stattop) 119 120 def append(self, stat): 121 statname = re.sub(':', '__', stat.name) 122 path = string.split(statname, '.') 123 pathtop = path[0] 124 fullname = '' 125 126 x = self 127 while len(path) > 1: 128 name = path.pop(0) 129 if not x.__dict__.has_key(name): 130 x.__dict__[name] = Node(fullname + name) 131 x = x.__dict__[name] 132 fullname = '%s%s.' % (fullname, name) 133 134 name = path.pop(0) 135 x.__dict__[name] = stat 136 137 self.stattop[pathtop] = self.__dict__[pathtop] 138 self.statdict[statname] = stat 139 self.statlist.append(statname) 140 141 def connect(self): 142 # connect 143 self.thedb = MySQLdb.connect(db=self.db, 144 host=self.host, 145 user=self.user, 146 passwd=self.passwd) 147 148 # create a cursor 149 self.cursor = self.thedb.cursor() 150 151 self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project 152 from runs''') 153 for result in self.cursor.fetchall(): 154 run = RunData(result); 155 self.allRuns.append(run) 156 self.allRunIds[run.run] = run 157 self.allRunNames[run.name] = run 158 159 self.query('select * from bins') 160 for id,name in self.cursor.fetchall(): 161 self.allBinIds[int(id)] = name 162 self.allBinNames[name] = int(id) 163 164 self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata') 165 for result in self.cursor.fetchall(): 166 subdata = SubData(result) 167 if self.allSubData.has_key(subdata.stat): 168 self.allSubData[subdata.stat].append(subdata) 169 else: 170 self.allSubData[subdata.stat] = [ subdata ] 171 172 self.query('select * from formulas') 173 for id,formula in self.cursor.fetchall(): 174 self.allFormulas[int(id)] = formula 175 176 StatData.db = self 177 self.query('select * from stats') 178 import info 179 for result in self.cursor.fetchall(): 180 stat = info.NewStat(StatData(result)) 181 self.append(stat) 182 self.allStats.append(stat) 183 self.allStatIds[stat.stat] = stat 184 self.allStatNames[stat.name] = stat 185 186 # Name: listbins 187 # Desc: Prints all bins matching regex argument, if no argument 188 # is given all bins are returned 189 def listBins(self, regex='.*'): 190 print '%-50s %-10s' % ('bin name', 'id') 191 print '-' * 61 192 names = self.allBinNames.keys() 193 names.sort() 194 for name in names: 195 id = self.allBinNames[name] 196 print '%-50s %-10d' % (name, id) 197 198 # Name: listruns 199 # Desc: Prints all runs matching a given user, if no argument 200 # is given all runs are returned 201 def listRuns(self, user=None): 202 print '%-40s %-10s %-5s' % ('run name', 'user', 'id') 203 print '-' * 62 204 for run in self.allRuns: 205 if user == None or user == run.user: 206 print '%-40s %-10s %-10d' % (run.name, run.user, run.run) 207 208 # Name: listTicks 209 # Desc: Prints all samples for a given run 210 def listTicks(self, run=None): 211 print "tick" 212 print "----------------------------------------" 213 sql = 'select distinct dt_tick from data where dt_stat=1950' 214 #if run != None: 215 # sql += ' where dt_run=%d' % run 216 self.query(sql) 217 for r in self.cursor.fetchall(): 218 print r[0] 219 220 # Name: liststats 221 # Desc: Prints all statistics that appear in the database, 222 # the optional argument is a regular expression that can 223 # be used to prune the result set 224 def listStats(self, regex=None): 225 print '%-60s %-8s %-10s' % ('stat name', 'id', 'type') 226 print '-' * 80 227 228 rx = None 229 if regex != None: 230 rx = re.compile(regex) 231 232 stats = [ stat.name for stat in self.allStats ] 233 stats.sort(statcmp) 234 for stat in stats: 235 stat = self.allStatNames[stat] 236 if rx == None or rx.match(stat.name): 237 print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type) 238 239 # Name: liststats 240 # Desc: Prints all statistics that appear in the database, 241 # the optional argument is a regular expression that can 242 # be used to prune the result set 243 def listFormulas(self, regex=None): 244 print '%-60s %s' % ('formula name', 'formula') 245 print '-' * 80 246 247 rx = None 248 if regex != None: 249 rx = re.compile(regex) 250 251 stats = [ stat.name for stat in self.allStats ] 252 stats.sort(statcmp) 253 for stat in stats: 254 stat = self.allStatNames[stat] 255 if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)): 256 print '%-60s %s' % (stat.name, self.allFormulas[stat.stat]) 257 258 def getStat(self, stats): 259 if type(stats) is not list: 260 stats = [ stats ] 261 262 ret = [] 263 for stat in stats: 264 if type(stat) is int: 265 ret.append(self.allStatIds[stat]) 266 267 if type(stat) is str: 268 rx = re.compile(stat) 269 for stat in self.allStats: 270 if rx.match(stat.name): 271 ret.append(stat) 272 return ret 273 274 def getBin(self, bins): 275 if type(bins) is not list: 276 bins = [ bins ] 277 278 ret = [] 279 for bin in bins: 280 if type(bin) is int: 281 ret.append(bin) 282 elif type(bin) is str: 283 ret.append(self.allBinNames[bin]) 284 else: 285 for name,id in self.allBinNames.items(): 286 if bin.match(name): 287 ret.append(id) 288 289 return ret 290 291 def getNotBin(self, bin): 292 map = {} 293 for bin in getBin(bin): 294 map[bin] = 1 295 296 ret = [] 297 for bin in self.allBinIds.keys(): 298 if not map.has_key(bin): 299 ret.append(bin) 300 301 return ret 302 303 ######################################### 304 # get the data 305 # 306 def inner(self, op, stat, bins, ticks, group=False): 307 sql = 'select ' 308 sql += 'dt_stat as stat, ' 309 sql += 'dt_run as run, ' 310 sql += 'dt_x as x, ' 311 sql += 'dt_y as y, ' 312 if group: 313 sql += 'dt_tick as tick, ' 314 sql += '%s(dt_data) as data ' % op 315 sql += 'from data ' 316 sql += 'where ' 317 318 if isinstance(stat, list): 319 val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ]) 320 sql += ' (%s)' % val 321 else: 322 sql += ' dt_stat=%d' % stat.stat 323 324 if self.runs != None and len(self.runs): 325 val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ]) 326 sql += ' and (%s)' % val 327 328 if bins != None and len(bins): 329 val = ' or '.join([ 'dt_bin=%d' % b for b in bins ]) 330 sql += ' and (%s)' % val 331 332 if ticks != None and len(ticks): 333 val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ]) 334 sql += ' and (%s)' % val 335 336 sql += ' group by dt_stat,dt_run,dt_x,dt_y' 337 if group: 338 sql += ',dt_tick' 339 return sql 340 341 def outer(self, op_out, op_in, stat, bins, ticks): 342 sql = self.inner(op_in, stat, bins, ticks, True) 343 sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql) 344 sql += 'group by stat,run,x,y' 345 return sql 346 347 # Name: sum 348 # Desc: given a run, a stat and an array of samples and bins, 349 # sum all the bins and then get the standard deviation of the 350 # samples for non-binned runs. This will just return the average 351 # of samples, however a bin array still must be passed 352 def sum(self, stat, bins, ticks): 353 return self.inner('sum', stat, bins, ticks) 354 355 # Name: avg 356 # Desc: given a run, a stat and an array of samples and bins, 357 # sum all the bins and then average the samples for non-binned 358 # runs this will just return the average of samples, however 359 # a bin array still must be passed 360 def avg(self, stat, bins, ticks): 361 return self.outer('avg', 'sum', stat, bins, ticks) 362 363 # Name: stdev 364 # Desc: given a run, a stat and an array of samples and bins, 365 # sum all the bins and then get the standard deviation of the 366 # samples for non-binned runs. This will just return the average 367 # of samples, however a bin array still must be passed 368 def stdev(self, stat, bins, ticks): 369 return self.outer('stddev', 'sum', stat, bins, ticks) 370 371 def __getattribute__(self, attr): 372 if attr != 'get': 373 return super(Database, self).__getattribute__(attr) 374 375 if self.__dict__['get'] == type(self).sum: 376 return 'sum' 377 elif self.__dict__['get'] == type(self).avg: 378 return 'avg' 379 elif self.__dict__['get'] == type(self).stdev: 380 return 'stdev' 381 else: 382 return '' 383 384 def __setattr__(self, attr, value): 385 if attr != 'get': 386 super(Database, self).__setattr__(attr, value) 387 return 388 389 if value == 'sum': 390 self.__dict__['get'] = type(self).sum 391 elif value == 'avg': 392 self.__dict__['get'] = type(self).avg 393 elif value == 'stdev': 394 self.__dict__['get'] = type(self).stdev 395 else: 396 raise AttributeError, "can only set get to: sum | avg | stdev" 397 398 def data(self, stat, bins=None, ticks=None): 399 if bins is None: 400 bins = self.bins 401 if ticks is None: 402 ticks = self.ticks 403 sql = self.__dict__['get'](self, stat, bins, ticks) 404 self.query(sql) 405 406 runs = {} 407 for x in self.cursor.fetchall(): 408 data = Data(x) 409 if not runs.has_key(data.run): 410 runs[data.run] = {} 411 if not runs[data.run].has_key(data.x): 412 runs[data.run][data.x] = {} 413 414 runs[data.run][data.x][data.y] = data.data 415 return runs 416