# profile.py revision 2014:7df693ff6fa4
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from orderdict import orderdict
import output

class FileData(dict):
    """Sections of a profile dump file, keyed by section title.

    A line that starts with '>>>' opens a new section; its title is the
    rest of that (stripped) line.  Every other line is stripped and
    appended to the list of the most recently opened section.  Lines that
    appear before the first header go into a list that is never stored.
    """
    def __init__(self, filename):
        self.filename = filename
        fd = open(filename)
        # try/finally: release the handle even if reading raises.
        try:
            current = []
            for line in fd:
                line = line.strip()
                if line.startswith('>>>'):
                    current = []
                    self[line[3:]] = current
                else:
                    current.append(line)
        finally:
            fd.close()

class RunData(dict):
    """Mapping of symbol (or category) name to a count for one data file.

    Three attributes are synthesized by __getattribute__ on every access:

      total     -- float sum of all stored values
      filedata  -- a FileData freshly parsed from self.filename
      maxsymlen -- length of the longest key, 0 if there are no keys
    """
    def __init__(self, filename):
        self.filename = filename

    def __getattribute__(self, attr):
        if attr == 'total':
            total = 0.0
            for value in self.values():
                total += value
            return total

        if attr == 'filedata':
            # Re-reads the file each time; nothing is cached.
            return FileData(self.filename)

        if attr == 'maxsymlen':
            lengths = [ len(sym) for sym in self.keys() ]
            # max() of an empty list raises ValueError; an empty run
            # simply has no symbol width.
            if not lengths:
                return 0
            return max(lengths)

        return super(RunData, self).__getattribute__(attr)

    def display(self, output=None, limit=None, maxsymlen=None):
        """Write one '<name> <percent>%' line per entry, largest first.

        output    -- file-like object, or a path (str) to create; falsy
                     means sys.stdout
        limit     -- if not None, print only the first `limit` entries
        maxsymlen -- width of the name column; falsy means use the
                     longest key of this mapping
        """
        opened_here = False
        if not output:
            import sys
            output = sys.stdout
        elif isinstance(output, str):
            output = open(output, 'w')
            opened_here = True

        try:
            total = float(self.total)

            # swap (string,count) order so we can sort on count
            symbols = [ (count,name) for name,count in self.items() ]
            symbols.sort(reverse=True)
            if limit is not None:
                symbols = symbols[:limit]

            if not maxsymlen:
                maxsymlen = self.maxsymlen

            symbolf = "%-" + str(maxsymlen + 1) + "s %.2f%%"
            for number,name in symbols:
                percent = 100.0 * (float(number) / total)
                output.write(symbolf % (name, percent) + '\n')
        finally:
            # Only close what this method opened; a caller-supplied
            # stream stays under the caller's control.
            if opened_here:
                output.close()

class PCData(RunData):
    """Per-symbol (or per-category) counts from the 'PC data' section.

    filename   -- profile data file to read
    categorize -- optional callable mapping a symbol to a category name
                  (None result is stored under 'other'); when omitted the
                  counts are stored under the symbol itself
    showidle   -- when false, entries categorized as 'idle' are dropped
    """
    def __init__(self, filename=None, categorize=None, showidle=True):
        # RunData.__init__ takes only the filename; passing self
        # explicitly as well raised a TypeError.
        super(PCData, self).__init__(filename)

        for line in self.filedata['PC data']:
            if not line:
                continue
            (symbol, count) = line.split()
            if symbol == "0x0":
                continue
            count = int(count)

            if categorize is None:
                key = symbol
            else:
                key = categorize(symbol)
                if key is None:
                    key = 'other'
                elif key == 'idle' and not showidle:
                    continue

            # Several symbols can share a category, so accumulate rather
            # than overwrite (same approach as FuncNode.aggregate).
            self[key] = self.get(key, 0) + count

class FuncNode(object):
    """Node of the call tree built from the 'function data' section.

    FuncNode() creates an empty node.  FuncNode(filedata) parses the
    whole section and returns the node whose id is 0.  Each parsed node
    has `symbol`, `count` and `children` (a tuple of FuncNode); every
    node that is somebody's child also gets a `parent` attribute.
    """
    def __new__(cls, filedata=None):
        if filedata is None:
            return super(FuncNode, cls).__new__(cls)

        # Line format: '<hex id> <symbol> <count> <hex child id>...'.
        # Splitting on a single space keeps an empty symbol field, which
        # is mapped to 'unknown' below.
        nodes = {}
        for line in filedata['function data']:
            if not line:
                # blank lines would otherwise fail in int('', 16)
                continue
            data = line.split(' ')
            node_id = int(data[0], 16)
            node = FuncNode()
            node.symbol = data[1]
            if node.symbol == '':
                node.symbol = 'unknown'
            node.count = int(data[2])
            node.children = [ int(child, 16) for child in data[3:] ]
            nodes[node_id] = node

        # Second pass: replace child ids with the node objects.
        for node in nodes.values():
            children = []
            for cid in node.children:
                child = nodes[cid]
                children.append(child)
                child.parent = node
            node.children = tuple(children)
        if not nodes:
            # Diagnostic before the KeyError raised by nodes[0] below.
            print(filedata.filename)
            print(nodes)
        return nodes[0]

    def total(self):
        """Return this node's count plus the totals of all descendants."""
        total = self.count
        for child in self.children:
            total += child.total()

        return total

    def aggregate(self, dict, categorize, incategory):
        """Accumulate subtree counts into `dict` and return the remainder.

        dict       -- mapping that receives the accumulated counts
        categorize -- optional callable mapping a symbol to a category
        incategory -- truthy when an ancestor already has a category

        A node with a category adds its subtree total under that category
        and returns 0.  Otherwise the subtree total is returned, and is
        also recorded under the node's symbol unless an ancestor is
        categorized.
        """
        category = None
        if categorize:
            category = categorize(self.symbol)

        total = self.count
        for child in self.children:
            total += child.aggregate(dict, categorize, category or incategory)

        if category:
            dict[category] = dict.get(category, 0) + total
            return 0
        elif not incategory:
            dict[self.symbol] = dict.get(self.symbol, 0) + total

        return total

    def dump(self):
        """Print '<symbol> <count> <child symbols>' for the whole subtree."""
        kids = [ child.symbol for child in self.children]
        print('%s %d <%s>' % (self.symbol, self.count, ', '.join(kids)))
        for child in self.children:
            child.dump()

    def _dot(self, dot, threshold, categorize, total):
        """Add this subtree to the pydot graph `dot`.

        A node is skipped (its dot_node stays None and its children are
        not visited) when its share of `total`, in percent, is below
        `threshold`, or when `categorize` gives it a category other than
        'other'.
        """
        from pydot import Dot, Edge, Node
        self.dot_node = None

        value = self.total() * 100.0 / total
        if value < threshold:
            return
        if categorize:
            category = categorize(self.symbol)
            if category and category != 'other':
                return
        label = '%s %.2f%%' % (self.symbol, value)
        # NOTE(review): the FuncNode objects themselves are handed to
        # pydot as node name and edge endpoints -- confirm pydot accepts
        # that.
        self.dot_node = Node(self, label=label)
        dot.add_node(self.dot_node)

        for child in self.children:
            child._dot(dot, threshold, categorize, total)
            if child.dot_node is not None:
                dot.add_edge(Edge(self, child))

    def _cleandot(self):
        """Remove the temporary dot_node attribute from the whole subtree."""
        for child in self.children:
            child._cleandot()
            # Assign first so the key exists even for nodes that _dot
            # never visited, then delete it.
            self.dot_node = None
            del self.__dict__['dot_node']

    def dot(self, dot, threshold=0.1, categorize=None):
        """Render this tree into the pydot graph `dot`, then clean up."""
        self._dot(dot, threshold, categorize, self.total())
        self._cleandot()

class FuncData(RunData):
    """Counts aggregated from the function call tree of one data file."""
    def __init__(self, filename, categorize=None):
        super(FuncData, self).__init__(filename)
        tree = self.tree
        tree.aggregate(self, categorize, incategory=False)
        # NOTE(review): RunData.__getattribute__ intercepts 'total', so
        # reads of self.total return the sum of the stored values and
        # never this assigned value -- confirm which one is intended.
        self.total = tree.total()

    def __getattribute__(self, attr):
        if attr == 'tree':
            # Parsed afresh from the file on every access.
            return FuncNode(self.filedata)
        return super(FuncData, self).__getattribute__(attr)

    def displayx(self, output=None, maxcount=None):
        """Write '<key> <percent>%' lines, largest value first.

        output   -- file-like object; None means sys.stdout
        maxcount -- if not None, stop after this many lines
        """
        if output is None:
            import sys
            output = sys.stdout

        items = [ (val,key) for key,val in self.items() ]
        items.sort(reverse=True)
        for val,key in items:
            if maxcount is not None:
                if maxcount == 0:
                    return
                maxcount -= 1

            percent = val * 100.0 / self.total
            output.write('%-30s %8s' % (key, '%3.2f%%' % percent) + '\n')

class Profile(object):
    """Collection of per-run, per-cpu profile data objects.

    datatype   -- class called as datatype(filename, categorize) for
                  every data file found by inputdir()
    categorize -- categorizer passed through to `datatype`
    """
    # This list controls the order of values in stacked bar data output
    default_categories = [ 'interrupt',
                           'driver',
                           'stack',
                           'buffer',
                           'copy',
                           'syscall',
                           'user',
                           'other',
                           'idle']

    def __init__(self, datatype, categorize=None):
        categories = Profile.default_categories

        self.datatype = datatype
        self.categorize = categorize
        self.data = {}
        self.categories = categories[:]
        self.rcategories = categories[:]
        self.rcategories.reverse()
        self.cpu = 0

    # Read in files
    def inputdir(self, directory):
        """Load every file matching the profile name pattern under
        `directory`.

        The run name is the file's directory relative to `directory`; the
        cpu label is the part of the file name captured by the pattern.
        """
        import os, os.path, re
        from os.path import expanduser, join as joinpath

        directory = expanduser(directory)
        label_ex = re.compile(r'profile\.(.*).dat')
        for root,dirs,files in os.walk(directory):
            for name in files:
                match = label_ex.match(name)
                if not match:
                    continue

                filename = joinpath(root, name)
                prefix = os.path.commonprefix([root, directory])
                dirname = root[len(prefix)+1:]
                data = self.datatype(filename, self.categorize)
                self.setdata(dirname, match.group(1), data)

    def setdata(self, run, cpu, data):
        """Store `data` for (run, cpu).

        Raises AttributeError if that slot is already filled.
        """
        if run not in self.data:
            self.data[run] = {}

        if cpu in self.data[run]:
            raise AttributeError(
                'data already stored for run %s and cpu %s' % (run, cpu))

        self.data[run][cpu] = data

    def getdata(self, run, cpu):
        """Return the data stored for (run, cpu).

        If nothing is stored, print the pair and return None.
        """
        try:
            return self.data[run][cpu]
        except KeyError:
            print('%s %s' % (run, cpu))
            return None

    def alldata(self):
        """Yield (run, cpu, data) for everything stored."""
        for run,cpus in self.data.items():
            for cpu,data in cpus.items():
                yield run,cpu,data

    def get(self, job, stat, system=None):
        """Return per-category percentages for `job` on the current cpu.

        job    -- job object; str(job) is the run name, and job.system is
                  used when `system` is not given
        stat   -- unused by this method
        system -- system name used to build the cpu label

        Returns a list ordered like self.categories, or None when there
        is no (or empty) data.  Raises AttributeError when no system can
        be determined and ValueError on a negative value.
        """
        # hasattr takes (object, name); the reversed call could never
        # find job.system.
        if system is None and hasattr(job, 'system'):
            system = job.system

        if system is None:
            raise AttributeError('The job must have a system set')

        cpu = '%s.run%d' % (system, self.cpu)

        data = self.getdata(str(job), cpu)
        if not data:
            return None

        values = []
        for category in self.categories:
            val = float(data.get(category, 0.0))
            if val < 0.0:
                raise ValueError('value is %f' % val)
            values.append(val)
        total = sum(values)
        return [ v / total * 100.0 for v in values ]

    def dump(self):
        """Print a header for every stored data object and dump it."""
        for run,cpu,data in self.alldata():
            print('run %s, cpu %s' % (run, cpu))
            # NOTE(review): RunData and its subclasses in this file define
            # no dump() method; FuncNode does -- confirm the target.
            data.dump()
            print('')

    def write_dot(self, threshold, jobfile=None, jobs=None):
        """Write a '.dot' file next to each job's data file.

        Jobs come from `jobs`, or from jobfile.jobs() when `jobs` is
        None.  Jobs without data for the current cpu are skipped.
        """
        import pydot

        if jobs is None:
            jobs = [ job for job in jobfile.jobs() ]

        for job in jobs:
            cpu = '%s.run%d' % (job.system, self.cpu)
            symbols = self.getdata(job.name, cpu)
            if not symbols:
                continue

            dot = pydot.Dot()
            symbols.tree.dot(dot, threshold=threshold)
            # the last three characters of the file name are replaced
            dot.write(symbols.filename[:-3] + 'dot')

    def write_txt(self, jobfile=None, jobs=None, limit=None):
        """Write a '.txt' listing next to each job's data file.

        Jobs come from `jobs`, or from jobfile.jobs() when `jobs` is
        None.  Jobs without data for the current cpu are skipped.
        """
        if jobs is None:
            jobs = [ job for job in jobfile.jobs() ]

        for job in jobs:
            cpu = '%s.run%d' % (job.system, self.cpu)
            symbols = self.getdata(job.name, cpu)
            if not symbols:
                continue

            output = open(symbols.filename[:-3] + 'txt', 'w')
            # Close each listing before moving on instead of leaking
            # one handle per job.
            try:
                symbols.display(output, limit)
            finally:
                output.close()

    def display(self, jobfile=None, jobs=None, limit=None):
        """Print the listing of every job that has data, using one common
        name column width for all of them."""
        if jobs is None:
            jobs = [ job for job in jobfile.jobs() ]

        maxsymlen = 0

        thejobs = []
        for job in jobs:
            cpu = '%s.run%d' % (job.system, self.cpu)
            symbols = self.getdata(job.name, cpu)
            if symbols:
                thejobs.append(job)
                maxsymlen = max(maxsymlen, symbols.maxsymlen)

        for job in thejobs:
            cpu = '%s.run%d' % (job.system, self.cpu)
            symbols = self.getdata(job.name, cpu)
            print(job.name)
            symbols.display(limit=limit, maxsymlen=maxsymlen)
            print('')


from categories import func_categorize, pc_categorize
class PCProfile(Profile):
    """Profile whose data objects are PCData."""
    def __init__(self, categorize=pc_categorize):
        super(PCProfile, self).__init__(PCData, categorize)


class FuncProfile(Profile):
    """Profile whose data objects are FuncData."""
    def __init__(self, categorize=func_categorize):
        super(FuncProfile, self).__init__(FuncData, categorize)

def usage(exitcode = None):
    """Print the usage message; exit with `exitcode` unless it is None."""
    # Imported here because the module-level `sys` only exists when the
    # file is run as a script.
    import sys

    print('''\
Usage: %s [-bc] [-g <dir>] [-j <jobfile>] [-n <num>]

    -c           groups symbols into categories
    -b           dumps data for bar charts
    -d           generate dot output
    -g <d>       draw graphs and send output to <d>
    -j <jobfile> specify a different jobfile (default is Test.py)
    -n <n>       selects number of top symbols to print (default 5)
''' % sys.argv[0])

    if exitcode is not None:
        sys.exit(exitcode)

if __name__ == '__main__':
    import getopt, re, sys
    from os.path import expanduser
    from output import StatOutput

    # default option values
    numsyms = 10
    graph = None
    cpus = [ 0 ]
    categorize = False
    # NOTE(review): showidle is set by -i but never passed on below.
    showidle = True
    funcdata = True
    jobfilename = 'Test.py'
    dodot = False
    dotfile = None
    textout = False
    threshold = 0.01
    inputfile = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'C:cdD:f:g:ij:n:pT:t')
    except getopt.GetoptError:
        usage(2)

    for o,a in opts:
        if o == '-C':
            cpus = [ int(x) for x in a.split(',') ]
        elif o == '-c':
            categorize = True
        elif o == '-D':
            dotfile = a
        elif o == '-d':
            dodot = True
        elif o == '-f':
            inputfile = expanduser(a)
        elif o == '-g':
            graph = a
        elif o == '-i':
            showidle = False
        elif o == '-j':
            jobfilename = a
        elif o == '-n':
            numsyms = int(a)
        elif o == '-p':
            funcdata = False
        elif o == '-T':
            threshold = float(a)
        elif o == '-t':
            textout = True

    if args:
        # positional arguments are not accepted
        sys.stdout.write("'%s' %d\n" % (args, len(args)))
        usage(1)

    if inputfile:
        # Single-file mode: always function data.
        catfunc = None
        if categorize:
            catfunc = func_categorize
        data = FuncData(inputfile, categorize=catfunc)

        if dodot:
            import pydot
            dot = pydot.Dot()
            data.tree.dot(dot, threshold=threshold)
            #dot.orientation = 'landscape'
            #dot.ranksep='equally'
            #dot.rank='samerank'
            dot.write(dotfile, format='png')
        else:
            data.display(limit=numsyms)

    else:
        # Job-file mode: load every profile under the job file's rootdir.
        from jobfile import JobFile
        jobfile = JobFile(jobfilename)

        if funcdata:
            profile = FuncProfile()
        else:
            profile = PCProfile()

        if not categorize:
            profile.categorize = None
        profile.inputdir(jobfile.rootdir)

        if graph:
            for cpu in cpus:
                profile.cpu = cpu
                if funcdata:
                    name = 'funcstacks%d' % cpu
                else:
                    name = 'stacks%d' % cpu
                output = StatOutput(jobfile, info=profile)
                output.xlabel = 'System Configuration'
                output.ylabel = '% CPU utilization'
                output.stat = name
                output.graph(name, graph)

        if dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_dot(jobfile=jobfile, threshold=threshold)

        if textout:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_txt(jobfile=jobfile)

        if not graph and not textout and not dodot:
            for cpu in cpus:
                if not categorize:
                    profile.categorize = None
                profile.cpu = cpu
                profile.display(jobfile=jobfile, limit=numsyms)