# profile.py revision 2004:cdf8e86d1983
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Reads sampled profile data (the '>>>PC data' and '>>>function data'
# sections of profile.*.dat files) and reports it as text, as dot call
# graphs, or as per-category values for stacked bar graphs.
#
# The code below sticks to constructs that behave the same under
# Python 2 and Python 3 (print(...) with a single argument, open(),
# dict.items(), raise X(msg), try/finally).

from orderdict import orderdict
import output


class RunData(dict):
    """Mapping of symbol (or category) name to sample count for one file.

    Two derived values are available as attributes:

    total     -- sum of all stored counts, as a float
    maxsymlen -- length of the longest key (0 when there are no keys)

    A subclass may shadow either one with a real instance attribute
    (FuncData does this for ``total``).
    """

    def __init__(self, filename=None):
        # The dict itself starts out empty; subclasses fill it in.
        self.filename = filename

    def __getattr__(self, attr):
        # __getattr__ only runs after normal lookup has failed, so
        # instance attributes of the same name take precedence.
        if attr == 'total':
            return sum(self.values(), 0.0)
        if attr == 'maxsymlen':
            # max() of an empty sequence raises ValueError; an empty
            # data set simply has no column width to contribute.
            if not self:
                return 0
            return max([len(sym) for sym in self])
        # Falling off the end would return None for *every* unknown
        # name, hiding typos and confusing hasattr()/getattr() callers.
        raise AttributeError(attr)

    def display(self, output=None, limit=None, maxsymlen=None):
        """Write one '<name> <percent>%' line per entry, largest first.

        output    -- file-like object to write to, a path (str) of a
                     file to create, or None/empty for sys.stdout
        limit     -- if not None, only the first ``limit`` entries of the
                     sorted list are written
        maxsymlen -- width used for the name column; when false, the
                     longest key of this data set is used
        """
        opened_here = False
        if not output:
            import sys
            output = sys.stdout
        elif isinstance(output, str):
            output = open(output, 'w')
            opened_here = True

        try:
            total = float(self.total)

            # swap (string,count) order so we can sort on count
            symbols = sorted([(count, name) for name, count in self.items()],
                             reverse=True)
            if limit is not None:
                symbols = symbols[:limit]

            if not maxsymlen:
                maxsymlen = self.maxsymlen

            symbolf = "%-" + str(maxsymlen + 1) + "s %.2f%%"
            for number, name in symbols:
                # All-zero counts would otherwise divide by zero.
                percent = 0.0
                if total:
                    percent = 100.0 * (float(number) / total)
                output.write(symbolf % (name, percent) + '\n')
        finally:
            # Only close what this method opened; a caller-supplied
            # stream stays under the caller's control.
            if opened_here:
                output.close()


class PCData(RunData):
    """Counts read from the '>>>PC data' section of a profile file.

    Each data line is '<symbol> <count>'.  Lines for symbol "0x0" are
    skipped.  When ``categorize`` is given, counts are accumulated under
    categorize(symbol) ('other' when it returns None); samples whose
    category is 'idle' are dropped if ``showidle`` is false.  Without a
    categorizer, counts are accumulated per symbol.
    """

    def __init__(self, filename=None, categorize=None, showidle=True):
        # The original passed self twice here, which raises TypeError.
        super(PCData, self).__init__(filename)
        if filename is None:
            return

        fd = open(filename)
        try:
            # Skip everything up to and including the section header.
            for line in fd:
                if line.strip() == '>>>PC data':
                    break

            # Continue on the same iterator: read until the next
            # '>>>' header or end of file.
            for line in fd:
                if line.startswith('>>>'):
                    break

                (symbol, count) = line.split()
                if symbol == "0x0":
                    continue
                count = int(count)

                key = symbol
                if categorize is not None:
                    key = categorize(symbol)
                    if key is None:
                        key = 'other'
                    elif key == 'idle' and not showidle:
                        continue

                # Several symbols map to one category, so accumulate
                # instead of overwriting the previous count.
                self[key] = self.get(key, 0) + count
        finally:
            fd.close()


class FuncNode(object):
    """One node of the function tree in a '>>>function data' section.

    FuncNode(filename) parses the file and returns the node whose id is
    0, with ``children`` resolved to a tuple of FuncNode objects and
    ``parent`` set on every node that appears as a child.  FuncNode()
    without a filename returns a bare node with no attributes set.
    """

    def __new__(cls, filename=None):
        if filename is None:
            return super(FuncNode, cls).__new__(cls)

        nodes = {}
        fd = open(filename, 'r')
        try:
            fditer = iter(fd)
            for line in fditer:
                if line.strip() == '>>>function data':
                    break

            # Each line: <hex id> <symbol> <count> [<hex child id> ...]
            for line in fditer:
                if line.startswith('>>>'):
                    break

                data = line.split()
                node_id = int(data[0], 16)
                node = FuncNode()
                node.symbol = data[1]
                node.count = int(data[2])
                # Child ids for now; replaced by node objects below once
                # every node has been read.
                node.children = [int(child, 16) for child in data[3:]]
                nodes[node_id] = node
        finally:
            fd.close()

        for node in nodes.values():
            children = []
            for cid in node.children:
                child = nodes[cid]
                children.append(child)
                child.parent = node
            node.children = tuple(children)

        if 0 not in nodes:
            # Same exception type as the bare nodes[0] lookup this
            # replaces, but it says which file was the problem.
            raise KeyError('no function node with id 0 in %s' % filename)
        return nodes[0]

    def __init__(self, filename=None):
        # All construction work happens in __new__.
        pass

    def total(self):
        """Return this node's count plus the totals of all descendants."""
        total = self.count
        for child in self.children:
            total += child.total()

        return total

    def aggregate(self, dict, categorize, incategory):
        """Accumulate this subtree's counts into the mapping ``dict``.

        dict       -- mapping updated in place (name -> count); the
                      parameter name shadows the builtin but is kept for
                      existing callers
        categorize -- callable symbol -> category (or None), or a false
                      value to disable categorization
        incategory -- true when an ancestor already has a category

        A node that has a category adds its count plus whatever its
        children return to dict[category] and returns 0, so ancestors
        do not count it again.  Otherwise the sum is returned to the
        parent, and additionally recorded under the node's own symbol
        when no ancestor is categorized.
        """
        category = None
        if categorize:
            category = categorize(self.symbol)

        total = self.count
        for child in self.children:
            total += child.aggregate(dict, categorize, category or incategory)

        if category:
            dict[category] = dict.get(category, 0) + total
            return 0
        elif not incategory:
            dict[self.symbol] = dict.get(self.symbol, 0) + total

        return total

    def dump(self):
        """Print '<symbol> <count> <child symbols>' for the whole subtree."""
        kids = [child.symbol for child in self.children]
        print('%s %d <%s>' % (self.symbol, self.count, ', '.join(kids)))
        for child in self.children:
            child.dump()

    def _dot(self, dot, threshold, categorize, total):
        """Add this subtree to the pydot graph ``dot``.

        A node (and everything below it) is left out when its share of
        ``total`` in percent is below ``threshold``, or when
        ``categorize`` assigns it a category other than 'other'.
        """
        from pydot import Edge, Node
        self.dot_node = None

        value = self.total() * 100.0 / total
        if value < threshold:
            return
        if categorize:
            category = categorize(self.symbol)
            if category and category != 'other':
                return
        label = '%s %.2f%%' % (self.symbol, value)
        # NOTE(review): the FuncNode object itself is handed to pydot as
        # the node name (and as edge end points below) -- confirm this
        # is what the pydot version in use expects.
        self.dot_node = Node(self, label=label)
        dot.add_node(self.dot_node)

        for child in self.children:
            child._dot(dot, threshold, categorize, total)
            if child.dot_node is not None:
                dot.add_edge(Edge(self, child))

    def _cleandot(self):
        """Remove the temporary ``dot_node`` attribute from the subtree."""
        for child in self.children:
            child._cleandot()
        # Nodes below a pruned node were never visited by _dot, so the
        # attribute may be absent.
        self.__dict__.pop('dot_node', None)

    def dot(self, dot, threshold=0.1, categorize=None):
        """Render this tree into the pydot graph ``dot``.

        threshold  -- minimum percentage of the tree total a node needs
                      in order to be drawn
        categorize -- optional categorizer; see _dot
        """
        self._dot(dot, threshold, categorize, self.total())
        self._cleandot()


class FuncData(RunData):
    """Counts aggregated from the function tree of a profile file.

    ``tree`` holds the parsed FuncNode root and ``total`` is stored as a
    plain attribute equal to tree.total().
    """

    def __init__(self, filename, categorize=None):
        super(FuncData, self).__init__(filename)
        self.tree = FuncNode(filename)
        self.tree.aggregate(self, categorize, incategory=False)
        self.total = self.tree.total()

    def displayx(self, output=None, maxcount=None):
        """Write '<name> <percent>%' lines in fixed-width columns.

        output   -- file-like object, or None for sys.stdout
        maxcount -- if not None, stop after that many lines
        """
        if output is None:
            import sys
            output = sys.stdout

        items = sorted([(val, key) for key, val in self.items()],
                       reverse=True)
        for val, key in items:
            if maxcount is not None:
                if maxcount == 0:
                    return
                maxcount -= 1

            percent = 0.0
            if self.total:
                percent = val * 100.0 / self.total
            output.write('%-30s %8s' % (key, '%3.2f%%' % percent) + '\n')


class Profile(object):
    """Collection of per-run, per-cpu profile data sets.

    datatype   -- callable(filename, categorize) building one data set
                  (PCData or FuncData in this file)
    categorize -- categorizer handed to ``datatype``

    ``cpu`` selects which cpu number the job-based methods look at.
    """

    # This list controls the order of values in stacked bar data output
    default_categories = [
        'interrupt',
        'driver',
        'stack',
        'buffer',
        'copy',
        'syscall',
        'user',
        'other',
        'idle',
    ]

    def __init__(self, datatype, categorize=None):
        categories = Profile.default_categories

        self.datatype = datatype
        self.categorize = categorize
        self.data = {}
        self.categories = categories[:]
        self.rcategories = categories[:]
        self.rcategories.reverse()
        self.cpu = 0

    def _cpukey(self, job):
        # Key under which data for the current cpu of ``job`` is stored.
        return '%s.run%d' % (job.system, self.cpu)

    @staticmethod
    def _joblist(jobfile, jobs):
        # An explicit job list wins; otherwise take every job of jobfile.
        if jobs is None:
            return list(jobfile.jobs())
        return jobs

    # Read in files
    def inputdir(self, directory):
        """Load every profile.<label>.dat file found below ``directory``.

        Each file is stored with its directory (relative to
        ``directory``) as the run name and <label> as the cpu key.
        """
        import os
        import os.path
        import re
        from os.path import expanduser, join as joinpath

        directory = expanduser(directory)
        # Escape the dot before 'dat' and anchor the end so that names
        # such as 'profile.x.dat.bak' or 'profile.xydat' are not loaded.
        label_ex = re.compile(r'profile\.(.*)\.dat$')
        for root, dirs, files in os.walk(directory):
            # Both values depend only on the directory being visited.
            prefix = os.path.commonprefix([root, directory])
            dirname = root[len(prefix)+1:]
            for name in files:
                match = label_ex.match(name)
                if not match:
                    continue

                filename = joinpath(root, name)
                data = self.datatype(filename, self.categorize)
                self.setdata(dirname, match.group(1), data)

    def setdata(self, run, cpu, data):
        """Store ``data`` for (run, cpu).

        Raises AttributeError if data for that pair is already present.
        """
        if run not in self.data:
            self.data[run] = {}

        if cpu in self.data[run]:
            raise AttributeError(
                'data already stored for run %s and cpu %s' % (run, cpu))

        self.data[run][cpu] = data

    def getdata(self, run, cpu):
        """Return the data stored for (run, cpu), or None if absent."""
        try:
            return self.data[run][cpu]
        except KeyError:
            return None

    def alldata(self):
        """Yield (run, cpu, data) for every stored data set."""
        for run, cpus in self.data.items():
            for cpu, data in cpus.items():
                yield run, cpu, data

    def get(self, job, stat):
        """Return the job's counts, one value per entry of self.categories.

        Missing categories, or a job without data, give 0.0.  ``stat``
        is accepted but not used.  Raises AttributeError when
        job.system is None.
        """
        if job.system is None:
            raise AttributeError('The job must have a system set')

        data = self.getdata(job.name, self._cpukey(job))
        if not data:
            return [0.0 for c in self.categories]

        return [data.get(category, 0.0) for category in self.categories]

    def dump(self):
        """Print every stored data set, preceded by its run and cpu."""
        for run, cpu, data in self.alldata():
            print('run %s, cpu %s' % (run, cpu))
            # NOTE(review): neither PCData nor FuncData in this file
            # defines dump(); FuncData only has it on .tree.  Confirm
            # the intended target before relying on this method.
            data.dump()
            print('')

    def write_dot(self, threshold, jobfile=None, jobs=None):
        """Write a .dot call graph next to each job's data file.

        The output name is the data file name with its last three
        characters replaced by 'dot'.  Jobs without data are skipped.
        """
        import pydot

        for job in self._joblist(jobfile, jobs):
            symbols = self.getdata(job.name, self._cpukey(job))
            if not symbols:
                continue

            dot = pydot.Dot()
            symbols.tree.dot(dot, threshold=threshold)
            dot.write(symbols.filename[:-3] + 'dot')

    def write_txt(self, jobfile=None, jobs=None, limit=None):
        """Write a .txt listing next to each job's data file.

        The output name is the data file name with its last three
        characters replaced by 'txt'.  Jobs without data are skipped.
        """
        for job in self._joblist(jobfile, jobs):
            symbols = self.getdata(job.name, self._cpukey(job))
            if not symbols:
                continue

            txtfile = open(symbols.filename[:-3] + 'txt', 'w')
            try:
                symbols.display(txtfile, limit)
            finally:
                txtfile.close()

    def display(self, jobfile=None, jobs=None, limit=None):
        """Print each job's listing to stdout with a shared name column."""
        # First pass: find the jobs that have data and the widest symbol
        # among them, so all listings line up.
        found = []
        maxsymlen = 0
        for job in self._joblist(jobfile, jobs):
            symbols = self.getdata(job.name, self._cpukey(job))
            if symbols:
                found.append((job, symbols))
                maxsymlen = max(maxsymlen, symbols.maxsymlen)

        for job, symbols in found:
            print(job.name)
            symbols.display(limit=limit, maxsymlen=maxsymlen)
            print('')


from categories import func_categorize, pc_categorize


class PCProfile(Profile):
    """Profile whose data sets are PCData objects."""

    def __init__(self, categorize=pc_categorize):
        super(PCProfile, self).__init__(PCData, categorize)


class FuncProfile(Profile):
    """Profile whose data sets are FuncData objects."""

    def __init__(self, categorize=func_categorize):
        super(FuncProfile, self).__init__(FuncData, categorize)


def usage(exitcode=None):
    """Print the command line help; exit with ``exitcode`` if not None."""
    # Imported here so the function also works when this file is
    # imported as a module rather than run as a script.
    import sys

    print('''\
Usage: %s [-cdipt] [-C <cpus>] [-D <format>] [-f <file>] [-g <dir>]
          [-j <jobfile>] [-n <num>] [-T <threshold>]

    -C <cpus>      comma separated list of cpu numbers (default 0)
    -c             groups symbols into categories
    -D <format>    format for dot output written with -f (default raw)
    -d             generate dot output
    -f <file>      read function data from a single file
    -g <d>         draw graphs and send output to <d>
    -i             hide idle samples
    -j <jobfile>   specify a different jobfile (default is Test.py)
    -n <n>         selects number of top symbols to print (default 10)
    -p             use PC data instead of function data
    -T <threshold> minimum percentage for a node in dot output (default 0.01)
    -t             write text output files
''' % sys.argv[0])

    if exitcode is not None:
        sys.exit(exitcode)


def main():
    """Command line entry point; see usage() for the options."""
    import getopt
    import sys
    from os.path import expanduser, splitext
    from output import StatOutput
    from jobfile import JobFile

    # default option values
    numsyms = 10
    graph = None
    cpus = [0]
    categorize = False
    # NOTE(review): -i clears showidle, but the value is never handed
    # to the profile classes below -- confirm how it should be wired.
    showidle = True
    funcdata = True
    jobfilename = 'Test.py'
    dodot = False
    dotformat = 'raw'
    textout = False
    threshold = 0.01
    inputfile = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'C:cdD:f:g:ij:n:pT:t')
    except getopt.GetoptError:
        usage(2)

    for o, a in opts:
        if o == '-C':
            cpus = [int(x) for x in a.split(',')]
        elif o == '-c':
            categorize = True
        elif o == '-D':
            dotformat = a
        elif o == '-d':
            dodot = True
        elif o == '-f':
            inputfile = expanduser(a)
        elif o == '-g':
            graph = a
        elif o == '-i':
            showidle = False
        elif o == '-j':
            jobfilename = a
        elif o == '-n':
            numsyms = int(a)
        elif o == '-p':
            funcdata = False
        elif o == '-T':
            threshold = float(a)
        elif o == '-t':
            textout = True

    if args:
        print("'%s' %d" % (args, len(args)))
        usage(1)

    if inputfile:
        data = FuncData(inputfile)

        if dodot:
            import pydot
            dot = pydot.Dot()
            # The graph lives on the parsed tree; FuncData itself has
            # no dot() method.
            data.tree.dot(dot, threshold=threshold)
            #dot.orientation = 'landscape'
            #dot.ranksep='equally'
            #dot.rank='samerank'
            # The original referenced an undefined 'dotfile'; write the
            # graph next to the input file.
            dotfile = splitext(inputfile)[0] + '.dot'
            dot.write(dotfile, format=dotformat)
        else:
            data.display(limit=numsyms)

    else:
        jobfile = JobFile(jobfilename)

        if funcdata:
            profile = FuncProfile()
        else:
            profile = PCProfile()

        # Must happen before inputdir(), which passes the categorizer
        # on to every data set it builds.
        if not categorize:
            profile.categorize = None
        profile.inputdir(jobfile.rootdir)

        if graph:
            for cpu in cpus:
                profile.cpu = cpu
                if funcdata:
                    name = 'funcstacks%d' % cpu
                else:
                    name = 'stacks%d' % cpu
                # Named so it does not rebind the imported 'output' module.
                stat_output = StatOutput(name, jobfile, info=profile)
                stat_output.graph(graph)

        if dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_dot(jobfile=jobfile, threshold=threshold)

        if textout:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_txt(jobfile=jobfile)

        if not graph and not textout and not dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.display(jobfile=jobfile, limit=numsyms)


if __name__ == '__main__':
    main()