# profile.py revision 2665:a124942bacb8
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert

from orderdict import orderdict
import output

class FileData(dict):
    """Section name -> list of lines, parsed from one data file.

    A line beginning with '>>>' starts a new section whose name is the
    rest of that line; every other line is stripped and appended to the
    section currently being read.  Lines seen before the first '>>>'
    marker are not stored under any key.
    """
    def __init__(self, filename):
        self.filename = filename
        fd = file(filename)
        try:
            current = []
            for line in fd:
                line = line.strip()
                if line.startswith('>>>'):
                    current = []
                    self[line[3:]] = current
                else:
                    current.append(line)
        finally:
            # release the handle even if reading fails part way through
            fd.close()

class RunData(dict):
    """Mapping of symbol (or category) name to count for one data file.

    Three attributes are computed on every access instead of being
    stored:

      total     -- float sum of all values in the mapping
      filedata  -- a freshly parsed FileData for self.filename
      maxsymlen -- length of the longest key (0 when there are no keys)
    """
    def __init__(self, filename):
        self.filename = filename

    def __getattribute__(self, attr):
        if attr == 'total':
            total = 0.0
            for value in self.itervalues():
                total += value
            return total

        if attr == 'filedata':
            return FileData(self.filename)

        if attr == 'maxsymlen':
            # max() of an empty list raises ValueError, so an empty data
            # set reports a width of zero instead
            lengths = [ len(sym) for sym in self.iterkeys() ]
            if not lengths:
                return 0
            return max(lengths)

        return super(RunData, self).__getattribute__(attr)

    def display(self, output=None, limit=None, maxsymlen=None):
        """Write one 'name percent%' line per entry, largest count first.

        output    -- file-like object, a filename to create, or a false
                     value for sys.stdout
        limit     -- if not None, only the first `limit` entries are shown
        maxsymlen -- width of the name column; defaults to self.maxsymlen
        """
        opened = False
        if not output:
            import sys
            output = sys.stdout
        elif isinstance(output, str):
            output = file(output, 'w')
            opened = True

        try:
            total = float(self.total)

            # swap (string,count) order so we can sort on count
            symbols = [ (count, name) for name, count in self.iteritems() ]
            symbols.sort(reverse=True)
            if limit is not None:
                symbols = symbols[:limit]

            if not maxsymlen:
                maxsymlen = self.maxsymlen

            symbolf = "%-" + str(maxsymlen + 1) + "s %.2f%%"
            for number, name in symbols:
                # all-zero counts would otherwise divide by zero
                if total:
                    percent = 100.0 * (float(number) / total)
                else:
                    percent = 0.0
                output.write(symbolf % (name, percent) + '\n')
        finally:
            # only close what this method opened itself
            if opened:
                output.close()

class PCData(RunData):
    """RunData filled from the 'PC data' section of a data file.

    Each line of the section is '<symbol> <count>'.  Without a
    categorize function counts are stored per symbol; with one they are
    summed per category (a None category is stored as 'other').
    """
    def __init__(self, filename=None, categorize=None, showidle=True):
        # RunData.__init__ takes only the filename
        super(PCData, self).__init__(filename)

        filedata = self.filedata['PC data']
        for line in filedata:
            # FileData keeps blank lines as empty strings; skip them
            if not line:
                continue

            (symbol, count) = line.split()
            if symbol == "0x0":
                continue
            count = int(count)

            key = symbol
            if categorize is not None:
                key = categorize(symbol)
                if key is None:
                    key = 'other'
                elif key == 'idle' and not showidle:
                    continue

            # several symbols can share a category, so accumulate
            self[key] = self.get(key, 0) + count

class FuncNode(object):
    """One node of the call tree read from a 'function data' section.

    Every node carries symbol, count and children (a tuple of FuncNode);
    nodes that are somebody's child also carry parent.
    """
    def __new__(cls, filedata=None):
        """Create an empty node, or build the whole tree from filedata.

        With filedata, each line of filedata['function data'] is
        '<hex id> <symbol> <count> [<hex child id> ...]' separated by
        single spaces.  The node with id 0 is returned as the root.
        Raises KeyError if there is no such node or a child id is
        unknown.
        """
        if filedata is None:
            return super(FuncNode, cls).__new__(cls)

        nodes = {}
        for line in filedata['function data']:
            # FileData keeps blank lines as empty strings; skip them
            if not line:
                continue

            data = line.split(' ')
            node_id = long(data[0], 16)
            node = FuncNode()
            node.symbol = data[1]
            if node.symbol == '':
                node.symbol = 'unknown'
            node.count = long(data[2])
            node.children = [ long(child, 16) for child in data[3:] ]
            nodes[node_id] = node

        # second pass: replace child ids by the node objects themselves
        for node in nodes.itervalues():
            children = []
            for cid in node.children:
                child = nodes[cid]
                children.append(child)
                child.parent = node
            node.children = tuple(children)

        if 0 not in nodes:
            # say which file is broken instead of a bare KeyError(0)
            raise KeyError('no root node (id 0) in function data of %s' %
                           getattr(filedata, 'filename', None))
        return nodes[0]

    def total(self):
        """Return this node's count plus the totals of all descendants."""
        total = self.count
        for child in self.children:
            total += child.total()

        return total

    def aggregate(self, dict, categorize, incategory):
        """Accumulate subtree counts into dict and return the remainder.

        A node that categorize() maps to a true category adds its whole
        remaining subtree total to dict[category] and returns 0.  Any
        other node returns its subtree total to the caller and, unless
        an ancestor was categorized (incategory), also adds that total
        to dict[self.symbol].
        """
        category = None
        if categorize:
            category = categorize(self.symbol)

        total = self.count
        for child in self.children:
            total += child.aggregate(dict, categorize, category or incategory)

        if category:
            dict[category] = dict.get(category, 0) + total
            return 0
        elif not incategory:
            dict[self.symbol] = dict.get(self.symbol, 0) + total

        return total

    def dump(self):
        """Print 'symbol count <children>' for this node and its subtree."""
        kids = [ child.symbol for child in self.children ]
        print('%s %d <%s>' % (self.symbol, self.count, ', '.join(kids)))
        for child in self.children:
            child.dump()

    def _dot(self, dot, threshold, categorize, total):
        """Add this subtree to the pydot graph `dot`.

        A node is dropped, together with everything below it, when its
        share of `total` is under `threshold` percent or when
        categorize() gives it a category other than 'other'.
        """
        from pydot import Edge, Node
        self.dot_node = None

        # an all-zero tree would otherwise divide by zero
        if total:
            value = self.total() * 100.0 / total
        else:
            value = 0.0
        if value < threshold:
            return
        if categorize:
            category = categorize(self.symbol)
            if category and category != 'other':
                return
        label = '%s %.2f%%' % (self.symbol, value)
        self.dot_node = Node(self, label=label)
        dot.add_node(self.dot_node)

        for child in self.children:
            child._dot(dot, threshold, categorize, total)
            if child.dot_node is not None:
                dot.add_edge(Edge(self, child))

    def _cleandot(self):
        """Remove the temporary dot_node attribute from the whole subtree."""
        for child in self.children:
            child._cleandot()
            # pruned subtrees never got the attribute; set it first so
            # the del below cannot fail
            self.dot_node = None
            del self.__dict__['dot_node']

    def dot(self, dot, threshold=0.1, categorize=None):
        """Fill the pydot graph `dot` with this tree (see _dot)."""
        self._dot(dot, threshold, categorize, self.total())
        self._cleandot()

class FuncData(RunData):
    """RunData filled by aggregating the call tree of a data file."""
    def __init__(self, filename, categorize=None):
        super(FuncData, self).__init__(filename)
        tree = self.tree
        tree.aggregate(self, categorize, incategory=False)
        # NOTE(review): reads of self.total are answered by
        # RunData.__getattribute__, which recomputes the sum of the
        # values, so the value stored here is never what a read returns
        # -- confirm which of the two is intended.
        self.total = tree.total()

    def __getattribute__(self, attr):
        # the tree is rebuilt from the file on every access
        if attr == 'tree':
            return FuncNode(self.filedata)
        return super(FuncData, self).__getattribute__(attr)

    def displayx(self, output=None, maxcount=None):
        """Write 'key percent%' lines, largest value first.

        output   -- file-like object, or None for sys.stdout
        maxcount -- if not None, stop after this many lines
        """
        if output is None:
            import sys
            output = sys.stdout

        # self.total is recomputed on each access; read it once
        total = self.total

        items = [ (val, key) for key, val in self.iteritems() ]
        items.sort(reverse=True)
        for val, key in items:
            if maxcount is not None:
                if maxcount == 0:
                    return
                maxcount -= 1

            if total:
                percent = val * 100.0 / total
            else:
                percent = 0.0
            output.write('%-30s %8s' % (key, '%3.2f%%' % percent) + '\n')

class Profile(object):
    """Collection of per-run, per-cpu data objects loaded from a directory.

    self.data maps run name -> cpu label -> data object created by
    calling self.datatype(filename, self.categorize).
    """
    # This list controls the order of values in stacked bar data output
    default_categories = [ 'interrupt',
                           'driver',
                           'stack',
                           'buffer',
                           'copy',
                           'syscall',
                           'user',
                           'other',
                           'idle' ]

    def __init__(self, datatype, categorize=None):
        categories = Profile.default_categories

        self.datatype = datatype
        self.categorize = categorize
        self.data = {}
        self.categories = categories[:]
        self.rcategories = categories[:]
        self.rcategories.reverse()
        self.cpu = 0

    # Read in files
    def inputdir(self, directory):
        """Load every matching data file found below `directory`.

        The part of the file name captured by the pattern becomes the
        cpu label; the path of the containing directory relative to
        `directory` becomes the run name.
        """
        import os, os.path, re
        from os.path import expanduser, join as joinpath

        directory = expanduser(directory)
        # NOTE(review): the '.' before 'dat' is unescaped and the pattern
        # is not anchored at the end -- confirm that is intended.
        label_ex = re.compile(r'profile\.(.*).dat')
        for root, dirs, files in os.walk(directory):
            for name in files:
                match = label_ex.match(name)
                if not match:
                    continue

                filename = joinpath(root, name)
                prefix = os.path.commonprefix([root, directory])
                # +1 skips the path separator after the common prefix
                dirname = root[len(prefix)+1:]
                data = self.datatype(filename, self.categorize)
                self.setdata(dirname, match.group(1), data)

    def setdata(self, run, cpu, data):
        """Store data for (run, cpu); raise AttributeError if already set."""
        if run not in self.data:
            self.data[run] = {}

        if cpu in self.data[run]:
            raise AttributeError(
                'data already stored for run %s and cpu %s' % (run, cpu))

        self.data[run][cpu] = data

    def getdata(self, run, cpu):
        """Return the data for (run, cpu).

        When nothing is stored for that pair, print the pair and return
        None.
        """
        try:
            return self.data[run][cpu]
        except KeyError:
            print('%s %s' % (run, cpu))
            return None

    def alldata(self):
        """Yield (run, cpu, data) for everything stored."""
        for run, cpus in self.data.iteritems():
            for cpu, data in cpus.iteritems():
                yield run, cpu, data

    def get(self, job, stat, system=None):
        """Return the percentage of each category for `job`.

        The result follows the order of self.categories.  Returns None
        when no (or empty) data is stored for the job.  `system`
        defaults to job.system; AttributeError is raised when neither
        is available, ValueError when a stored value is negative.
        `stat` is not used by this method.
        """
        # hasattr() takes the object first and the attribute name second
        if system is None and hasattr(job, 'system'):
            system = job.system

        if system is None:
            raise AttributeError('The job must have a system set')

        cpu = '%s.run%d' % (system, self.cpu)

        data = self.getdata(str(job), cpu)
        if not data:
            return None

        values = []
        for category in self.categories:
            val = float(data.get(category, 0.0))
            if val < 0.0:
                raise ValueError('value is %f' % val)
            values.append(val)
        total = sum(values)
        if not total:
            # nothing fell into any known category; avoid dividing by zero
            return [ 0.0 for v in values ]
        return [ v / total * 100.0 for v in values ]

    def dump(self):
        """Print a header for each (run, cpu) and call dump() on its data."""
        for run, cpu, data in self.alldata():
            print('run %s, cpu %s' % (run, cpu))
            # NOTE(review): none of the data classes in this file define
            # dump() (only FuncNode does) -- confirm what should be
            # called here.
            data.dump()
            print('')

    def write_dot(self, threshold, jobfile=None, jobs=None):
        """Write a .dot file next to the data file of every job.

        Jobs come from `jobs`, or from jobfile.jobs() when jobs is None;
        jobs without data for the current cpu are skipped.
        """
        import pydot

        if jobs is None:
            jobs = list(jobfile.jobs())

        for job in jobs:
            cpu = '%s.run%d' % (job.system, self.cpu)
            symbols = self.getdata(job.name, cpu)
            if not symbols:
                continue

            dot = pydot.Dot()
            symbols.tree.dot(dot, threshold=threshold)
            # swap the last three characters of the file name for 'dot'
            dot.write(symbols.filename[:-3] + 'dot')

    def write_txt(self, jobfile=None, jobs=None, limit=None):
        """Write a .txt listing next to the data file of every job.

        Jobs come from `jobs`, or from jobfile.jobs() when jobs is None;
        jobs without data for the current cpu are skipped.
        """
        if jobs is None:
            jobs = list(jobfile.jobs())

        for job in jobs:
            cpu = '%s.run%d' % (job.system, self.cpu)
            symbols = self.getdata(job.name, cpu)
            if not symbols:
                continue

            # swap the last three characters of the file name for 'txt'
            output = file(symbols.filename[:-3] + 'txt', 'w')
            try:
                symbols.display(output, limit)
            finally:
                output.close()

    def display(self, jobfile=None, jobs=None, limit=None):
        """Print the listing of every job that has data for the current cpu.

        All listings share one name-column width, the longest symbol
        over all displayed jobs.
        """
        if jobs is None:
            jobs = list(jobfile.jobs())

        maxsymlen = 0

        thejobs = []
        for job in jobs:
            cpu = '%s.run%d' % (job.system, self.cpu)
            symbols = self.getdata(job.name, cpu)
            if symbols:
                thejobs.append(job)
                maxsymlen = max(maxsymlen, symbols.maxsymlen)

        for job in thejobs:
            cpu = '%s.run%d' % (job.system, self.cpu)
            symbols = self.getdata(job.name, cpu)
            print(job.name)
            symbols.display(limit=limit, maxsymlen=maxsymlen)
            print('')


from categories import func_categorize, pc_categorize
class PCProfile(Profile):
    """Profile whose data objects are PCData."""
    def __init__(self, categorize=pc_categorize):
        super(PCProfile, self).__init__(PCData, categorize)


class FuncProfile(Profile):
    """Profile whose data objects are FuncData."""
    def __init__(self, categorize=func_categorize):
        super(FuncProfile, self).__init__(FuncData, categorize)

def usage(exitcode = None):
    """Print the command line help; exit if exitcode is not None."""
    # imported here so the function does not rely on the __main__ block
    import sys

    print('''\
Usage: %s [-cdipt] [-C <cpus>] [-D <dotfile>] [-f <file>] [-g <dir>]
          [-j <jobfile>] [-n <num>] [-T <threshold>]

    -C <cpus>    comma separated list of cpu numbers (default is 0)
    -c           groups symbols into categories
    -D <dotfile> file to write when -d is used together with -f
    -d           generate dot output
    -f <file>    read function data from a single file, not a jobfile
    -g <d>       draw graphs and send output to <d>
    -i           leave the idle category out of PC data (needs -c and -p)
    -j <jobfile> specify a different jobfile (default is Test.py)
    -n <n>       selects number of top symbols to print (default 10)
    -p           use PC data instead of function data
    -T <t>       threshold in percent for dot output (default 0.01)
    -t           write a text listing next to each data file
''' % sys.argv[0])

    if exitcode is not None:
        sys.exit(exitcode)

if __name__ == '__main__':
    import getopt, re, sys
    from os.path import expanduser
    from output import StatOutput

    # default option values
    numsyms = 10
    graph = None
    cpus = [ 0 ]
    categorize = False
    showidle = True
    funcdata = True
    jobfilename = 'Test.py'
    dodot = False
    dotfile = None
    textout = False
    threshold = 0.01
    inputfile = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'C:cdD:f:g:ij:n:pT:t')
    except getopt.GetoptError:
        usage(2)

    for o, a in opts:
        if o == '-C':
            cpus = [ int(x) for x in a.split(',') ]
        elif o == '-c':
            categorize = True
        elif o == '-D':
            dotfile = a
        elif o == '-d':
            dodot = True
        elif o == '-f':
            inputfile = expanduser(a)
        elif o == '-g':
            graph = a
        elif o == '-i':
            showidle = False
        elif o == '-j':
            jobfilename = a
        elif o == '-n':
            numsyms = int(a)
        elif o == '-p':
            funcdata = False
        elif o == '-T':
            threshold = float(a)
        elif o == '-t':
            textout = True

    if args:
        print("'%s' %d" % (args, len(args)))
        usage(1)

    if inputfile:
        catfunc = None
        if categorize:
            catfunc = func_categorize
        data = FuncData(inputfile, categorize=catfunc)

        if dodot:
            if dotfile is None:
                # there is no default name to write the graph to
                print('-d together with -f needs an output file (-D)')
                usage(1)

            import pydot
            dot = pydot.Dot()
            data.tree.dot(dot, threshold=threshold)
            #dot.orientation = 'landscape'
            #dot.ranksep='equally'
            #dot.rank='samerank'
            dot.write(dotfile, format='png')
        else:
            data.display(limit=numsyms)

    else:
        from jobfile import JobFile
        jobfile = JobFile(jobfilename)

        if funcdata:
            profile = FuncProfile()
        else:
            profile = PCProfile()
            if not showidle:
                # Profile.inputdir only passes (filename, categorize), so
                # wrap PCData to hand the -i setting through to it
                profile.datatype = lambda filename, categorize: \
                    PCData(filename, categorize, showidle=False)

        if not categorize:
            profile.categorize = None
        profile.inputdir(jobfile.rootdir)

        if graph:
            for cpu in cpus:
                profile.cpu = cpu
                if funcdata:
                    name = 'funcstacks%d' % cpu
                else:
                    name = 'stacks%d' % cpu
                output = StatOutput(jobfile, info=profile)
                output.xlabel = 'System Configuration'
                output.ylabel = '% CPU utilization'
                output.stat = name
                output.graph(name, graph)

        if dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_dot(jobfile=jobfile, threshold=threshold)

        if textout:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_txt(jobfile=jobfile)

        if not graph and not textout and not dodot:
            for cpu in cpus:
                if not categorize:
                    profile.categorize = None
                profile.cpu = cpu
                profile.display(jobfile=jobfile, limit=numsyms)