profile.py revision 2006:3ca085495c69
1# Copyright (c) 2005 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27from orderdict import orderdict
28import output
29
class FileData(dict):
    """Section-indexed contents of a profile data file.

    The file is read line by line and every line is stripped of
    surrounding whitespace.  A line starting with '>>>' opens a new
    section whose key is the rest of that line; each following line is
    appended to that section's list until the next header.  Lines that
    appear before the first header are collected in a throwaway list
    and therefore dropped.

    filename -- path of the file to read; also stored as self.filename.
    """
    def __init__(self, filename):
        super(FileData, self).__init__()
        self.filename = filename

        # Lines seen before any '>>>' header land here and are discarded.
        current = []
        fd = open(filename)
        try:
            for line in fd:
                line = line.strip()
                if line.startswith('>>>'):
                    current = []
                    self[line[3:]] = current
                else:
                    current.append(line)
        finally:
            # Release the handle even when reading fails part way.
            fd.close()
43
class RunData(dict):
    """Mapping of symbol (or category) name to sample count for one file.

    Three attributes are computed on every access by __getattribute__:

    total     -- float sum of all stored counts
    filedata  -- FileData(self.filename); the file is re-read each time
    maxsymlen -- length of the longest key (0 when the mapping is empty)
    """
    def __init__(self, filename):
        self.filename = filename

    def __getattribute__(self, attr):
        if attr == 'total':
            total = 0.0
            for value in self.values():
                total += value
            return total

        if attr == 'filedata':
            return FileData(self.filename)

        if attr == 'maxsymlen':
            # An empty mapping has no longest symbol; report 0 instead
            # of letting max() raise ValueError on an empty sequence.
            longest = 0
            for sym in self:
                longest = max(longest, len(sym))
            return longest

        return super(RunData, self).__getattribute__(attr)

    def display(self, output=None, limit=None, maxsymlen=None):
        """Write one 'name percent%' line per entry, largest count first.

        output    -- writable file object, a path string (opened for
                     writing and closed again here), or a false value
                     for sys.stdout
        limit     -- when not None, only the first `limit` entries of
                     the sorted list are written
        maxsymlen -- width of the name column; a false value selects
                     self.maxsymlen
        """
        opened = False
        if not output:
            import sys
            output = sys.stdout
        elif isinstance(output, str):
            output = open(output, 'w')
            opened = True

        try:
            total = float(self.total)

            # swap (string,count) order so we can sort on count
            symbols = sorted([ (count, name) for name, count in self.items() ],
                             reverse=True)
            if limit is not None:
                symbols = symbols[:limit]

            if not maxsymlen:
                maxsymlen = self.maxsymlen

            symbolf = "%-" + str(maxsymlen + 1) + "s %.2f%%"
            for number, name in symbols:
                # All-zero counts give a zero total; show 0% rather
                # than dividing by zero.
                if total:
                    percent = 100.0 * (float(number) / total)
                else:
                    percent = 0.0
                output.write(symbolf % (name, percent) + '\n')
        finally:
            # Only close what this method opened itself.
            if opened:
                output.close()
84
class PCData(RunData):
    """Sample counts read from the 'PC data' section of a profile file.

    Each non-blank line of the section is split into a symbol and an
    integer count; lines whose symbol is "0x0" are skipped.

    filename   -- path of the profile data file
    categorize -- optional callable mapping a symbol to a category
                  name (or None, which is filed under 'other'); when
                  categorize itself is None counts are keyed by symbol
    showidle   -- when false, samples categorized as 'idle' are dropped
    """
    def __init__(self, filename=None, categorize=None, showidle=True):
        # super() already binds self; passing it again made
        # RunData.__init__ fail with a TypeError.
        super(PCData, self).__init__(filename)

        for line in self.filedata['PC data']:
            if not line:
                continue

            (symbol, count) = line.split()
            if symbol == "0x0":
                continue
            count = int(count)

            if categorize is None:
                key = symbol
            else:
                key = categorize(symbol)
                if key is None:
                    key = 'other'
                elif key == 'idle' and not showidle:
                    continue

            # Several symbols can share a key, so accumulate instead of
            # overwriting the previous count.
            self[key] = self.get(key, 0) + count
104
class FuncNode(object):
    """One node of the call tree stored in a file's 'function data'.

    FuncNode() returns a bare node.  FuncNode(filedata) parses
    filedata['function data'] and returns the root of the resulting
    tree, i.e. the node whose id is 0.  Each line holds, separated by
    single spaces: a hexadecimal node id, the symbol (an empty symbol
    becomes 'unknown'), a decimal count and zero or more hexadecimal
    child ids.

    Parsed nodes carry `symbol`, `count` and `children` (a tuple of
    FuncNode); every node that is somebody's child also gets `parent`.
    """
    def __new__(cls, filedata=None):
        if filedata is None:
            return super(FuncNode, cls).__new__(cls)

        nodes = {}
        for line in filedata['function data']:
            if not line:
                continue

            data = line.split(' ')
            node_id = int(data[0], 16)
            node = FuncNode()
            node.symbol = data[1] or 'unknown'
            node.count = int(data[2])
            # Child ids for now; replaced by node objects below once
            # every node has been read.
            node.children = [ int(child, 16) for child in data[3:] ]
            nodes[node_id] = node

        for node in nodes.values():
            children = []
            for cid in node.children:
                child = nodes[cid]
                children.append(child)
                child.parent = node
            node.children = tuple(children)

        if 0 not in nodes:
            raise KeyError('no root node (id 0) in function data of %s' %
                           getattr(filedata, 'filename', '<unknown>'))
        return nodes[0]

    def total(self):
        """Return this node's count plus the totals of all descendants."""
        total = self.count
        for child in self.children:
            total += child.total()

        return total

    def aggregate(self, dict, categorize, incategory):
        """Accumulate subtree counts into the mapping `dict`.

        dict       -- mapping updated in place (name -> summed count)
        categorize -- optional callable mapping a symbol to a category
        incategory -- true when an ancestor was already categorized

        A node with a category adds its accumulated total to that
        category and returns 0.  Any other node returns its total to
        the caller and, unless it is inside a categorized ancestor,
        also adds it under its own symbol.
        """
        category = None
        if categorize:
            category = categorize(self.symbol)

        total = self.count
        for child in self.children:
            total += child.aggregate(dict, categorize, category or incategory)

        if category:
            dict[category] = dict.get(category, 0) + total
            return 0
        elif not incategory:
            dict[self.symbol] = dict.get(self.symbol, 0) + total

        return total

    def dump(self):
        """Print 'symbol count <child symbols>' for the whole subtree."""
        kids = [ child.symbol for child in self.children ]
        print('%s %d <%s>' % (self.symbol, self.count, ', '.join(kids)))
        for child in self.children:
            child.dump()

    def _dot(self, dot, threshold, categorize, total):
        """Add this subtree to the pydot graph `dot`.

        A node is left out, together with everything below it, when
        its share of `total` (in percent) is below `threshold`, or
        when `categorize` gives it a category other than 'other'.
        """
        from pydot import Edge, Node
        self.dot_node = None

        value = self.total() * 100.0 / total
        if value < threshold:
            return
        if categorize:
            category = categorize(self.symbol)
            if category and category != 'other':
                return
        label = '%s %.2f%%' % (self.symbol, value)
        self.dot_node = Node(self, label=label)
        dot.add_node(self.dot_node)

        for child in self.children:
            child._dot(dot, threshold, categorize, total)
            if child.dot_node is not None:
                dot.add_edge(Edge(self, child))

    def _cleandot(self):
        """Remove the temporary dot_node attribute from the subtree."""
        for child in self.children:
            child._cleandot()
        # Done once per node and outside the loop so that leaves are
        # cleaned too; nodes that _dot never reached have nothing to
        # remove.
        self.__dict__.pop('dot_node', None)

    def dot(self, dot, threshold=0.1, categorize=None):
        """Render the tree into the pydot graph `dot`, then clean up."""
        self._dot(dot, threshold, categorize, self.total())
        self._cleandot()
192        self._cleandot()
193
class FuncData(RunData):
    """Per-symbol (or per-category) counts aggregated from a call tree.

    filename   -- path of the profile data file
    categorize -- optional callable handed to FuncNode.aggregate

    The `tree` attribute is rebuilt from the file on every access.
    `total` is the total count of the whole tree, as stored by
    __init__.
    """
    def __init__(self, filename, categorize=None):
        super(FuncData, self).__init__(filename)
        tree = self.tree
        tree.aggregate(self, categorize, incategory=False)
        self.total = tree.total()

    def __getattribute__(self, attr):
        if attr == 'tree':
            return FuncNode(self.filedata)

        if attr == 'total':
            # RunData.__getattribute__ always recomputes 'total' from
            # the stored values, which hid the tree total assigned in
            # __init__.  Prefer the assigned value once it exists.
            stored = super(FuncData, self).__getattribute__('__dict__')
            if 'total' in stored:
                return stored['total']

        return super(FuncData, self).__getattribute__(attr)

    def displayx(self, output=None, maxcount=None):
        """Write 'name percent%' lines, largest value first.

        output   -- writable file object; None selects sys.stdout
        maxcount -- when not None, stop after that many lines
        """
        if output is None:
            import sys
            output = sys.stdout

        total = self.total
        items = sorted([ (val, key) for key, val in self.items() ],
                       reverse=True)
        for val, key in items:
            if maxcount is not None:
                if maxcount == 0:
                    return
                maxcount -= 1

            # Guard against an all-zero tree.
            if total:
                percent = val * 100.0 / total
            else:
                percent = 0.0
            output.write('%-30s %8s\n' % (key, '%3.2f%%' % percent))
221
class Profile(object):
    """Collection of profile data sets indexed by run name and cpu label.

    datatype   -- callable invoked as datatype(filename, categorize)
                  for every data file found by inputdir()
    categorize -- value passed through to datatype

    self.cpu selects which cpu number get(), write_dot(), write_txt()
    and display() look up for a job.
    """
    # This list controls the order of values in stacked bar data output
    default_categories = [ 'interrupt',
                           'driver',
                           'stack',
                           'buffer',
                           'copy',
                           'syscall',
                           'user',
                           'other',
                           'idle']

    def __init__(self, datatype, categorize=None):
        categories = Profile.default_categories

        self.datatype = datatype
        self.categorize = categorize
        self.data = {}
        self.categories = categories[:]
        self.rcategories = categories[:]
        self.rcategories.reverse()
        self.cpu = 0

    def _cpuname(self, job):
        """Return the cpu label under which `job`'s data is stored."""
        return '%s.run%d' % (job.system, self.cpu)

    def _joblist(self, jobfile, jobs):
        """Return `jobs`, or every job of `jobfile` when jobs is None."""
        if jobs is None:
            return list(jobfile.jobs())
        return jobs

    # Read in files
    def inputdir(self, directory):
        """Load every 'profile.<label>.dat' file found below directory.

        The run name is the file's directory relative to `directory`
        ('' for files directly inside it); the cpu label is <label>.
        """
        import os, os.path, re
        from os.path import expanduser, join as joinpath, normpath

        # Normalizing drops a trailing separator, which would
        # otherwise eat the first character of every run name below.
        directory = normpath(expanduser(directory))
        # Escape the dot before 'dat' and anchor the end so that names
        # such as 'profile.x.dat.bak' are not taken for data files.
        label_ex = re.compile(r'profile\.(.*)\.dat$')
        for root, dirs, files in os.walk(directory):
            for name in files:
                match = label_ex.match(name)
                if not match:
                    continue

                filename = joinpath(root, name)
                dirname = root[len(directory):].lstrip(os.sep)
                data = self.datatype(filename, self.categorize)
                self.setdata(dirname, match.group(1), data)

    def setdata(self, run, cpu, data):
        """Store data for (run, cpu); AttributeError if already present."""
        if run not in self.data:
            self.data[run] = {}

        if cpu in self.data[run]:
            raise AttributeError(
                'data already stored for run %s and cpu %s' % (run, cpu))

        self.data[run][cpu] = data

    def getdata(self, run, cpu):
        """Return the data for (run, cpu); print the pair and return
        None when nothing is stored for it."""
        try:
            return self.data[run][cpu]
        except KeyError:
            print('%s %s' % (run, cpu))
            return None

    def alldata(self):
        """Yield (run, cpu, data) for every stored data set."""
        for run, cpus in self.data.items():
            for cpu, data in cpus.items():
                yield run, cpu, data

    def get(self, job, stat):
        """Return the percentage per category for job on cpu self.cpu.

        The list follows the order of self.categories.  None is
        returned when no (or empty) data is stored for the job.
        `stat` is not used.  Raises AttributeError when job.system is
        None and ValueError when a stored value is negative.
        """
        if job.system is None:
            raise AttributeError('The job must have a system set')

        data = self.getdata(job.name, self._cpuname(job))
        if not data:
            return None

        values = []
        for category in self.categories:
            val = float(data.get(category, 0.0))
            if val < 0.0:
                raise ValueError('value is %f' % val)
            values.append(val)

        total = sum(values)
        if not total:
            # None of the known categories has samples; report zeros
            # instead of dividing by zero.
            return [ 0.0 for v in values ]
        return [ v / total * 100.0 for v in values ]

    def dump(self):
        """Print a header for every data set and call its dump()."""
        for run, cpu, data in self.alldata():
            print('run %s, cpu %s' % (run, cpu))
            # NOTE(review): neither RunData nor its subclasses in this
            # file define dump(); presumably data.tree.dump() was
            # meant for function data -- confirm before relying on it.
            data.dump()
            print('')

    def write_dot(self, threshold, jobfile=None, jobs=None):
        """Write a '<datafile minus last 3 chars>dot' graph per job."""
        import pydot

        for job in self._joblist(jobfile, jobs):
            symbols = self.getdata(job.name, self._cpuname(job))
            if not symbols:
                continue

            dot = pydot.Dot()
            symbols.tree.dot(dot, threshold=threshold)
            dot.write(symbols.filename[:-3] + 'dot')

    def write_txt(self, jobfile=None, jobs=None, limit=None):
        """Write a '<datafile minus last 3 chars>txt' listing per job."""
        for job in self._joblist(jobfile, jobs):
            symbols = self.getdata(job.name, self._cpuname(job))
            if not symbols:
                continue

            output = open(symbols.filename[:-3] + 'txt', 'w')
            try:
                symbols.display(output, limit)
            finally:
                # One file per job: close each as soon as it is done.
                output.close()

    def display(self, jobfile=None, jobs=None, limit=None):
        """Print every job's listing with a common name-column width."""
        maxsymlen = 0

        found = []
        for job in self._joblist(jobfile, jobs):
            symbols = self.getdata(job.name, self._cpuname(job))
            if symbols:
                found.append((job, symbols))
                maxsymlen = max(maxsymlen, symbols.maxsymlen)

        for job, symbols in found:
            print(job.name)
            symbols.display(limit=limit, maxsymlen=maxsymlen)
            print('')
359            print
360
361
362from categories import func_categorize, pc_categorize
class PCProfile(Profile):
    """Profile that loads every data file with PCData.

    categorize -- passed on to Profile; defaults to pc_categorize.
    """
    def __init__(self, categorize=pc_categorize):
        base = super(PCProfile, self)
        base.__init__(PCData, categorize)
366
367
class FuncProfile(Profile):
    """Profile that loads every data file with FuncData.

    categorize -- passed on to Profile; defaults to func_categorize.
    """
    def __init__(self, categorize=func_categorize):
        base = super(FuncProfile, self)
        base.__init__(FuncData, categorize)
371
def usage(exitcode = None):
    """Print the command line help, then exit if exitcode is not None.

    exitcode -- status handed to sys.exit(); None returns to the caller.
    """
    # Imported here so the function does not depend on the import
    # that only happens when the file is run as a script.
    import sys

    print('''\
Usage: %s [-cdipt] [-C <cpus>] [-D <file>] [-f <file>] [-g <dir>]
       [-j <jobfile>] [-n <num>] [-T <threshold>]

    -C <cpus>    comma separated list of cpu numbers (default 0)
    -c           groups symbols into categories
    -D <file>    file the graph is written to when -d is used with -f
    -d           generate dot output
    -f <file>    read a single profile data file instead of a jobfile
    -g <d>       draw graphs and send output to <d>
    -i           hide idle samples (accepted, currently has no effect)
    -j <jobfile> specify a different jobfile (default is Test.py)
    -n <n>       selects number of top symbols to print (default 10)
    -p           use PC sample data instead of function data
    -T <t>       minimum percentage shown in dot output (default 0.01)
    -t           write a text listing next to each data file
''' % sys.argv[0])

    if exitcode is not None:
        sys.exit(exitcode)
386
# Command line driver: parse the options, then either process a single
# data file (-f) or every job of the jobfile (-j).
if __name__ == '__main__':
    import getopt, re, sys
    from os.path import expanduser
    from output import StatOutput

    # default option values
    numsyms = 10
    graph = None
    cpus = [ 0 ]
    categorize = False
    # NOTE(review): -i clears showidle, but the value is never handed
    # to any data class in this block -- confirm the intended wiring.
    showidle = True
    funcdata = True
    jobfilename = 'Test.py'
    dodot = False
    dotfile = None
    textout = False
    threshold = 0.01
    inputfile = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'C:cdD:f:g:ij:n:pT:t')
    except getopt.GetoptError:
        usage(2)

    for o, a in opts:
        if o == '-C':
            cpus = [ int(x) for x in a.split(',') ]
        elif o == '-c':
            categorize = True
        elif o == '-D':
            dotfile = a
        elif o == '-d':
            dodot = True
        elif o == '-f':
            inputfile = expanduser(a)
        elif o == '-g':
            graph = a
        elif o == '-i':
            showidle = False
        elif o == '-j':
            jobfilename = a
        elif o == '-n':
            numsyms = int(a)
        elif o == '-p':
            funcdata = False
        elif o == '-T':
            threshold = float(a)
        elif o == '-t':
            textout = True

    # Positional arguments are not accepted.
    if args:
        print("'%s' %d" % (args, len(args)))
        usage(1)

    if inputfile:
        # Single file mode: always function data.
        catfunc = None
        if categorize:
            catfunc = func_categorize
        data = FuncData(inputfile, categorize=catfunc)

        if dodot:
            # Without -D there is no file to write the graph to; stop
            # with the help text instead of failing inside pydot.
            if dotfile is None:
                print('-d together with -f requires -D <file>')
                usage(1)

            import pydot
            dot = pydot.Dot()
            data.tree.dot(dot, threshold=threshold)
            #dot.orientation = 'landscape'
            #dot.ranksep='equally'
            #dot.rank='samerank'
            dot.write(dotfile, format='png')
        else:
            data.display(limit=numsyms)

    else:
        from jobfile import JobFile
        jobfile = JobFile(jobfilename)

        if funcdata:
            profile = FuncProfile()
        else:
            profile = PCProfile()

        # Must happen before inputdir(), which hands profile.categorize
        # to every data object it creates.
        if not categorize:
            profile.categorize = None
        profile.inputdir(jobfile.rootdir)

        if graph:
            for cpu in cpus:
                profile.cpu = cpu
                if funcdata:
                    name = 'funcstacks%d' % cpu
                else:
                    name = 'stacks%d' % cpu
                output = StatOutput(jobfile, info=profile)
                output.xlabel = 'System Configuration'
                output.ylabel = '% CPU utilization'
                output.stat = name
                output.graph(name, graph)

        if dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_dot(jobfile=jobfile, threshold=threshold)

        if textout:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_txt(jobfile=jobfile)

        # Default action: print the top symbols for every cpu.
        if not graph and not textout and not dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.display(jobfile=jobfile, limit=numsyms)
500