profile.py revision 2014:7df693ff6fa4
1# Copyright (c) 2005 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27from orderdict import orderdict
28import output
29
class FileData(dict):
    """Sectioned contents of a profile data file.

    The file is divided into sections by header lines that start with
    '>>>'.  The text following the marker (after the line has been
    whitespace-stripped) becomes a key; its value is the list of
    stripped lines that follow, up to the next header.  Lines that
    appear before the first header are discarded.

    filename -- path of the file to read; also kept as self.filename.
    """
    def __init__(self, filename):
        self.filename = filename
        # Lines seen before any '>>>' header land in this throwaway list.
        current = []
        fd = open(filename)
        try:
            for line in fd:
                line = line.strip()
                if line.startswith('>>>'):
                    # Start a new section; later lines accumulate in it.
                    current = []
                    self[line[3:]] = current
                else:
                    current.append(line)
        finally:
            # Release the handle even when reading fails part-way.
            fd.close()
43
class RunData(dict):
    """Mapping of symbol (or category) name to count for one data file.

    Three attributes are computed on every access instead of stored:

      total     -- sum of all counts, as a float
      filedata  -- a freshly parsed FileData for self.filename
      maxsymlen -- length of the longest key (0 when there are no keys)

    Because __getattribute__ intercepts these names, assigning to them
    on an instance does not change what a later read returns.
    """
    def __init__(self, filename):
        self.filename = filename

    def __getattribute__(self, attr):
        if attr == 'total':
            return sum(self.values(), 0.0)

        if attr == 'filedata':
            # Re-reads the file on each access.
            return FileData(self.filename)

        if attr == 'maxsymlen':
            # max() of an empty sequence raises; an empty data set simply
            # has no symbol width.
            if not self:
                return 0
            return max([ len(sym) for sym in self.keys() ])

        return super(RunData, self).__getattribute__(attr)

    def display(self, output=None, limit=None, maxsymlen=None):
        """Write one '<name> <percent>%' line per entry, largest first.

        output    -- file-like object, a path string to (over)write, or
                     a false value for sys.stdout
        limit     -- if not None, only the first `limit` entries of the
                     sorted list are written
        maxsymlen -- width used to pad names; defaults to
                     self.maxsymlen
        """
        opened_here = False
        if not output:
            import sys
            output = sys.stdout
        elif isinstance(output, str):
            output = open(output, 'w')
            opened_here = True

        try:
            total = float(self.total)

            # swap (string,count) order so we can sort on count
            symbols = [ (count,name) for name,count in self.items() ]
            symbols.sort(reverse=True)
            if limit is not None:
                symbols = symbols[:limit]

            if not maxsymlen:
                maxsymlen = self.maxsymlen

            symbolf = "%-" + str(maxsymlen + 1) + "s %.2f%%\n"
            for number,name in symbols:
                # All-zero counts would otherwise divide by zero.
                if total:
                    percent = 100.0 * (float(number) / total)
                else:
                    percent = 0.0
                output.write(symbolf % (name, percent))
        finally:
            # Only close a file this method opened itself; caller-owned
            # streams are left open.
            if opened_here:
                output.close()
84
class PCData(RunData):
    """Counts read from the 'PC data' section of a profile data file.

    Every non-blank line of the section is '<symbol> <count>'.  Lines
    whose symbol is "0x0" are ignored.

    filename   -- path of the profile data file
    categorize -- optional callable mapping a symbol to a category
                  name (or None).  When given, counts are summed per
                  category and a None result is filed under 'other';
                  when omitted, counts are summed per symbol.
    showidle   -- when false, symbols categorized as 'idle' are skipped
    """
    def __init__(self, filename=None, categorize=None, showidle=True):
        # The bound super() call already supplies self.
        super(PCData, self).__init__(filename)

        for line in self.filedata['PC data']:
            if not line:
                # Blank lines carry no data and cannot be unpacked.
                continue
            (symbol, count) = line.split()
            if symbol == "0x0":
                continue
            count = int(count)

            key = symbol
            if categorize is not None:
                key = categorize(symbol)
                if key is None:
                    key = 'other'
                elif key == 'idle' and not showidle:
                    continue

            # Several symbols can share one key, so accumulate rather
            # than overwrite.
            self[key] = self.get(key, 0) + count
104
class FuncNode(object):
    """One function in the call tree stored in a 'function data' section.

    FuncNode(filedata) parses filedata['function data'] and returns the
    root of the tree, i.e. the node whose id is 0.  FuncNode() with no
    argument returns a blank node.  Each non-blank input line has the
    single-space separated form

        <hex id> <symbol> <count> [<hex child id> ...]

    Parsed nodes carry these attributes:

      symbol   -- function name ('unknown' when the field is empty)
      count    -- count recorded for the function itself
      children -- tuple of child FuncNodes
      parent   -- the node that lists this one as a child; not set on
                  nodes nobody references (such as the root)
    """
    def __new__(cls, filedata=None):
        if filedata is None:
            return super(FuncNode, cls).__new__(cls)

        # First pass: create every node, children still as raw ids.
        nodes = {}
        for line in filedata['function data']:
            if not line:
                continue
            data = line.split(' ')
            node_id = int(data[0], 16)
            node = FuncNode()
            node.symbol = data[1]
            if node.symbol == '':
                node.symbol = 'unknown'
            node.count = int(data[2])
            node.children = [ int(child, 16) for child in data[3:] ]
            nodes[node_id] = node

        # Second pass: replace child ids with the node objects.
        for node in nodes.values():
            children = []
            for cid in node.children:
                child = nodes[cid]
                children.append(child)
                child.parent = node
            node.children = tuple(children)

        if 0 not in nodes:
            # Still a KeyError, but one that says which file is at fault.
            raise KeyError('no root node (id 0) in function data of %s' %
                           getattr(filedata, 'filename', '<unknown>'))
        return nodes[0]

    def total(self):
        """Return this node's count plus the totals of all descendants."""
        total = self.count
        for child in self.children:
            total += child.total()

        return total

    def aggregate(self, dict, categorize, incategory):
        """Accumulate subtree counts into the mapping `dict`.

        dict       -- mapping updated in place (name -> count)
        categorize -- callable mapping a symbol to a category name or a
                      false value; may itself be a false value
        incategory -- true when an ancestor was already categorized

        A categorized node adds its whole remaining subtree count to
        its category and returns 0 so ancestors do not count it again.
        An uncategorized node with no categorized ancestor adds the
        count to its own symbol.  Returns the count passed up to the
        caller.
        """
        category = None
        if categorize:
            category = categorize(self.symbol)

        total = self.count
        for child in self.children:
            total += child.aggregate(dict, categorize, category or incategory)

        if category:
            dict[category] = dict.get(category, 0) + total
            return 0
        elif not incategory:
            dict[self.symbol] = dict.get(self.symbol, 0) + total

        return total

    def dump(self):
        """Write '<symbol> <count> <child, ...>' for each node, depth first."""
        import sys
        kids = [ child.symbol for child in self.children]
        sys.stdout.write('%s %d <%s>\n' %
                         (self.symbol, self.count, ', '.join(kids)))
        for child in self.children:
            child.dump()

    def _dot(self, dot, threshold, categorize, total):
        """Add this subtree to the pydot graph `dot`.

        A node is left out (together with its whole subtree) when its
        share of `total`, in percent, is below `threshold`, or when
        `categorize` gives it a category other than 'other'.
        self.dot_node is the created pydot node, or None when left out.
        """
        from pydot import Edge, Node
        self.dot_node = None

        value = self.total() * 100.0 / total
        if value < threshold:
            return
        if categorize:
            category = categorize(self.symbol)
            if category and category != 'other':
                return
        label = '%s %.2f%%' % (self.symbol, value)
        self.dot_node = Node(self, label=label)
        dot.add_node(self.dot_node)

        for child in self.children:
            child._dot(dot, threshold, categorize, total)
            if child.dot_node is not None:
                dot.add_edge(Edge(self, child))

    def _cleandot(self):
        """Remove the temporary dot_node attribute from the whole subtree."""
        for child in self.children:
            child._cleandot()
        # Must run for every node, including leaves and nodes that
        # _dot() never visited, hence outside the loop and tolerant of
        # a missing attribute.
        self.__dict__.pop('dot_node', None)

    def dot(self, dot, threshold=0.1, categorize=None):
        """Populate the pydot graph `dot` from the tree rooted here.

        threshold  -- minimum percentage of this node's total() that a
                      node needs in order to be drawn
        categorize -- optional symbol -> category callable, see _dot()
        """
        self._dot(dot, threshold, categorize, self.total())
        self._cleandot()
193
class FuncData(RunData):
    """Per-symbol (or per-category) counts aggregated from a call tree.

    filename   -- path of the profile data file
    categorize -- optional callable handed to FuncNode.aggregate()

    The `tree` attribute is rebuilt from the file on every access.
    """
    def __init__(self, filename, categorize=None):
        super(FuncData, self).__init__(filename)
        root = self.tree
        root.aggregate(self, categorize, incategory=False)
        # NOTE: reads of `total` are intercepted by
        # RunData.__getattribute__, so this stored value is not what a
        # later `self.total` returns.
        self.total = root.total()

    def __getattribute__(self, attr):
        if attr != 'tree':
            return super(FuncData, self).__getattribute__(attr)
        # Parse the call tree afresh from the file's sections.
        return FuncNode(self.filedata)

    def displayx(self, output=None, maxcount=None):
        """Write '<name> <percent>%' lines, largest count first.

        output   -- file-like object; None means sys.stdout
        maxcount -- if not None, stop once that many lines were written
        """
        if output is None:
            import sys
            output = sys.stdout

        ranked = sorted([ (val, key) for key, val in self.items() ],
                        reverse=True)
        for shown, (val, key) in enumerate(ranked):
            if maxcount is not None and shown == maxcount:
                return

            share = val * 100.0 / self.total
            output.write('%-30s %8s\n' % (key, '%3.2f%%' % share))
221
class Profile(object):
    """Collection of profile data sets indexed by run name and cpu label.

    datatype   -- callable invoked as datatype(filename, categorize) to
                  load one data file
    categorize -- value passed through to `datatype`

    self.cpu selects which cpu number the job-based methods (get,
    write_dot, write_txt, display) look up; it starts at 0.
    """
    # This list controls the order of values in stacked bar data output
    default_categories = [ 'interrupt',
                           'driver',
                           'stack',
                           'buffer',
                           'copy',
                           'syscall',
                           'user',
                           'other',
                           'idle']

    def __init__(self, datatype, categorize=None):
        categories = Profile.default_categories

        self.datatype = datatype
        self.categorize = categorize
        self.data = {}
        self.categories = categories[:]
        self.rcategories = categories[:]
        self.rcategories.reverse()
        self.cpu = 0

    def _cpuname(self, system):
        """Return the cpu label used as a key for `system` and self.cpu."""
        return '%s.run%d' % (system, self.cpu)

    def _joblist(self, jobfile, jobs):
        """Return `jobs`, or every job of `jobfile` when `jobs` is None."""
        if jobs is None:
            jobs = list(jobfile.jobs())
        return jobs

    # Read in files
    def inputdir(self, directory):
        """Load every 'profile.<label>.dat' file found below `directory`.

        Each file is stored under (run, cpu) where run is the path of
        its directory relative to `directory` ('' for the top level)
        and cpu is <label>.
        """
        import os, os.path, re
        from os.path import expanduser, join as joinpath

        directory = expanduser(directory)
        # Literal '.dat' suffix, anchored so e.g. '.dat.bak' is skipped.
        label_ex = re.compile(r'profile\.(.*)\.dat$')
        for root,dirs,files in os.walk(directory):
            # os.walk builds every root by joining onto `directory`, so
            # cutting that prefix and any leading separator gives the
            # relative path whether or not `directory` ends in one.
            dirname = root[len(directory):].lstrip(os.sep)
            for name in files:
                match = label_ex.match(name)
                if not match:
                    continue

                filename = joinpath(root, name)
                data = self.datatype(filename, self.categorize)
                self.setdata(dirname, match.group(1), data)

    def setdata(self, run, cpu, data):
        """Store `data` for (run, cpu).

        Raises AttributeError if that slot is already filled.
        """
        if run not in self.data:
            self.data[run] = {}

        if cpu in self.data[run]:
            raise AttributeError(
                  'data already stored for run %s and cpu %s' % (run, cpu))

        self.data[run][cpu] = data

    def getdata(self, run, cpu):
        """Return the data for (run, cpu), or None if nothing is stored.

        A miss is reported by writing 'run cpu' to stdout.
        """
        try:
            return self.data[run][cpu]
        except KeyError:
            import sys
            sys.stdout.write('%s %s\n' % (run, cpu))
            return None

    def alldata(self):
        """Yield (run, cpu, data) for every stored data set."""
        for run,cpus in self.data.items():
            for cpu,data in cpus.items():
                yield run,cpu,data

    def get(self, job, stat, system=None):
        """Return the percentage of each category for `job`.

        job    -- looked up by str(job); its `system` attribute is used
                  when `system` is not given
        stat   -- not used by this method
        system -- system name used to build the cpu label

        Returns a list parallel to self.categories, or None when no
        (non-empty) data is stored for the job.  When every category is
        zero the result is all zeros.  Raises AttributeError when no
        system can be determined and ValueError on a negative value.
        """
        # hasattr takes (object, name).
        if system is None and hasattr(job, 'system'):
            system = job.system

        if system is None:
            raise AttributeError('The job must have a system set')

        cpu = self._cpuname(system)

        data = self.getdata(str(job), cpu)
        if not data:
            return None

        values = []
        for category in self.categories:
            val = float(data.get(category, 0.0))
            if val < 0.0:
                raise ValueError('value is %f' % val)
            values.append(val)
        total = sum(values)
        if not total:
            # Nothing to normalise against; avoid dividing by zero.
            return [ 0.0 for v in values ]
        return [ v / total * 100.0 for v in values ]

    def dump(self):
        """Write a header and data.dump() output for every data set."""
        import sys
        for run,cpu,data in self.alldata():
            sys.stdout.write('run %s, cpu %s\n' % (run, cpu))
            # NOTE(review): the data classes visible in this file do not
            # define dump(); confirm what `datatype` is expected to
            # provide here.
            data.dump()
            sys.stdout.write('\n')

    def write_dot(self, threshold, jobfile=None, jobs=None):
        """Write a '.dot' file next to the data file of each job.

        threshold -- passed to the tree's dot() method
        jobfile   -- source of jobs when `jobs` is None
        jobs      -- explicit iterable of jobs
        Jobs without stored data are skipped.
        """
        import pydot

        for job in self._joblist(jobfile, jobs):
            symbols = self.getdata(job.name, self._cpuname(job.system))
            if not symbols:
                continue

            dot = pydot.Dot()
            symbols.tree.dot(dot, threshold=threshold)
            # Swap the three-character extension for 'dot'.
            dot.write(symbols.filename[:-3] + 'dot')

    def write_txt(self, jobfile=None, jobs=None, limit=None):
        """Write a '.txt' listing next to the data file of each job.

        jobfile -- source of jobs when `jobs` is None
        jobs    -- explicit iterable of jobs
        limit   -- passed to the data set's display()
        Jobs without stored data are skipped.
        """
        for job in self._joblist(jobfile, jobs):
            symbols = self.getdata(job.name, self._cpuname(job.system))
            if not symbols:
                continue

            txtfile = open(symbols.filename[:-3] + 'txt', 'w')
            try:
                symbols.display(txtfile, limit)
            finally:
                # Flush and release the listing promptly.
                txtfile.close()

    def display(self, jobfile=None, jobs=None, limit=None):
        """Print the listing of every job that has data to stdout.

        All listings share one name column width, the longest symbol
        across the jobs shown.
        """
        import sys

        maxsymlen = 0

        thejobs = []
        for job in self._joblist(jobfile, jobs):
            symbols = self.getdata(job.name, self._cpuname(job.system))
            if symbols:
                thejobs.append(job)
                maxsymlen = max(maxsymlen, symbols.maxsymlen)

        for job in thejobs:
            symbols = self.getdata(job.name, self._cpuname(job.system))
            sys.stdout.write('%s\n' % (job.name,))
            symbols.display(limit=limit, maxsymlen=maxsymlen)
            sys.stdout.write('\n')
363
364
365from categories import func_categorize, pc_categorize
class PCProfile(Profile):
    """Profile whose data files are loaded with PCData.

    categorize -- symbol categorizer; defaults to pc_categorize
    """
    def __init__(self, categorize=pc_categorize):
        super(PCProfile, self).__init__(datatype=PCData,
                                        categorize=categorize)
369
370
class FuncProfile(Profile):
    """Profile whose data files are loaded with FuncData.

    categorize -- symbol categorizer; defaults to func_categorize
    """
    def __init__(self, categorize=func_categorize):
        super(FuncProfile, self).__init__(datatype=FuncData,
                                          categorize=categorize)
374
def usage(exitcode = None):
    """Print the command line help to stdout.

    exitcode -- when not None, the process exits with this status
                after the help has been printed.

    The option list mirrors the getopt string used by the script's
    main section ('C:cdD:f:g:ij:n:pT:t').
    """
    # Imported here so the function also works when this file is
    # imported as a module rather than run as a script.
    import sys

    text = '''\
Usage: %s [-cdipt] [-C <cpus>] [-D <file>] [-f <file>] [-g <dir>]
       [-j <jobfile>] [-n <num>] [-T <pct>]

    -C <cpus>    comma separated list of cpu numbers (default 0)
    -c           groups symbols into categories
    -D <file>    file the graph is written to when -d is used with -f
    -d           generate dot output
    -f <file>    read a single function data file instead of a jobfile
    -g <d>       draw graphs and send output to <d>
    -i           hide idle time (accepted, currently has no effect)
    -j <jobfile> specify a different jobfile (default is Test.py)
    -n <n>       selects number of top symbols to print (default 10)
    -p           use PC data instead of function data
    -T <pct>     minimum percentage for a node in dot output
                 (default 0.01)
    -t           write a text listing next to each data file
''' % sys.argv[0]
    # The extra newline keeps the blank line the help has always
    # ended with.
    sys.stdout.write(text + '\n')

    if exitcode is not None:
        sys.exit(exitcode)
389
# Command line driver: parse the options, then either process a single
# data file (-f) or every job listed in a jobfile.
if __name__ == '__main__':
    import getopt, re, sys
    from os.path import expanduser
    from output import StatOutput

    # default option values
    numsyms = 10
    graph = None
    cpus = [ 0 ]
    categorize = False
    showidle = True
    funcdata = True
    jobfilename = 'Test.py'
    dodot = False
    dotfile = None
    textout = False
    threshold = 0.01
    inputfile = None

    # An unknown option prints the help and exits with status 2.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'C:cdD:f:g:ij:n:pT:t')
    except getopt.GetoptError:
        usage(2)

    for o,a in opts:
        if o == '-C':
            # comma separated list of cpu numbers
            cpus = [ int(x) for x in a.split(',') ]
        elif o == '-c':
            categorize = True
        elif o == '-D':
            dotfile = a
        elif o == '-d':
            dodot = True
        elif o == '-f':
            inputfile = expanduser(a)
        elif o == '-g':
            graph = a
        elif o == '-i':
            # NOTE(review): showidle is set here but never read again in
            # this block, so -i currently has no visible effect.
            showidle = False
        elif o == '-j':
            jobfilename = a
        elif o == '-n':
            numsyms = int(a)
        elif o == '-p':
            funcdata = False
        elif o == '-T':
            threshold = float(a)
        elif o == '-t':
            textout = True

    # Positional arguments are not accepted.
    if args:
        print "'%s'" % args, len(args)
        usage(1)

    if inputfile:
        # Single file mode: always function data, optionally categorized.
        catfunc = None
        if categorize:
            catfunc = func_categorize
        data = FuncData(inputfile, categorize=catfunc)

        if dodot:
            import pydot
            dot = pydot.Dot()
            data.tree.dot(dot, threshold=threshold)
            #dot.orientation = 'landscape'
            #dot.ranksep='equally'
            #dot.rank='samerank'
            # NOTE(review): dotfile is still None here unless -D was
            # given; confirm how pydot handles that.
            dot.write(dotfile, format='png')
        else:
            data.display(limit=numsyms)

    else:
        # Jobfile mode: load every data file below the jobfile's rootdir.
        from jobfile import JobFile
        jobfile = JobFile(jobfilename)

        if funcdata:
            profile = FuncProfile()
        else:
            profile = PCProfile()

        # Without -c the default categorizer of the profile is dropped
        # before any file is read.
        if not categorize:
            profile.categorize = None
        profile.inputdir(jobfile.rootdir)

        # Each requested output below is produced once per selected cpu.
        if graph:
            for cpu in cpus:
                profile.cpu = cpu
                if funcdata:
                    name = 'funcstacks%d' % cpu
                else:
                    name = 'stacks%d' % cpu
                # This local name shadows the imported `output` module
                # for the rest of the script.
                output = StatOutput(jobfile, info=profile)
                output.xlabel = 'System Configuration'
                output.ylabel = '% CPU utilization'
                output.stat = name
                output.graph(name, graph)

        if dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_dot(jobfile=jobfile, threshold=threshold)

        if textout:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_txt(jobfile=jobfile)

        # Default action when no other output was requested: print the
        # top symbols of every job to stdout.
        if not graph and not textout and not dodot:
            for cpu in cpus:
                if not categorize:
                    profile.categorize = None
                profile.cpu = cpu
                profile.display(jobfile=jobfile, limit=numsyms)
503