profile.py revision 2665:a124942bacb8
1# Copyright (c) 2005 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26#
27# Authors: Nathan Binkert
28
29from orderdict import orderdict
30import output
31
class FileData(dict):
    """Sections of a profile data file, keyed by section name.

    A line that starts with '>>>' (after surrounding whitespace is stripped)
    opens a new section; the rest of that line is the section name.  Every
    following stripped line, up to the next marker, is appended to that
    section's list.  Lines that precede the first marker belong to no
    section and are dropped.
    """

    def __init__(self, filename):
        """Read `filename` and index its sections.

        Any error raised by open() or while reading propagates to the
        caller; the file is closed in either case.
        """
        super(FileData, self).__init__()
        self.filename = filename

        # open() instead of file(): file() only exists on Python 2.
        fd = open(filename)
        try:
            # Receives lines seen before the first '>>>' marker; this list
            # is never stored in the mapping.
            section = []
            for line in fd:
                line = line.strip()
                if line.startswith('>>>'):
                    section = []
                    self[line[3:]] = section
                else:
                    section.append(line)
        finally:
            # Close even when parsing fails part way through.
            fd.close()
45
class RunData(dict):
    """Profile counts for one data file: a mapping of name -> count.

    Three values are computed on every attribute lookup instead of being
    stored:

      total     -- sum of all counts, as a float
      filedata  -- a FileData parsed (again) from self.filename
      maxsymlen -- length of the longest key, 0 if there are no keys

    They are served from __getattribute__ rather than from properties
    because FuncData assigns to self.total; a setter-less property would
    reject that assignment.  The hook below answers those three names
    itself and never consults a same-named instance attribute.
    """

    def __init__(self, filename):
        """Remember `filename`; the file itself is only read on demand."""
        super(RunData, self).__init__()
        self.filename = filename

    def __getattribute__(self, attr):
        """Serve the computed attributes, defer everything else."""
        if attr == 'total':
            # Start from 0.0 so the result is a float even for int counts.
            return sum(self.values(), 0.0)

        if attr == 'filedata':
            return FileData(self.filename)

        if attr == 'maxsymlen':
            lengths = [ len(sym) for sym in self ]
            if not lengths:
                # max() of an empty list raises ValueError
                return 0
            return max(lengths)

        return super(RunData, self).__getattribute__(attr)

    def display(self, output=None, limit=None, maxsymlen=None):
        """Write one '<name> <percent>%' line per entry, largest first.

        output    -- None (or any false value) for sys.stdout, a str naming
                     a file to create, or an object with a write() method
        limit     -- if not None, print only the first `limit` entries
        maxsymlen -- width used to align the names; defaults to the longest
                     key in this mapping

        Entries are ordered by (count, name), descending.  A file opened
        here from a path is closed before returning; a caller supplied
        stream is left open.
        """
        opened = None
        if not output:
            import sys
            output = sys.stdout
        elif isinstance(output, str):
            opened = output = open(output, 'w')

        try:
            total = float(self.total)

            # (count, name) tuples so that sorting orders by count first
            symbols = sorted([ (count, name) for name, count in self.items() ],
                             reverse=True)
            if limit is not None:
                symbols = symbols[:limit]

            if not maxsymlen:
                maxsymlen = self.maxsymlen

            symbolf = "%-" + str(maxsymlen + 1) + "s %.2f%%"
            for number, name in symbols:
                if total:
                    percent = 100.0 * (float(number) / total)
                else:
                    # every count is zero; avoid dividing by zero
                    percent = 0.0
                output.write(symbolf % (name, percent) + '\n')
        finally:
            if opened is not None:
                opened.close()
86
class PCData(RunData):
    """Counts read from the 'PC data' section of a profile data file."""

    def __init__(self, filename=None, categorize=None, showidle=True):
        """Load the 'PC data' section of `filename`.

        Each non-blank line must hold exactly two whitespace separated
        fields, '<symbol> <count>'; anything else raises ValueError.  Lines
        whose symbol is "0x0" are skipped.

        categorize -- optional callable mapping a symbol to a category
                      name, or None when it has no category for it (such
                      symbols are filed under 'other').  Without a
                      categorize function the counts are keyed by symbol.
        showidle   -- when false, symbols categorized as 'idle' are
                      left out.

        Counts that land on the same key are added together.
        """
        # The original passed self explicitly as well, which made this
        # call fail with a TypeError.
        super(PCData, self).__init__(filename)

        for line in self.filedata['PC data']:
            if not line:
                # FileData keeps blank lines; they carry no sample
                continue

            (symbol, count) = line.split()
            if symbol == "0x0":
                continue
            count = int(count)

            if categorize is None:
                key = symbol
            else:
                key = categorize(symbol)
                if key is None:
                    key = 'other'
                elif key == 'idle' and not showidle:
                    continue

            # accumulate: several symbols can share one category
            self[key] = self.get(key, 0) + count
106
class FuncNode(object):
    """A node of the call tree stored in a 'function data' section.

    Nodes built from file data carry these attributes:

      symbol   -- the function name, 'unknown' if the file left it empty
      count    -- the count recorded for this node itself
      children -- tuple of child FuncNode objects
      parent   -- the node that lists this one as a child, else None
    """

    def __new__(cls, filedata=None):
        """Return a blank node, or the root of the tree in `filedata`.

        Called without an argument this creates an empty node with none of
        the attributes above.  Otherwise filedata['function data'] must be
        a sequence of lines of single-space separated fields:

            <hex id> <symbol> <count> [<hex child id> ...]

        and the node whose id is 0 is returned.

        Raises KeyError if a referenced child id, or the root id 0, does
        not appear in the data.
        """
        if filedata is None:
            return super(FuncNode, cls).__new__(cls)

        nodes = {}
        for line in filedata['function data']:
            if not line:
                # skip blank lines rather than failing on int('')
                continue

            # split(' ') rather than split(): an empty symbol field must
            # stay in place as an empty string
            data = line.split(' ')
            node_id = int(data[0], 16)
            node = FuncNode()
            node.symbol = data[1]
            if node.symbol == '':
                node.symbol = 'unknown'
            node.count = int(data[2])
            # ids for now; replaced by the node objects below
            node.children = [ int(child, 16) for child in data[3:] ]
            node.parent = None
            nodes[node_id] = node

        # second pass: every id is known, so links can be resolved
        for node in nodes.values():
            children = []
            for cid in node.children:
                child = nodes[cid]
                children.append(child)
                child.parent = node
            node.children = tuple(children)

        if 0 not in nodes:
            raise KeyError('no root node (id 0) in the function data of %s'
                           % getattr(filedata, 'filename', '<unknown>'))
        return nodes[0]

    def total(self):
        """Return this node's count plus the totals of all descendants."""
        total = self.count
        for child in self.children:
            total += child.total()

        return total

    def aggregate(self, dict, categorize, incategory):
        """Accumulate the counts of this subtree into the mapping `dict`.

        dict       -- mapping updated in place (name -> accumulated count)
        categorize -- callable mapping a symbol to a category name or a
                      false value; may itself be None/false to disable
                      categorization
        incategory -- true value when an ancestor already has a category

        A node that has a category adds its subtotal to that category and
        returns 0 to its parent.  A node without one adds its subtotal
        under its own symbol, but only when no ancestor is categorized,
        and returns the subtotal so the parent includes it as well.
        """
        category = None
        if categorize:
            category = categorize(self.symbol)

        total = self.count
        for child in self.children:
            total += child.aggregate(dict, categorize, category or incategory)

        if category:
            dict[category] = dict.get(category, 0) + total
            return 0
        elif not incategory:
            dict[self.symbol] = dict.get(self.symbol, 0) + total

        return total

    def dump(self):
        """Print this node and then, recursively, each of its children."""
        kids = [ child.symbol for child in self.children ]
        print('%s %d <%s>' % (self.symbol, self.count, ', '.join(kids)))
        for child in self.children:
            child.dump()

    def _dot(self, dot, threshold, categorize, total):
        """Add this subtree to the pydot graph `dot`.

        A node is left out, together with everything below it, when its
        share of `total` (in percent) is under `threshold`, or when
        `categorize` gives it a category other than 'other'.  While the
        graph is being built self.dot_node holds the pydot node, or None
        for a node that was left out.
        """
        from pydot import Edge, Node
        self.dot_node = None

        value = self.total() * 100.0 / total
        if value < threshold:
            return
        if categorize:
            category = categorize(self.symbol)
            if category and category != 'other':
                return
        label = '%s %.2f%%' % (self.symbol, value)
        # NOTE(review): the FuncNode itself is handed to pydot as the node
        # name and as the edge endpoints, not a string or self.dot_node --
        # confirm this is what the pydot version in use expects.
        self.dot_node = Node(self, label=label)
        dot.add_node(self.dot_node)

        for child in self.children:
            child._dot(dot, threshold, categorize, total)
            if child.dot_node is not None:
                dot.add_edge(Edge(self, child))

    def _cleandot(self):
        """Remove the temporary dot_node attribute from the whole subtree."""
        for child in self.children:
            child._cleandot()
        # Done outside the loop so that leaf nodes are cleaned up too;
        # nodes skipped by _dot() never had the attribute, hence pop().
        self.__dict__.pop('dot_node', None)

    def dot(self, dot, threshold=0.1, categorize=None):
        """Add the tree rooted here to the pydot graph `dot`.

        threshold and categorize are applied as described for _dot();
        percentages are relative to the total of this node.
        """
        self._dot(dot, threshold, categorize, self.total())
        self._cleandot()
195
class FuncData(RunData):
    """Counts aggregated from the call tree of a profile data file."""

    def __init__(self, filename, categorize=None):
        """Parse `filename` and aggregate its call tree into this mapping.

        categorize -- optional callable mapping a symbol to a category
                      name (or a false value); passed to
                      FuncNode.aggregate()
        """
        super(FuncData, self).__init__(filename)
        tree = self.tree
        tree.aggregate(self, categorize, incategory=False)
        self.total = tree.total()

    def __getattribute__(self, attr):
        """Serve 'tree' and the stored 'total'; defer everything else."""
        if attr == 'tree':
            # parsed from the file again on every access
            return FuncNode(self.filedata)

        if attr == 'total':
            # RunData answers 'total' with the sum of the stored values,
            # which would hide the tree total assigned in __init__.
            # Prefer the assigned value once it exists.
            stored = super(FuncData, self).__getattribute__('__dict__')
            if 'total' in stored:
                return stored['total']

        return super(FuncData, self).__getattribute__(attr)

    def displayx(self, output=None, maxcount=None):
        """Write '<name> <percent>%' lines, largest value first.

        output   -- object with a write() method; sys.stdout if None
        maxcount -- if not None, stop after this many lines
        """
        if output is None:
            import sys
            output = sys.stdout

        total = self.total
        items = sorted([ (val, key) for key, val in self.items() ],
                       reverse=True)
        for val, key in items:
            if maxcount is not None:
                if maxcount == 0:
                    return
                maxcount -= 1

            if total:
                percent = val * 100.0 / total
            else:
                # nothing was counted; avoid dividing by zero
                percent = 0.0
            output.write('%-30s %8s' % (key, '%3.2f%%' % percent) + '\n')
223
class Profile(object):
    """Profile data for a set of runs, indexed by run name and cpu label.

    self.data maps run name -> cpu label -> data object, where the data
    objects are created by calling self.datatype(filename, categorize).
    self.cpu selects which 'run<N>' label the per-job methods look up.
    """

    # This list controls the order of values in stacked bar data output
    default_categories = [ 'interrupt',
                           'driver',
                           'stack',
                           'buffer',
                           'copy',
                           'syscall',
                           'user',
                           'other',
                           'idle']

    def __init__(self, datatype, categorize=None):
        """Create an empty profile.

        datatype   -- callable invoked as datatype(filename, categorize)
                      for every data file found by inputdir()
        categorize -- handed through to `datatype` unchanged
        """
        categories = Profile.default_categories

        self.datatype = datatype
        self.categorize = categorize
        self.data = {}
        # private copies so the class level list is never modified
        self.categories = categories[:]
        self.rcategories = categories[:]
        self.rcategories.reverse()
        self.cpu = 0

    def _cpulabel(self, system):
        """Return the cpu label '<system>.run<self.cpu>'."""
        return '%s.run%d' % (system, self.cpu)

    def _joblist(self, jobfile, jobs):
        """Return `jobs`, or every job of `jobfile` when jobs is None."""
        if jobs is None:
            jobs = list(jobfile.jobs())
        return jobs

    # Read in files
    def inputdir(self, directory):
        """Load every 'profile.<label>.dat' file found below `directory`.

        The run name of a file is the path of its directory relative to
        `directory` ('' for files directly inside it) and its cpu label is
        the <label> part of the file name.

        Raises AttributeError (from setdata) if two files yield the same
        run name and label.
        """
        import os, re
        from os.path import expanduser, join as joinpath

        directory = expanduser(directory)
        # escaped dot and end anchor: only names that really end in '.dat'
        label_ex = re.compile(r'profile\.(.*)\.dat$')
        for root, dirs, files in os.walk(directory):
            # os.walk() roots always start with `directory`; stripping the
            # separator afterwards works with or without a trailing one.
            dirname = root[len(directory):].lstrip(os.sep)
            for name in files:
                match = label_ex.match(name)
                if not match:
                    continue

                filename = joinpath(root, name)
                data = self.datatype(filename, self.categorize)
                self.setdata(dirname, match.group(1), data)

    def setdata(self, run, cpu, data):
        """Store `data` for (run, cpu).

        Raises AttributeError if that slot is already taken.
        """
        if run not in self.data:
            self.data[run] = {}

        if cpu in self.data[run]:
            raise AttributeError(
                'data already stored for run %s and cpu %s' % (run, cpu))

        self.data[run][cpu] = data

    def getdata(self, run, cpu):
        """Return the data stored for (run, cpu).

        If there is none, the pair is printed to stdout and None is
        returned.
        """
        try:
            return self.data[run][cpu]
        except KeyError:
            print('%s %s' % (run, cpu))
            return None

    def alldata(self):
        """Generate a (run, cpu, data) tuple for every stored data object."""
        for run, cpus in self.data.items():
            for cpu, data in cpus.items():
                yield run, cpu, data

    def get(self, job, stat, system=None):
        """Return the percentage per category for `job` on self.cpu.

        job    -- str(job) is the run name; job.system is used when
                  `system` is not given
        stat   -- not used by this method
        system -- system name used to build the cpu label

        Returns a list of floats ordered like self.categories, or None if
        no (non-empty) data is stored for the job.  When every category is
        zero the list holds only zeros.

        Raises AttributeError if no system can be determined and
        ValueError if a stored value is negative.
        """
        # hasattr() takes the object first, then the attribute name
        if system is None and hasattr(job, 'system'):
            system = job.system

        if system is None:
            raise AttributeError('The job must have a system set')

        data = self.getdata(str(job), self._cpulabel(system))
        if not data:
            return None

        values = []
        for category in self.categories:
            val = float(data.get(category, 0.0))
            if val < 0.0:
                raise ValueError('value is %f' % val)
            values.append(val)

        total = sum(values)
        if not total:
            # no samples in any category; avoid dividing by zero
            return [ 0.0 for v in values ]
        return [ v / total * 100.0 for v in values ]

    def dump(self):
        """Print a header for, and call dump() on, every data object."""
        for run, cpu, data in self.alldata():
            print('run %s, cpu %s' % (run, cpu))
            # NOTE(review): none of the data classes visible in this file
            # defines dump(); confirm what this is meant to call.
            data.dump()
            print('')

    def write_dot(self, threshold, jobfile=None, jobs=None):
        """Write a dot file for each job that has data on self.cpu.

        The output name is the data file name with its last three
        characters replaced by 'dot'.  The data objects need a `tree`
        attribute that offers dot().  Jobs come from `jobs`, or from
        jobfile.jobs() when jobs is None.
        """
        import pydot

        for job in self._joblist(jobfile, jobs):
            symbols = self.getdata(job.name, self._cpulabel(job.system))
            if not symbols:
                continue

            dot = pydot.Dot()
            symbols.tree.dot(dot, threshold=threshold)
            dot.write(symbols.filename[:-3] + 'dot')

    def write_txt(self, jobfile=None, jobs=None, limit=None):
        """Write a text report for each job that has data on self.cpu.

        The output name is the data file name with its last three
        characters replaced by 'txt'; `limit` is passed to the data
        object's display().
        """
        for job in self._joblist(jobfile, jobs):
            symbols = self.getdata(job.name, self._cpulabel(job.system))
            if not symbols:
                continue

            output = open(symbols.filename[:-3] + 'txt', 'w')
            try:
                symbols.display(output, limit)
            finally:
                # display() does not close a stream that it was handed
                output.close()

    def display(self, jobfile=None, jobs=None, limit=None):
        """Print the report of every job that has data on self.cpu.

        All reports use the same name column width, wide enough for the
        longest name of any displayed job.
        """
        maxsymlen = 0

        # first pass: find the jobs with data and the common column width
        thejobs = []
        for job in self._joblist(jobfile, jobs):
            symbols = self.getdata(job.name, self._cpulabel(job.system))
            if symbols:
                thejobs.append(job)
                maxsymlen = max(maxsymlen, symbols.maxsymlen)

        for job in thejobs:
            symbols = self.getdata(job.name, self._cpulabel(job.system))
            print(job.name)
            symbols.display(limit=limit, maxsymlen=maxsymlen)
            print('')
365
366
367from categories import func_categorize, pc_categorize
class PCProfile(Profile):
    """A Profile whose data objects are created by PCData."""

    def __init__(self, categorize=pc_categorize):
        """Set up the profile; `categorize` defaults to pc_categorize."""
        base = super(PCProfile, self)
        base.__init__(datatype=PCData, categorize=categorize)
371
372
class FuncProfile(Profile):
    """A Profile whose data objects are created by FuncData."""

    def __init__(self, categorize=func_categorize):
        """Set up the profile; `categorize` defaults to func_categorize."""
        base = super(FuncProfile, self)
        base.__init__(datatype=FuncData, categorize=categorize)
376
def usage(exitcode = None):
    """Print the command line summary to stdout.

    exitcode -- if not None, the process exits with this status after the
                text has been printed (raises SystemExit)

    The option list mirrors the getopt string used by the script section
    of this file ('C:cdD:f:g:ij:n:pT:t').
    """
    # Imported here: a module level `sys` only exists when this file is
    # run as a script, not when it is imported.
    import sys

    print('''\
Usage: %s [-cdipt] [-C <cpus>] [-D <file>] [-f <file>] [-g <dir>]
          [-j <jobfile>] [-n <num>] [-T <pct>]

    -C <cpus>    comma separated list of cpu numbers (default 0)
    -c           groups symbols into categories
    -D <file>    with -f and -d, write the graph (PNG) to <file>
    -d           generate dot output
    -f <file>    read one function profile data file instead of a jobfile
    -g <d>       draw graphs and send output to <d>
    -i           do not show idle time (currently has no effect)
    -j <jobfile> specify a different jobfile (default is Test.py)
    -n <n>       selects number of top symbols to print (default 10)
    -p           use PC data instead of function data
    -T <pct>     smallest percentage of the total shown in dot output
                 (default 0.01)
    -t           write a text report next to each data file
''' % sys.argv[0])

    if exitcode is not None:
        sys.exit(exitcode)
391
if __name__ == '__main__':
    # Command line driver.  With -f a single data file is reported (or
    # drawn with -d); otherwise the runs of a jobfile are loaded and
    # graphed (-g), drawn (-d), written as text (-t) or, if none of those
    # was requested, printed.
    #
    # These imports stay at module level: usage() relies on the global
    # `sys` bound here.
    import getopt, re, sys
    from os.path import expanduser
    from output import StatOutput

    # default option values
    numsyms = 10
    graph = None
    cpus = [ 0 ]
    categorize = False
    # NOTE(review): showidle is set by -i but nothing below reads it.
    showidle = True
    funcdata = True
    jobfilename = 'Test.py'
    dodot = False
    dotfile = None
    textout = False
    threshold = 0.01
    inputfile = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'C:cdD:f:g:ij:n:pT:t')
    except getopt.GetoptError:
        usage(2)

    for o,a in opts:
        if o == '-C':
            cpus = [ int(x) for x in a.split(',') ]
        elif o == '-c':
            categorize = True
        elif o == '-D':
            dotfile = a
        elif o == '-d':
            dodot = True
        elif o == '-f':
            inputfile = expanduser(a)
        elif o == '-g':
            graph = a
        elif o == '-i':
            showidle = False
        elif o == '-j':
            jobfilename = a
        elif o == '-n':
            numsyms = int(a)
        elif o == '-p':
            funcdata = False
        elif o == '-T':
            threshold = float(a)
        elif o == '-t':
            textout = True

    # positional arguments are not accepted
    if args:
        print("'%s' %d" % (args, len(args)))
        usage(1)

    if inputfile:
        # single file mode: always function data
        catfunc = None
        if categorize:
            catfunc = func_categorize
        data = FuncData(inputfile, categorize=catfunc)

        if dodot:
            if dotfile is None:
                # without a name dot.write() would be handed None
                print('-d together with -f needs an output file (-D <file>)')
                usage(1)

            import pydot
            dot = pydot.Dot()
            data.tree.dot(dot, threshold=threshold)
            #dot.orientation = 'landscape'
            #dot.ranksep='equally'
            #dot.rank='samerank'
            dot.write(dotfile, format='png')
        else:
            data.display(limit=numsyms)

    else:
        from jobfile import JobFile
        jobfile = JobFile(jobfilename)

        if funcdata:
            profile = FuncProfile()
        else:
            profile = PCProfile()

        # the profile classes categorize by default; switch that off
        # before any data is loaded unless -c was given
        if not categorize:
            profile.categorize = None
        profile.inputdir(jobfile.rootdir)

        if graph:
            for cpu in cpus:
                profile.cpu = cpu
                if funcdata:
                    name = 'funcstacks%d' % cpu
                else:
                    name = 'stacks%d' % cpu
                # not named `output`: that would rebind the module of the
                # same name imported at the top of this file
                statout = StatOutput(jobfile, info=profile)
                statout.xlabel = 'System Configuration'
                statout.ylabel = '% CPU utilization'
                statout.stat = name
                statout.graph(name, graph)

        if dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_dot(jobfile=jobfile, threshold=threshold)

        if textout:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_txt(jobfile=jobfile)

        # default action when no other output was requested
        if not graph and not textout and not dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.display(jobfile=jobfile, limit=numsyms)
505