1# Copyright (c) 2005 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26#
27# Authors: Nathan Binkert
28
29import output
30
class FileData(dict):
    """Sections of a profile data file, keyed by section name.

    A line starting with ``>>>`` opens a new section; the remainder of that
    line is the section name.  Every following line, stripped of surrounding
    whitespace, is appended to that section's list.  Lines that appear before
    the first header are read but not stored anywhere.
    """

    def __init__(self, filename):
        """Read ``filename`` and fill the mapping with its sections.

        Any error raised while opening or reading the file propagates to
        the caller; the file handle is closed in either case.
        """
        self.filename = filename
        fd = open(filename)
        try:
            # Lines seen before any '>>>' header land in this throwaway list.
            current = []
            for line in fd:
                line = line.strip()
                if line.startswith('>>>'):
                    current = []
                    self[line[3:]] = current
                else:
                    current.append(line)
        finally:
            # Release the handle even if reading fails part-way through.
            fd.close()
44
class RunData(dict):
    """Mapping of symbol (or category) name to count for one data file.

    Three attributes are computed on every read instead of being stored:

      total     -- sum of all values in the mapping, as a float
      filedata  -- a FileData freshly parsed from ``self.filename``
      maxsymlen -- length of the longest key (0 for an empty mapping)
    """

    def __init__(self, filename):
        """Remember ``filename``; the mapping itself starts out empty."""
        self.filename = filename

    def __getattribute__(self, attr):
        """Serve the computed attributes; defer everything else."""
        # These names are intercepted before normal lookup, so a value
        # assigned to one of them on the instance is not what a read returns.
        if attr == 'total':
            return sum(self.values(), 0.0)

        if attr == 'filedata':
            return FileData(self.filename)

        if attr == 'maxsymlen':
            lengths = [ len(sym) for sym in self.keys() ]
            if not lengths:
                # max() of an empty list would raise ValueError
                return 0
            return max(lengths)

        return super(RunData, self).__getattribute__(attr)

    def display(self, output=None, limit=None, maxsymlen=None):
        """Write one ``<name> <percent>%`` line per entry, largest first.

        output    -- a writable file object, a path string (the file is
                     created, written and closed here), or a false value
                     to use sys.stdout
        limit     -- if not None, only the first ``limit`` entries are shown
        maxsymlen -- width used to pad names; defaults to self.maxsymlen
        """
        opened = None
        if not output:
            import sys
            output = sys.stdout
        elif isinstance(output, str):
            opened = output = open(output, 'w')

        try:
            total = float(self.total)

            # swap (string,count) order so we can sort on count
            symbols = sorted([ (count, name) for name, count in self.items() ],
                             reverse=True)
            if limit is not None:
                symbols = symbols[:limit]

            if not maxsymlen:
                maxsymlen = self.maxsymlen

            symbolf = "%-" + str(maxsymlen + 1) + "s %.2f%%\n"
            for number, name in symbols:
                if total:
                    percent = 100.0 * (float(number) / total)
                else:
                    # every count is zero; avoid dividing by zero
                    percent = 0.0
                output.write(symbolf % (name, percent))
        finally:
            # Only close what this method opened itself.
            if opened is not None:
                opened.close()
85
class PCData(RunData):
    """Counts read from the 'PC data' section of a profile data file.

    Each line of the section is split into a symbol and an integer count.
    Lines whose symbol is "0x0" are skipped.
    """

    def __init__(self, filename=None, categorize=None, showidle=True):
        """Load and accumulate the counts from ``filename``.

        categorize -- optional callable mapping a symbol to a category
                      name; a None result is filed under 'other'.  When
                      omitted, counts are keyed by the symbol itself.
        showidle   -- when false, symbols categorized as 'idle' are dropped
        """
        # Was __init__(self, filename): the extra ``self`` raised TypeError.
        super(PCData, self).__init__(filename)

        for line in self.filedata['PC data']:
            (symbol, count) = line.split()
            if symbol == "0x0":
                continue
            count = int(count)

            key = symbol
            if categorize is not None:
                key = categorize(symbol)
                if key is None:
                    key = 'other'
                elif key == 'idle' and not showidle:
                    continue

            # Several symbols can share one category, so add rather than
            # overwrite.
            self[key] = self.get(key, 0) + count
105
class FuncNode(object):
    """A node of the tree parsed from a file's 'function data' section.

    Parsed nodes carry ``symbol``, ``count``, ``children`` (a tuple of
    FuncNode) and ``parent`` (None unless another node lists this one as a
    child).  A node created with no arguments has none of these set.
    """

    def __new__(cls, filedata=None):
        """Create a bare node, or parse ``filedata`` and return its root.

        Each line of ``filedata['function data']`` is split on single
        spaces into: node id (hex), symbol, count (decimal), then zero or
        more child ids (hex).  An empty symbol field becomes 'unknown'.
        The node with id 0 is returned.

        Raises KeyError if a child id is not defined or there is no node
        with id 0.
        """
        if filedata is None:
            return super(FuncNode, cls).__new__(cls)

        nodes = {}
        for line in filedata['function data']:
            data = line.split(' ')
            node = FuncNode()
            node.symbol = data[1]
            if node.symbol == '':
                node.symbol = 'unknown'
            node.count = int(data[2])
            # ids for now; replaced by the node objects in the pass below
            node.children = [ int(child, 16) for child in data[3:] ]
            node.parent = None
            nodes[int(data[0], 16)] = node

        for node in nodes.values():
            children = []
            for cid in node.children:
                child = nodes[cid]
                children.append(child)
                child.parent = node
            node.children = tuple(children)

        if 0 not in nodes:
            raise KeyError('no root node (id 0) in function data of %s' %
                           getattr(filedata, 'filename', '<unknown>'))
        return nodes[0]

    def total(self):
        """Return this node's count plus the totals of all descendants."""
        total = self.count
        for child in self.children:
            total += child.total()

        return total

    def aggregate(self, dict, categorize, incategory):
        """Accumulate subtree totals into the mapping ``dict``.

        dict       -- mapping updated in place (name -> accumulated total)
        categorize -- optional callable mapping a symbol to a category
        incategory -- true when an ancestor already belongs to a category

        A node with a category adds its subtree total (minus whatever
        categorized descendants already claimed) to that category and
        returns 0.  Any other node returns that total, and also records it
        under its own symbol unless it sits below a categorized ancestor.
        """
        category = None
        if categorize:
            category = categorize(self.symbol)

        total = self.count
        for child in self.children:
            total += child.aggregate(dict, categorize, category or incategory)

        if category:
            dict[category] = dict.get(category, 0) + total
            return 0
        elif not incategory:
            dict[self.symbol] = dict.get(self.symbol, 0) + total

        return total

    def dump(self):
        """Print this node and, recursively, its descendants to stdout."""
        kids = [ child.symbol for child in self.children]
        print('%s %d <%s>' % (self.symbol, self.count, ', '.join(kids)))
        for child in self.children:
            child.dump()

    def _dot(self, dot, threshold, categorize, total):
        """Add this subtree to the pydot graph ``dot``.

        A node is left out, together with everything below it, when its
        share of ``total`` (in percent) is under ``threshold`` or when
        ``categorize`` gives it a category other than 'other'.
        """
        from pydot import Edge, Node
        self.dot_node = None

        if total:
            value = self.total() * 100.0 / total
        else:
            # nothing was counted; avoid dividing by zero
            value = 0.0
        if value < threshold:
            return
        if categorize:
            category = categorize(self.symbol)
            if category and category != 'other':
                return
        label = '%s %.2f%%' % (self.symbol, value)
        # NOTE(review): the FuncNode objects themselves (not names or the
        # pydot nodes) are handed to Node() and Edge() -- confirm that the
        # pydot version in use accepts this.
        self.dot_node = Node(self, label=label)
        dot.add_node(self.dot_node)

        for child in self.children:
            child._dot(dot, threshold, categorize, total)
            if child.dot_node is not None:
                dot.add_edge(Edge(self, child))

    def _cleandot(self):
        """Remove the temporary ``dot_node`` attribute from the subtree."""
        for child in self.children:
            child._cleandot()
        # Done once per node, outside the loop, so leaves are cleaned too;
        # pruned nodes' descendants never received the attribute.
        self.__dict__.pop('dot_node', None)

    def dot(self, dot, threshold=0.1, categorize=None):
        """Populate the pydot graph ``dot`` from the tree rooted here."""
        self._dot(dot, threshold, categorize, self.total())
        self._cleandot()
194
class FuncData(RunData):
    """Aggregated function-tree totals for one profile data file.

    The mapping is filled by FuncNode.aggregate(); ``total`` is the tree's
    overall total as computed by FuncNode.total().
    """

    def __init__(self, filename, categorize=None):
        """Parse ``filename`` and aggregate its function tree.

        categorize -- optional callable mapping a symbol to a category
        """
        super(FuncData, self).__init__(filename)
        tree = self.tree
        tree.aggregate(self, categorize, incategory=False)
        self.total = tree.total()

    def __getattribute__(self, attr):
        """Serve ``tree`` and the stored ``total``; defer everything else."""
        if attr == 'tree':
            # rebuilt from the file on every access
            return FuncNode(self.filedata)
        if attr == 'total':
            # RunData computes 'total' as the sum of the mapping's values,
            # which would hide the tree total stored by __init__.  Prefer
            # the stored value once it exists.
            stored = super(FuncData, self).__getattribute__('__dict__')
            if 'total' in stored:
                return stored['total']
        return super(FuncData, self).__getattribute__(attr)

    def displayx(self, output=None, maxcount=None):
        """Write ``<key> <percent>%`` lines, largest value first.

        output   -- writable file object (default sys.stdout)
        maxcount -- if not None, stop after this many lines
        """
        if output is None:
            import sys
            output = sys.stdout

        total = self.total
        items = sorted([ (val, key) for key, val in self.items() ],
                       reverse=True)
        for val, key in items:
            if maxcount is not None:
                if maxcount == 0:
                    return
                maxcount -= 1

            if total:
                percent = val * 100.0 / total
            else:
                # nothing was counted; avoid dividing by zero
                percent = 0.0
            output.write('%-30s %8s\n' % (key, '%3.2f%%' % percent))
222
class Profile(object):
    """Collection of per-run, per-cpu profile data objects.

    ``data`` maps run name -> cpu label -> data object, where the data
    objects are created by calling ``datatype(filename, categorize)``.
    ``cpu`` selects which cpu number the job-based methods look up.
    """

    # This list controls the order of values in stacked bar data output
    default_categories = [ 'interrupt',
                           'driver',
                           'stack',
                           'buffer',
                           'copy',
                           'syscall',
                           'user',
                           'other',
                           'idle']

    def __init__(self, datatype, categorize=None):
        """Set up an empty profile.

        datatype   -- callable ``(filename, categorize)`` building the data
                      object for one file
        categorize -- value handed through to ``datatype``
        """
        categories = Profile.default_categories

        self.datatype = datatype
        self.categorize = categorize
        self.data = {}
        self.categories = categories[:]
        self.rcategories = categories[:]
        self.rcategories.reverse()
        self.cpu = 0

    def _cpuname(self, system):
        """Return the cpu label used as key for ``system`` and self.cpu."""
        return '%s.run%d' % (system, self.cpu)

    def _jobdata(self, job):
        """Return the stored data for ``job`` on the current cpu, or None."""
        return self.getdata(job.name, self._cpuname(job.system))

    # Read in files
    def inputdir(self, directory):
        """Load every ``profile.<label>.dat`` file found below ``directory``.

        The run name is the file's directory relative to ``directory``
        (empty for files directly inside it); ``<label>`` is the cpu label.
        """
        import os, os.path, re
        from os.path import expanduser, join as joinpath

        directory = expanduser(directory)
        # The '.' before 'dat' is escaped and the pattern anchored so that
        # names such as 'profile.x.dat.bak' are not picked up.
        label_ex = re.compile(r'profile\.(.*)\.dat$')
        for root,dirs,files in os.walk(directory):
            for name in files:
                match = label_ex.match(name)
                if not match:
                    continue

                filename = joinpath(root, name)
                # os.walk roots always start with ``directory``; strip it
                # and the separator, whether or not ``directory`` ended
                # with one.
                dirname = root[len(directory):].lstrip(os.sep)
                data = self.datatype(filename, self.categorize)
                self.setdata(dirname, match.group(1), data)

    def setdata(self, run, cpu, data):
        """Store ``data`` for (run, cpu).

        Raises AttributeError if data is already stored for that pair.
        """
        if run not in self.data:
            self.data[run] = {}

        if cpu in self.data[run]:
            raise AttributeError(
                  'data already stored for run %s and cpu %s' % (run, cpu))

        self.data[run][cpu] = data

    def getdata(self, run, cpu):
        """Return the data for (run, cpu); print the pair and return None
        if nothing is stored for it."""
        try:
            return self.data[run][cpu]
        except KeyError:
            print('%s %s' % (run, cpu))
            return None

    def alldata(self):
        """Yield ``(run, cpu, data)`` for every stored data object."""
        for run,cpus in self.data.items():
            for cpu,data in cpus.items():
                yield run,cpu,data

    def get(self, job, stat, system=None):
        """Return the percentage per category for ``job`` on the current cpu.

        job    -- object whose str() is the run name; its ``system``
                  attribute is used when ``system`` is not given
        stat   -- not used by this method
        system -- system name used to build the cpu label

        Returns a list ordered like ``self.categories``, or None when no
        (or empty) data is stored for the job.  Raises AttributeError when
        no system can be determined and ValueError on a negative value.
        """
        # hasattr takes (object, name); the arguments used to be swapped
        if system is None and hasattr(job, 'system'):
            system = job.system

        if system is None:
            raise AttributeError('The job must have a system set')

        data = self.getdata(str(job), self._cpuname(system))
        if not data:
            return None

        values = []
        for category in self.categories:
            val = float(data.get(category, 0.0))
            if val < 0.0:
                raise ValueError('value is %f' % val)
            values.append(val)
        total = sum(values)
        if not total:
            # nothing recorded in any known category; avoid dividing by 0
            return [ 0.0 for v in values ]
        return [ v / total * 100.0 for v in values ]

    def dump(self):
        """Print every stored data object using its own dump() method."""
        for run,cpu,data in self.alldata():
            print('run %s, cpu %s' % (run, cpu))
            # NOTE(review): the RunData classes visible in this file define
            # no dump(); confirm the datatype in use provides one.
            data.dump()
            print('')

    def write_dot(self, threshold, jobfile=None, jobs=None):
        """Write a ``.dot`` file next to each job's data file.

        The output name is the data file name with its last three
        characters replaced by 'dot'.  Jobs without data are skipped.
        """
        import pydot

        if jobs is None:
            jobs = list(jobfile.jobs())

        for job in jobs:
            symbols = self._jobdata(job)
            if not symbols:
                continue

            dot = pydot.Dot()
            symbols.tree.dot(dot, threshold=threshold)
            dot.write(symbols.filename[:-3] + 'dot')

    def write_txt(self, jobfile=None, jobs=None, limit=None):
        """Write a ``.txt`` listing next to each job's data file.

        The output name is the data file name with its last three
        characters replaced by 'txt'.  Jobs without data are skipped.
        """
        if jobs is None:
            jobs = list(jobfile.jobs())

        for job in jobs:
            symbols = self._jobdata(job)
            if not symbols:
                continue

            output = open(symbols.filename[:-3] + 'txt', 'w')
            try:
                symbols.display(output, limit)
            finally:
                # make sure the listing is flushed and the handle released
                output.close()

    def display(self, jobfile=None, jobs=None, limit=None):
        """Print each job's listing to stdout, names padded to one width."""
        if jobs is None:
            jobs = list(jobfile.jobs())

        maxsymlen = 0

        # First pass: keep the jobs that have data and find the widest name.
        found = []
        for job in jobs:
            symbols = self._jobdata(job)
            if symbols:
                found.append((job, symbols))
                maxsymlen = max(maxsymlen, symbols.maxsymlen)

        for job, symbols in found:
            print(job.name)
            symbols.display(limit=limit, maxsymlen=maxsymlen)
            print('')
363            print
364
365
366from categories import func_categorize, pc_categorize
class PCProfile(Profile):
    """Profile that builds a PCData object for every input file."""

    def __init__(self, categorize=pc_categorize):
        """Create the profile; ``categorize`` defaults to pc_categorize."""
        super(PCProfile, self).__init__(datatype=PCData,
                                        categorize=categorize)
370
371
class FuncProfile(Profile):
    """Profile that builds a FuncData object for every input file."""

    def __init__(self, categorize=func_categorize):
        """Create the profile; ``categorize`` defaults to func_categorize."""
        super(FuncProfile, self).__init__(datatype=FuncData,
                                          categorize=categorize)
375
def usage(exitcode = None):
    """Print the command-line help text to stdout.

    exitcode -- if not None, the process exits with this status after
                printing
    """
    # Imported here so the function also works when this file is imported
    # as a module rather than run as a script.
    import sys

    print('''\
Usage: %s [-cdipt] [-C <cpus>] [-D <file>] [-f <file>] [-g <dir>]
          [-j <jobfile>] [-n <num>] [-T <threshold>]

    -C <cpus>    comma separated list of cpu numbers (default 0)
    -c           groups symbols into categories
    -D <file>    file that -d writes to when -f is given
    -d           generate dot output
    -f <file>    read a single data file instead of using a jobfile
    -g <d>       draw graphs and send output to <d>
    -i           do not show idle
    -j <jobfile> specify a different jobfile (default is Test.py)
    -n <n>       selects number of top symbols to print (default 10)
    -p           use PC data instead of function data
    -T <t>       threshold for dot output (default 0.01)
    -t           write text output files
''' % sys.argv[0])

    if exitcode is not None:
        sys.exit(exitcode)
390
# Script entry point: parse the command line, then either process a single
# data file (-f) or every job listed in the jobfile.
if __name__ == '__main__':
    import getopt, re, sys
    from os.path import expanduser
    from output import StatOutput

    # default option values
    numsyms = 10
    graph = None
    cpus = [ 0 ]
    categorize = False
    showidle = True
    funcdata = True
    jobfilename = 'Test.py'
    dodot = False
    dotfile = None
    textout = False
    threshold = 0.01
    inputfile = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'C:cdD:f:g:ij:n:pT:t')
    except getopt.GetoptError:
        usage(2)

    for o,a in opts:
        if o == '-C':
            cpus = [ int(x) for x in a.split(',') ]
        elif o == '-c':
            categorize = True
        elif o == '-D':
            dotfile = a
        elif o == '-d':
            dodot = True
        elif o == '-f':
            inputfile = expanduser(a)
        elif o == '-g':
            graph = a
        elif o == '-i':
            showidle = False
        elif o == '-j':
            jobfilename = a
        elif o == '-n':
            numsyms = int(a)
        elif o == '-p':
            funcdata = False
        elif o == '-T':
            threshold = float(a)
        elif o == '-t':
            textout = True

    if args:
        # positional arguments are not accepted
        print("'%s' %d" % (args, len(args)))
        usage(1)

    if inputfile:
        # Single-file mode: always function data.
        catfunc = None
        if categorize:
            catfunc = func_categorize
        data = FuncData(inputfile, categorize=catfunc)

        if dodot:
            if dotfile is None:
                # dot.write() needs a destination; fail with a clear message
                print('-d together with -f requires -D <file>')
                usage(1)

            import pydot
            dot = pydot.Dot()
            data.tree.dot(dot, threshold=threshold)
            #dot.orientation = 'landscape'
            #dot.ranksep='equally'
            #dot.rank='samerank'
            dot.write(dotfile, format='png')
        else:
            data.display(limit=numsyms)

    else:
        from jobfile import JobFile
        jobfile = JobFile(jobfilename)

        if funcdata:
            profile = FuncProfile()
        else:
            profile = PCProfile()
            if not showidle:
                # Profile.inputdir() calls datatype(filename, categorize)
                # only, so -i has to be bound into the datatype here or it
                # would have no effect.
                def pcdata_noidle(filename, categorize=None):
                    return PCData(filename, categorize, showidle=False)
                profile.datatype = pcdata_noidle

        if not categorize:
            profile.categorize = None
        profile.inputdir(jobfile.rootdir)

        if graph:
            for cpu in cpus:
                profile.cpu = cpu
                if funcdata:
                    name = 'funcstacks%d' % cpu
                else:
                    name = 'stacks%d' % cpu
                # named so that it does not rebind the imported 'output'
                # module
                statout = StatOutput(jobfile, info=profile)
                statout.xlabel = 'System Configuration'
                statout.ylabel = '% CPU utilization'
                statout.stat = name
                statout.graph(name, graph)

        if dodot:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_dot(jobfile=jobfile, threshold=threshold)

        if textout:
            for cpu in cpus:
                profile.cpu = cpu
                profile.write_txt(jobfile=jobfile)

        if not graph and not textout and not dodot:
            # default action: print the top symbols for each cpu
            for cpu in cpus:
                profile.cpu = cpu
                profile.display(jobfile=jobfile, limit=numsyms)
504