sort_includes.py revision 11397
1#!/usr/bin/env python
2
3# Copyright (c) 2011 The Hewlett-Packard Development Company
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met: redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer;
10# redistributions in binary form must reproduce the above copyright
11# notice, this list of conditions and the following disclaimer in the
12# documentation and/or other materials provided with the distribution;
13# neither the name of the copyright holders nor the names of its
14# contributors may be used to endorse or promote products derived from
15# this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28#
29# Authors: Nathan Binkert
30
31import os
32import re
33import sys
34
35from file_types import *
36
# Map from a C standard library header to the C++ header that replaces
# it.  Every C++ name is the C name without its '.h' suffix and with a
# 'c' prefix, so the table is generated from the bare names.
cpp_c_headers = dict(
    (name + '.h', 'c' + name)
    for name in (
        'assert',
        'ctype',
        'errno',
        'float',
        'limits',
        'locale',
        'math',
        'setjmp',
        'signal',
        'stdarg',
        'stddef',
        'stdio',
        'stdlib',
        'string',
        'time',
        'wchar',
        'wctype',
    )
)
56
# Matches '#include', '#import', '%include' and '%import' lines and
# captures the marker character, the keyword and the included path.
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')


def include_key(line):
    '''Return a sort key for an include line.

    Every directory component of the included path gets a leading
    space, which makes paths with directories sort before plain file
    names.  The line must be an include statement, otherwise the
    assertion fails.'''

    found = include_re.match(line)
    assert found, line

    path = found.group(3).split('/')
    dirs, leaf = path[:-1], path[-1]

    if len(path) == 2 and dirs[0] == 'dnet':
        # All dnet includes share one key: they keep their relative
        # order (sorting is stable) but are still ordered with respect
        # to the non dnet includes.
        leaf = ' '

    return '/'.join([' ' + name for name in dirs] + [leaf])
79
80
81def _include_matcher(keyword="#include", delim="<>"):
82    """Match an include statement and return a (keyword, file, extra)
83    duple, or a touple of None values if there isn't a match."""
84
85    rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
86
87    def matcher(context, line):
88        m = rex.match(line)
89        return m.groups() if m else (None, ) * 3
90
91    return matcher
92
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher that only accepts includes of certain files.

    fname is a regular expression that is matched against the start of
    the included file name.  All keyword arguments are handed on to
    _include_matcher, which does the matching of the include line
    itself."""

    fname_re = re.compile(fname)
    match_include = _include_matcher(**kwargs)
    no_match = (None, ) * 3

    def matcher(context, line):
        found = match_include(context, line)
        included = found[1]
        # Reject non-includes, empty file names and names that do not
        # fit the requested pattern.
        if not included or fname_re.match(included) is None:
            return no_match
        return found

    return matcher
109
110
def _include_matcher_main():
    """Build a matcher for the primary header of a C/C++ source file.

    The returned matcher reads context["filename"].  For a file named
    src/<path>.<ext> with a known source extension, it accepts the
    quoted include of <path>.<header extension> and nothing else."""

    no_match = (None, ) * 3
    # source file extension -> extension of its primary header
    header_ext = {"c": "h", "cc": "hh", "cpp": "hh"}
    source_re = re.compile(r"^src/(.*)\.([^.]+)$")
    match_quoted = _include_matcher(delim='""')

    def matcher(context, line):
        source = source_re.match(context["filename"])
        if source is None:
            return no_match

        stem, ext = source.groups()
        found = match_quoted(context, line)
        if ext in header_ext and \
                found[1] == "%s.%s" % (stem, header_ext[ext]):
            return found

        return no_match

    return matcher
137
class SortIncludes(object):
    """Callable that groups and sorts the include statements of a file.

    Calling an instance with the lines of a file yields those lines
    back, with every run of include statements replaced by blocks of
    sorted includes: one block per entry of block_order, with an empty
    line between blocks."""

    # Different types of includes for different sorting of headers.
    # The entries are tried in order and the first match wins.  Inside
    # a block the include lines are sorted as plain strings.
    # "<source>.h(h)"    - the source file's own header
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers
    # "*.h(h)"           - M5 headers
    # %import/%include   - swig includes
    #
    # Each entry is (block type, delimiters used on output, matcher).
    # The patterns are raw strings so that '\.' and '\w' reach the re
    # module untouched instead of being invalid string escapes.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""',
         _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Order in which the blocks are written.  Block types that share a
    # tuple are merged into a single block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # block type -> position of its block in block_order
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Create self.includes right away so that dump_blocks() and
        # dump_includes() also work before the first call.
        self.reset()

    def reset(self):
        # clear all stored headers (block type -> list of lines)
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns the sorted list of include lines, without duplicates."""

        includes = []
        for block_type in block_types:
            includes.extend(self.includes.get(block_type, ()))

        return sorted(set(includes))

    def dump_includes(self):
        """Return all stored includes as one string and forget them.

        The blocks appear in the prescribed order, separated by an
        empty line; block types without includes are left out."""

        blocks = []
        # Each entry in the list is a multi-line string with multiple
        # includes.
        for types in self.block_order:
            block = "\n".join(self.dump_blocks(types))
            if block:
                blocks.append(block)

        self.reset()
        return "\n\n".join(blocks)

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its includes sorted.

        lines    -- iterable with the lines of the file
        filename -- name of the file, made available to the matchers
        language -- language of the file; for 'C++' the C headers
                    listed in cpp_c_headers are replaced by their C++
                    counterparts

        Lines outside of include runs are yielded unchanged.  A run of
        includes is yielded as a single multi-line string, followed by
        an empty line when more lines follow it."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            """Return (block type, cleaned up include line), or
            (None, line) if the line isn't a known include."""
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                yield self.dump_includes()
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            yield self.dump_includes()
263
264
265
# Languages whose files get the sorting rules applied when no other
# selection is made with --languages
default_languages = frozenset([
    'C',
    'C++',
    'isa',
    'python',
    'scons',
    'swig',
])
268
def options():
    """Create the option parser for the command line of this script.

    The three list valued options take comma separated values and
    default to the joined module level defaults."""

    import optparse

    parser = optparse.OptionParser()

    # (short flag, long flag, metavar, default values, help text)
    list_options = (
        ('-d', '--dir_ignore', "DIR[,DIR]", default_dir_ignore,
         "ignore directories"),
        ('-f', '--file_ignore', "FILE[,FILE]", default_file_ignore,
         "ignore files"),
        ('-l', '--languages', "LANG[,LANG]", default_languages,
         "languages"),
    )
    for short, long_flag, metavar, defaults, help_text in list_options:
        parser.add_option(short, long_flag, metavar=metavar, type='string',
                          default=','.join(defaults), help=help_text)

    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
286
def parse_args(parser):
    """Parse the command line and split the list valued options.

    parser -- option parser that defines the dir_ignore, file_ignore
              and languages options as comma separated strings

    Returns the (opts, args) pair of parser.parse_args(), with those
    three options turned into frozensets.  Empty items, as produced by
    an empty value or a stray comma, are dropped."""

    opts, args = parser.parse_args()

    for name in ('dir_ignore', 'file_ignore', 'languages'):
        items = getattr(opts, name).split(',')
        # ''.split(',') is [''], which would add a bogus empty entry
        setattr(opts, name, frozenset(item for item in items if item))

    return opts, args
295
# Script entry point: sort the includes of every matching file found
# below the paths given on the command line, or with --dry-run only
# list those files together with their language.
if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename, language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # A single parenthesized argument is valid both as a
                # Python 2 print statement and as a Python 3 call.
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())
307