sort_includes.py revision 10674
1#!/usr/bin/env python
2
3import os
4import re
5import sys
6
7from file_types import *
8
# Map each C standard library header ('<name>.h') to the name of its
# C++ counterpart ('c<name>'). The mapping is completely regular, so
# it is generated from the list of base names.
cpp_c_headers = dict(
    (base + '.h', 'c' + base)
    for base in (
        'assert', 'ctype', 'errno', 'float', 'limits', 'locale',
        'math', 'setjmp', 'signal', 'stdarg', 'stddef', 'stdio',
        'stdlib', 'string', 'time', 'wchar', 'wctype',
    ))
28
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
def include_key(line):
    '''Return a sort key for an include/import line.

    Every directory component of the included path gets a leading
    space so that directories are sorted before plain files. The line
    must match include_re; otherwise the assertion fails with the
    offending line as its message.'''

    found = include_re.match(line)
    assert found, line

    pieces = found.group(3).split('/')
    if len(pieces) == 2 and pieces[0] == 'dnet':
        # dnet includes must keep their relative order but still sort
        # against non-dnet includes. Sorting is stable in Python, so
        # giving all of them the same (blank) file part is enough.
        pieces[1] = ' '

    # Only the directory components are marked; the final (file)
    # component is used unchanged.
    return '/'.join([ ' ' + d for d in pieces[:-1] ] + pieces[-1:])
51
52
def _include_matcher(keyword="#include", delim="<>"):
    """Build a matcher for include statements.

    keyword is the directive to look for (e.g., "#include" or
    "%import") and delim is a two character string with the opening
    and closing delimiter of the file name. Both are inserted into the
    regular expression as they are.

    The returned function takes (context, line) and returns a
    (keyword, file, extra) tuple, or a tuple of three None values if
    there isn't a match. The context argument is not used by this
    matcher."""

    # The file name is matched non-greedily so that it ends at the
    # first closing delimiter. Anything after it (e.g., a trailing
    # comment that happens to contain the delimiter) becomes 'extra'.
    rex = re.compile(r'^(%s)\s*%s(.*?)%s(.*)$' %
                     (keyword, delim[0], delim[1]))

    def matcher(context, line):
        m = rex.match(line)
        return m.groups() if m else (None, ) * 3

    return matcher
64
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher that only accepts includes of certain files.

    fname is a regular expression that is matched (anchored at the
    start) against the included file name. All keyword arguments are
    passed on to _include_matcher, which does the matching of the
    include line itself.

    The returned function has the same (context, line) interface and
    the same return convention as the one from _include_matcher."""

    no_match = (None, ) * 3
    name_rex = re.compile(fname)
    match_include = _include_matcher(**kwargs)

    def matcher(context, line):
        hit = match_include(context, line)
        included = hit[1]
        # Reject both "not an include at all" and "include of a file
        # whose name doesn't match the pattern".
        if not included or not name_rex.match(included):
            return no_match
        return hit

    return matcher
81
82
def _include_matcher_main():
    """Build a matcher for a C/C++ source file's primary header.

    The primary header is a quoted include whose name is the path of
    the source file (context["filename"], which must start with
    "src/") with the "src/" prefix removed and the source extension
    replaced by the corresponding header extension."""

    no_match = (None, ) * 3
    quoted_include = _include_matcher(delim='""')
    source_rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # Source file extension -> extension of its primary header
    header_ext = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
        }

    def matcher(context, line):
        source = source_rex.match(context["filename"])
        if source is None:
            return no_match

        stem, suffix = source.groups()
        hit = quoted_include(context, line)
        # Source files with an unknown extension have no primary header
        if suffix in header_ext and \
                hit[1] == "%s.%s" % (stem, header_ext[suffix]):
            return hit

        return no_match

    return matcher
109
class SortIncludes(object):
    """Callable that regroups and sorts the include lines of a file.

    Calling an instance with the lines of a file returns a generator
    that yields the lines back with every run of include statements
    replaced by sorted blocks, one block per entry in block_order."""

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (block type, delimiters used when the include is
    # written back, matcher). The matchers are tried in this order and
    # the first one that matches a line decides its block type.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""',
         _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Order in which the blocks are written. Block types that share an
    # entry are merged into a single sorted block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Map every block type to the index of its entry in block_order
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Make sure self.includes exists, so the dump methods can be
        # used on an instance that hasn't been called yet.
        self.reset()

    def reset(self):
        # clear all stored headers
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns a sorted list of the stored include lines with
        duplicates removed. Block types without any stored include
        are ignored."""

        includes = []
        for block_type in block_types:
            includes.extend(self.includes.get(block_type, ()))

        return sorted(set(includes))

    def dump_includes(self):
        """Return all stored includes as a single string and forget
        them. The blocks are in block_order order and separated by an
        empty line; empty blocks are left out."""

        blocks = []
        # Create a list of blocks in the prescribed include
        # order. Each entry in the list is a multi-line string with
        # multiple includes.
        for types in self.block_order:
            block = "\n".join(self.dump_blocks(types))
            if block:
                blocks.append(block)

        self.reset()
        return "\n\n".join(blocks)

    def __call__(self, lines, filename, language):
        """Generator that yields the contents of a file with its
        include blocks sorted.

        lines is an iterable with the lines of the file. filename and
        language are handed to the matchers through the context
        dictionary; language is also used to decide if C headers are
        replaced by their C++ counterparts.

        Lines outside of include blocks are yielded unchanged. An
        include block is yielded as one multi-line string. If a line
        follows the block, an empty line is yielded between the two."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            # Classify a line. Returns (block type, cleaned up include
            # line) for includes and (None, line) for everything else.
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                yield self.dump_includes()
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            yield self.dump_includes()
235
236
237
# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))

def options():
    """Create the command line parser for this script.

    Returns an optparse.OptionParser that knows the -d/--dir_ignore,
    -f/--file_ignore, -l/--languages and -n/--dry-run options. The
    list options take comma separated values and default to
    default_dir_ignore, default_file_ignore (both not defined in this
    file; presumably from the file_types import) and
    default_languages."""

    import optparse

    parser = optparse.OptionParser()

    # (short flag, long flag, metavar, default values, help)
    list_options = (
        ('-d', '--dir_ignore', "DIR[,DIR]", default_dir_ignore,
         "ignore directories"),
        ('-f', '--file_ignore', "FILE[,FILE]", default_file_ignore,
         "ignore files"),
        ('-l', '--languages', "LANG[,LANG]", default_languages,
         "languages"),
        )
    for short, long_, metavar, values, text in list_options:
        parser.add_option(short, long_, metavar=metavar, type='string',
                          default=','.join(values), help=text)

    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
258
def parse_args(parser):
    """Parse the command line with the given parser.

    Returns the (opts, args) pair from parser.parse_args(), with the
    comma separated dir_ignore, file_ignore and languages options
    replaced by frozensets of their elements."""

    opts, args = parser.parse_args()

    for name in ('dir_ignore', 'file_ignore', 'languages'):
        setattr(opts, name, frozenset(getattr(opts, name).split(',')))

    return opts, args
267
# Script entry point: sort the includes of all files found below the
# paths given on the command line.
if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    # find_files and update_file are not defined in this file;
    # presumably they are provided by the file_types import.
    for base in args:
        for filename,language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Dry run: only report which files would be processed.
                # The single-argument form of print() produces the
                # same output under Python 2 and Python 3.
                print("%s: %s" % (filename, language))
            else:
                # Rewrite the file in place
                update_file(filename, filename, language, SortIncludes())
279