sort_includes.py revision 12009
1#!/usr/bin/env python2
2#
3# Copyright (c) 2014-2015 ARM Limited
4# All rights reserved
5#
6# The license below extends only to copyright in the software and shall
7# not be construed as granting a license to any other intellectual
8# property including but not limited to intellectual property relating
9# to a hardware implementation of the functionality of the software
10# licensed hereunder.  You may use the software subject to the license
11# terms below provided that you ensure that this notice is replicated
12# unmodified and in its entirety in all distributions of the software,
13# modified or unmodified, in source code or in binary form.
14#
15# Copyright (c) 2011 The Hewlett-Packard Development Company
16# All rights reserved.
17#
18# Redistribution and use in source and binary forms, with or without
19# modification, are permitted provided that the following conditions are
20# met: redistributions of source code must retain the above copyright
21# notice, this list of conditions and the following disclaimer;
22# redistributions in binary form must reproduce the above copyright
23# notice, this list of conditions and the following disclaimer in the
24# documentation and/or other materials provided with the distribution;
25# neither the name of the copyright holders nor the names of its
26# contributors may be used to endorse or promote products derived from
27# this software without specific prior written permission.
28#
29# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40#
41# Authors: Nathan Binkert
42#          Andreas Sandberg
43
44import os
45import re
46import sys
47
48from file_types import *
49
# Map every C standard library header to the C++ header that wraps it.
# Each C++ name is the C name without its '.h' suffix and with a 'c'
# prefix (e.g., 'stdio.h' -> 'cstdio'), so the table is derived from
# the list of header stems.
cpp_c_headers = dict(
    (stem + '.h', 'c' + stem)
    for stem in (
        'assert',
        'ctype',
        'errno',
        'float',
        'limits',
        'locale',
        'math',
        'setjmp',
        'signal',
        'stdarg',
        'stddef',
        'stdio',
        'stdlib',
        'string',
        'time',
        'wchar',
        'wctype',
    )
)
69
# Matches a '#' or '%' include/import statement; group 3 is the text
# between the opening '<' or '"' and the closing '>' or '"'.
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')


def include_key(line):
    '''Return a sort key for an include line.

    Every directory component of the included path gets a leading
    space, so that directories sort before plain files.  The line must
    match include_re; otherwise the assertion fails with the offending
    line as its message.'''

    found = include_re.match(line)
    assert found, line

    path_parts = found.group(3).split('/')
    if len(path_parts) == 2 and path_parts[0] == 'dnet':
        # All dnet includes share one key: they keep their relative
        # order (sorting is stable) but are still ordered against the
        # non-dnet includes.
        path_parts[1] = ' '

    # Only the directory components are marked; the final component is
    # left untouched.
    directories, leaf = path_parts[:-1], path_parts[-1]
    return '/'.join([' ' + name for name in directories] + [leaf])
92
93
94def _include_matcher(keyword="#include", delim="<>"):
95    """Match an include statement and return a (keyword, file, extra)
96    duple, or a touple of None values if there isn't a match."""
97
98    rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
99
100    def matcher(context, line):
101        m = rex.match(line)
102        return m.groups() if m else (None, ) * 3
103
104    return matcher
105
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher for includes of files with a specific name.

    fname is a regular expression that is matched against the start of
    the included file name. All keyword arguments are passed on to
    _include_matcher, which does the matching of the include line
    itself.

    The returned function has the same (context, line) signature and
    (keyword, file, extra) result as the one from _include_matcher; it
    returns three None values when either the line or the file name
    doesn't match."""

    name_rex = re.compile(fname)
    match_include = _include_matcher(**kwargs)

    def matcher(context, line):
        keyword, included, extra = match_include(context, line)
        if not included or not name_rex.match(included):
            return (None, ) * 3
        return (keyword, included, extra)

    return matcher
122
123
def _include_matcher_main():
    """Build a matcher for the primary header of a C/C++ source file.

    The returned function reads the source file's name from
    context["filename"]. Only names of the form src/<base>.<ext> with
    an extension of c, cc or cpp are considered; the primary header is
    then the double-quoted include of <base>.h (for c) or <base>.hh
    (for cc and cpp). Anything else yields three None values."""

    quoted_include = _include_matcher(delim='""')
    source_rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # Source file extension -> extension of the matching header
    header_ext = {"c": "h", "cc": "hh", "cpp": "hh"}
    no_match = (None, ) * 3

    def matcher(context, line):
        found = source_rex.match(context["filename"])
        if found is None:
            return no_match

        stem, suffix = found.groups()
        keyword, fname, extra = quoted_include(context, line)

        # Unknown source extensions never have a primary header
        wanted_ext = header_ext.get(suffix)
        if wanted_ext is not None and fname == "%s.%s" % (stem, wanted_ext):
            return (keyword, fname, extra)

        return no_match

    return matcher
150
class SortIncludes(object):
    """Regroup and sort the include statements of a source file.

    An instance is called with the lines of a file and yields the
    lines of the rewritten file. Every run of include statements is
    split into the groups listed in block_order; each group is sorted
    with duplicates removed, and groups are separated by one empty
    line."""

    # different types of includes for different sorting of headers
    # "<file>.h(h)"      - the source file's own header (see
    #                      _include_matcher_main)
    # <Python.h>         - Python header needs to be first if it exists
    # "pybind11/*.h"     - pybind11 headers
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (include type, delimiters used when the include is
    # written back, matcher). The first matcher that accepts a line
    # decides its type, so the order of the entries matters. The
    # patterns are raw strings so that '\.' and '\w' reach the re
    # module untouched instead of being (invalid) string escapes.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('pybind', '""', _include_matcher_fname(r"^pybind11/.*\.h$",
                                                delim='""')),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""', _include_matcher_fname(r"^.*\.h{1,2}$",
                                                  delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Output order of the include groups. Include types that share a
    # tuple are merged into a single sorted group.
    block_order = (
        ('python', ),
        ('pybind', ),
        ('main', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Map every include type to the index of its group in
        # block_order.
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Create the (empty) include store right away so that
        # dump_blocks() and dump_includes() also work on an instance
        # that hasn't processed any lines yet.
        self.reset()

    def reset(self):
        """Forget all stored includes."""
        # include type -> list of cleaned-up include lines
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns a sorted list without duplicates; block types that
        have no stored includes are ignored."""

        includes = []
        for block_type in block_types:
            includes.extend(self.includes.get(block_type, ()))

        return sorted(set(includes))

    def dump_includes(self):
        """Return all stored includes as a list of lines, grouped in
        block_order with an empty line between non-empty groups, and
        forget the stored includes afterwards."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            # Only separate groups that actually contain something
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Yield the lines of a file with its include blocks sorted.

        lines is an iterable of the file's lines. filename and
        language are handed to the matchers as the "filename" and
        "language" entries of their context; language additionally
        enables the C to C++ header translation when it is 'C++'.

        Lines outside of include blocks are yielded unchanged. Empty
        lines inside an include block are dropped, and one empty line
        is emitted between the sorted includes and the first line that
        follows them. Includes that are still pending at the end of
        the input are emitted without a trailing empty line."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            # Returns (include type, cleaned-up include line) for an
            # include statement and (None, line) for anything else.
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include
279
# Languages whose files have our sorting rules applied by default
default_languages = frozenset([
    'C',
    'C++',
    'isa',
    'python',
    'scons',
    'swig',
])
282
def options():
    """Create the command line parser for this script.

    Returns an optparse.OptionParser with three comma-separated list
    options (-d/--dir_ignore, -f/--file_ignore and -l/--languages),
    whose defaults are the joined default_dir_ignore,
    default_file_ignore and default_languages collections, and the
    -n/--dry-run flag."""

    import optparse

    parser = optparse.OptionParser()

    # (short name, long name, metavar, default values, help text)
    list_options = (
        ('-d', '--dir_ignore', "DIR[,DIR]", default_dir_ignore,
         "ignore directories"),
        ('-f', '--file_ignore', "FILE[,FILE]", default_file_ignore,
         "ignore files"),
        ('-l', '--languages', "LANG[,LANG]", default_languages,
         "languages"),
    )
    for short_name, long_name, metavar, defaults, help_text in list_options:
        parser.add_option(short_name, long_name, metavar=metavar,
                          type='string', default=','.join(defaults),
                          help=help_text)

    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
300
def parse_args(parser):
    """Parse the command line with the given parser.

    The dir_ignore, file_ignore and languages option values are split
    on ',' and replaced with frozensets of the resulting strings.

    Returns the (options, positional arguments) pair produced by
    parser.parse_args()."""

    opts, args = parser.parse_args()

    for name in ('dir_ignore', 'file_ignore', 'languages'):
        joined = getattr(opts, name)
        setattr(opts, name, frozenset(joined.split(',')))

    return opts, args
309
if __name__ == '__main__':
    # Command line entry point: every positional argument is a base
    # path that is searched with find_files() using the language and
    # ignore filters from the command line.
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename,language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Only report what would be processed. The
                # parenthesised single-argument form prints the same
                # text under Python 2 and is valid Python 3 syntax.
                print("%s: %s" % (filename, language))
            else:
                # The same name is passed twice to update_file() --
                # presumably source and destination, i.e., the file is
                # rewritten in place; confirm against update_file.
                update_file(filename, filename, language, SortIncludes())
321