sort_includes.py revision 11410:e51095583654
1#!/usr/bin/env python
2#
3# Copyright (c) 2014-2015 ARM Limited
4# All rights reserved
5#
6# The license below extends only to copyright in the software and shall
7# not be construed as granting a license to any other intellectual
8# property including but not limited to intellectual property relating
9# to a hardware implementation of the functionality of the software
10# licensed hereunder.  You may use the software subject to the license
11# terms below provided that you ensure that this notice is replicated
12# unmodified and in its entirety in all distributions of the software,
13# modified or unmodified, in source code or in binary form.
14#
15# Copyright (c) 2011 The Hewlett-Packard Development Company
16# All rights reserved.
17#
18# Redistribution and use in source and binary forms, with or without
19# modification, are permitted provided that the following conditions are
20# met: redistributions of source code must retain the above copyright
21# notice, this list of conditions and the following disclaimer;
22# redistributions in binary form must reproduce the above copyright
23# notice, this list of conditions and the following disclaimer in the
24# documentation and/or other materials provided with the distribution;
25# neither the name of the copyright holders nor the names of its
26# contributors may be used to endorse or promote products derived from
27# this software without specific prior written permission.
28#
29# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40#
41# Authors: Nathan Binkert
42#          Andreas Sandberg
43
44import os
45import re
46import sys
47
48from file_types import *
49
# Maps each C standard library header ("<name>.h") to its C++ wrapper
# header ("c<name>").  SortIncludes consults this table to rename C
# headers when the file being processed is C++.
cpp_c_headers = dict(
    (stem + '.h', 'c' + stem)
    for stem in (
        'assert',
        'ctype',
        'errno',
        'float',
        'limits',
        'locale',
        'math',
        'setjmp',
        'signal',
        'stdarg',
        'stddef',
        'stdio',
        'stdlib',
        'string',
        'time',
        'wchar',
        'wctype',
    )
)
69
# Recognises '#'/'%' include and import lines.  Group 1 is the marker,
# group 2 the directive and group 3 the path found between the quote or
# angle-bracket delimiters.
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')


def include_key(line):
    '''Return a sort key for an include line in which every directory
    component of the included path carries a leading space, so that
    directories sort ahead of plain files.

    The line must be an include/import statement; anything else trips
    the assertion.'''

    found = include_re.match(line)
    assert found, line

    components = found.group(3).split('/')
    if len(components) == 2 and components[0] == 'dnet':
        # dnet includes keep their relative order but are still sorted
        # against everything else.  Blanking the file name gives all of
        # them the same key, and Python's stable sort does the rest.
        components[1] = ' '

    # Only the directory parts get the leading space; the final (file)
    # component is appended unchanged.
    directories = [' ' + name for name in components[:-1]]
    return '/'.join(directories + components[-1:])
92
93
94def _include_matcher(keyword="#include", delim="<>"):
95    """Match an include statement and return a (keyword, file, extra)
96    duple, or a touple of None values if there isn't a match."""
97
98    rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
99
100    def matcher(context, line):
101        m = rex.match(line)
102        return m.groups() if m else (None, ) * 3
103
104    return matcher
105
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher that only accepts includes whose file name
    matches the regular expression 'fname'.

    The include line itself is recognised by _include_matcher, which
    receives any keyword arguments unchanged.  The result is the same
    (keyword, file, extra) tuple, or three None values when either the
    line or the file name does not match."""

    name_rex = re.compile(fname)
    match_include = _include_matcher(**kwargs)

    def matcher(context, line):
        result = match_include(context, line)
        included = result[1]
        # Reject both non-include lines (no file name) and includes of
        # files that do not fit the requested pattern.
        if not included or name_rex.match(included) is None:
            return (None, ) * 3
        return result

    return matcher
122
123
def _include_matcher_main():
    """Build a matcher for a C/C++ source file's primary header, i.e.
    the quoted include of a file with the same base name but a header
    extension.

    The source file name is read from context["filename"] and must
    start with "src/"; the expected header path is the remainder with
    its extension swapped (c -> h, cc/cpp -> hh)."""

    source_rex = re.compile(r"^src/(.*)\.([^.]+)$")
    header_ext = {"c": "h", "cc": "hh", "cpp": "hh"}
    quoted_include = _include_matcher(delim='""')
    no_match = (None, None, None)

    def matcher(context, line):
        source = source_rex.match(context["filename"])
        if source is None:
            return no_match

        base, ext = source.groups()
        if ext not in header_ext:
            # Not a source file type with a known header extension.
            return no_match

        result = quoted_include(context, line)
        if result[1] != base + "." + header_ext[ext]:
            return no_match
        return result

    return matcher
150
class SortIncludes(object):
    """Regroup and sort the include statements of a source file.

    Calling an instance with the lines of a file returns a generator
    that yields the lines back.  Every run of include statements
    (blank lines inside a run are dropped) is replaced by the same
    includes grouped by type in 'block_order', de-duplicated and sorted
    within each group, with one empty line between groups and one
    after the run."""

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (block type, output delimiters, matcher).  The
    # first matcher that accepts a line decides its type, so the order
    # of the entries matters.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""',
         _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Output order of the include blocks.  Types listed in the same
    # tuple are merged into a single sorted block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Position of each block type within block_order.
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Pending include lines keyed by block type.  Created here so
        # the dump methods also work on an instance that has not been
        # called yet.
        self.includes = {}

    def reset(self):
        """Forget all include lines collected so far."""
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge the includes of several block types into one sorted
        list without duplicates. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority."""

        merged = []
        for block_type in block_types:
            merged.extend(self.includes.get(block_type, ()))

        return sorted(set(merged))

    def dump_includes(self):
        """Return all pending includes as a list of lines, one block
        per block_order entry with an empty line between non-empty
        blocks, and clear the pending includes."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Yield 'lines' with every run of include statements sorted.

        'filename' and 'language' are handed to the matchers through
        the context dictionary; 'language' additionally decides whether
        C headers are renamed to their C++ equivalents."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            # Returns (block type, normalised include line) for include
            # statements and (None, original line) for anything else.
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if not keyword:
                    continue

                # We've got a match: fix up C headers that have a C++
                # counterpart and rebuild the line in canonical form.
                if include_type == 'c' and language == 'C++':
                    stl_inc = cpp_c_headers.get(include)
                    if stl_inc:
                        include = stl_inc
                        include_type = 'stl'

                return (include_type,
                        keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                # Store the include in its category until the block ends
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include
276
# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))


def options():
    """Create the command line parser for the include sorter.

    The ignore lists and the language list are comma-separated strings
    whose defaults are joined from default_dir_ignore,
    default_file_ignore (presumably provided by the file_types import)
    and default_languages."""

    import optparse

    parser = optparse.OptionParser()
    parser.add_option('-d', '--dir_ignore', metavar="DIR[,DIR]",
                      type='string',
                      default=','.join(default_dir_ignore),
                      help="ignore directories")
    parser.add_option('-f', '--file_ignore', metavar="FILE[,FILE]",
                      type='string',
                      default=','.join(default_file_ignore),
                      help="ignore files")
    parser.add_option('-l', '--languages', metavar="LANG[,LANG]",
                      type='string',
                      default=','.join(default_languages),
                      help="languages")
    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
297
def parse_args(parser):
    """Parse the command line with 'parser' and return (opts, args).

    The comma-separated dir_ignore, file_ignore and languages options
    are replaced by frozensets of their individual entries."""

    opts, args = parser.parse_args()

    for name in ('dir_ignore', 'file_ignore', 'languages'):
        entries = getattr(opts, name).split(',')
        setattr(opts, name, frozenset(entries))

    return opts, args
306
# Script entry point: sort the includes of every matching file found
# below the paths given on the command line.
if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        # find_files and update_file are not defined in this file;
        # presumably they come from the file_types star import.
        matches = find_files(base, languages=opts.languages,
                             file_ignore=opts.file_ignore,
                             dir_ignore=opts.dir_ignore)
        for filename, language in matches:
            if opts.dry_run:
                # Dry run: only report what would be processed.  A
                # single parenthesised argument prints identically
                # under Python 2 and Python 3.
                print("%s: %s" % (filename, language))
            else:
                # Rewrite the file in place with a fresh sorter.
                update_file(filename, filename, language, SortIncludes())
318