sort_includes.py revision 11398
18759Sgblack@eecs.umich.edu#!/usr/bin/env python
28759Sgblack@eecs.umich.edu#
38759Sgblack@eecs.umich.edu# Copyright (c) 2014-2015 ARM Limited
48759Sgblack@eecs.umich.edu# All rights reserved
58759Sgblack@eecs.umich.edu#
68759Sgblack@eecs.umich.edu# The license below extends only to copyright in the software and shall
78759Sgblack@eecs.umich.edu# not be construed as granting a license to any other intellectual
88759Sgblack@eecs.umich.edu# property including but not limited to intellectual property relating
98759Sgblack@eecs.umich.edu# to a hardware implementation of the functionality of the software
108759Sgblack@eecs.umich.edu# licensed hereunder.  You may use the software subject to the license
118759Sgblack@eecs.umich.edu# terms below provided that you ensure that this notice is replicated
128759Sgblack@eecs.umich.edu# unmodified and in its entirety in all distributions of the software,
138759Sgblack@eecs.umich.edu# modified or unmodified, in source code or in binary form.
148759Sgblack@eecs.umich.edu#
158759Sgblack@eecs.umich.edu# Copyright (c) 2011 The Hewlett-Packard Development Company
168759Sgblack@eecs.umich.edu# All rights reserved.
178759Sgblack@eecs.umich.edu#
188759Sgblack@eecs.umich.edu# Redistribution and use in source and binary forms, with or without
198759Sgblack@eecs.umich.edu# modification, are permitted provided that the following conditions are
208759Sgblack@eecs.umich.edu# met: redistributions of source code must retain the above copyright
218759Sgblack@eecs.umich.edu# notice, this list of conditions and the following disclaimer;
228759Sgblack@eecs.umich.edu# redistributions in binary form must reproduce the above copyright
238759Sgblack@eecs.umich.edu# notice, this list of conditions and the following disclaimer in the
248759Sgblack@eecs.umich.edu# documentation and/or other materials provided with the distribution;
258759Sgblack@eecs.umich.edu# neither the name of the copyright holders nor the names of its
268759Sgblack@eecs.umich.edu# contributors may be used to endorse or promote products derived from
278759Sgblack@eecs.umich.edu# this software without specific prior written permission.
288759Sgblack@eecs.umich.edu#
298759Sgblack@eecs.umich.edu# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
308759Sgblack@eecs.umich.edu# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
318759Sgblack@eecs.umich.edu# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
328759Sgblack@eecs.umich.edu# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
338759Sgblack@eecs.umich.edu# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
348759Sgblack@eecs.umich.edu# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
358759Sgblack@eecs.umich.edu# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
368759Sgblack@eecs.umich.edu# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
378759Sgblack@eecs.umich.edu# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
388759Sgblack@eecs.umich.edu# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
398759Sgblack@eecs.umich.edu# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
408759Sgblack@eecs.umich.edu#
418759Sgblack@eecs.umich.edu# Authors: Nathan Binkert
428759Sgblack@eecs.umich.edu#          Andreas Sandberg
438759Sgblack@eecs.umich.edu
448759Sgblack@eecs.umich.eduimport os
458759Sgblack@eecs.umich.eduimport re
468759Sgblack@eecs.umich.eduimport sys
478759Sgblack@eecs.umich.edu
488759Sgblack@eecs.umich.edufrom file_types import *
498759Sgblack@eecs.umich.edu
# Lookup table from a C standard library header to the C++ header
# name that replaces it when the include is normalized.
cpp_c_headers = dict([
    ('assert.h', 'cassert'),
    ('ctype.h', 'cctype'),
    ('errno.h', 'cerrno'),
    ('float.h', 'cfloat'),
    ('limits.h', 'climits'),
    ('locale.h', 'clocale'),
    ('math.h', 'cmath'),
    ('setjmp.h', 'csetjmp'),
    ('signal.h', 'csignal'),
    ('stdarg.h', 'cstdarg'),
    ('stddef.h', 'cstddef'),
    ('stdio.h', 'cstdio'),
    ('stdlib.h', 'cstdlib'),
    ('string.h', 'cstring'),
    ('time.h', 'ctime'),
    ('wchar.h', 'cwchar'),
    ('wctype.h', 'cwctype'),
])
69
# Recognizes '#'/'%' include and import lines; group 3 is the text found
# between the delimiters (quotes or angle brackets).
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
def include_key(line):
    '''Return the sort key for an include line.

    The key is the included path with a space prepended to every
    directory component, so that a directory compares lower than a
    plain file name at the same level.

    The line must match include_re; otherwise the assertion fails
    with the offending line as its message.'''

    found = include_re.match(line)
    assert found, line

    components = found.group(3).split('/')
    directories = components[:-1]
    leaf = components[-1]

    if len(components) == 2 and directories[0] == 'dnet':
        # All dnet includes share one key: a stable sort then keeps
        # them in their existing relative order while still placing
        # them correctly among the non-dnet includes.
        leaf = ' '

    return '/'.join([' ' + name for name in directories] + [leaf])
92
93
94def _include_matcher(keyword="#include", delim="<>"):
95    """Match an include statement and return a (keyword, file, extra)
96    duple, or a touple of None values if there isn't a match."""
97
98    rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
99
100    def matcher(context, line):
101        m = rex.match(line)
102        return m.groups() if m else (None, ) * 3
103
104    return matcher
105
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher that only accepts includes of particular files.

    fname  -- regular expression the included file name has to match
              (applied with re.match, i.e. anchored at the start)
    kwargs -- passed on unchanged to _include_matcher, which does the
              matching of the include line itself

    The returned matcher(context, line) gives the (keyword, file,
    extra) tuple of the underlying matcher when the file name is
    accepted and a tuple of three None values otherwise."""

    name_rex = re.compile(fname)
    match_include = _include_matcher(**kwargs)

    def matcher(context, line):
        keyword, included, extra = match_include(context, line)
        # Reject both "not an include line" (included is None) and an
        # include whose file name does not fit the pattern.
        if not included or not name_rex.match(included):
            return (None, ) * 3
        return (keyword, included, extra)

    return matcher
122
123
def _include_matcher_main():
    """Build a matcher for the primary header of a C/C++ source file.

    The primary header is the quoted include whose path equals the
    path of the source file below 'src/' with the source extension
    swapped for the corresponding header extension. The name of the
    file being processed is read from context["filename"].

    The returned matcher(context, line) gives a (keyword, file, extra)
    tuple on a match and a tuple of three None values otherwise."""

    quoted_matcher = _include_matcher(delim='""')
    src_rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # Source file extension -> extension of its primary header
    header_ext_for = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
        }

    def matcher(context, line):
        found = src_rex.match(context["filename"])
        if found:
            stem, src_ext = found.groups()
            keyword, fname, extra = quoted_matcher(context, line)
            header_ext = header_ext_for.get(src_ext)
            # Source extensions without a known header extension can
            # never have a primary header.
            if header_ext is not None and \
                    fname == "%s.%s" % (stem, header_ext):
                return (keyword, fname, extra)

        return (None, ) * 3

    return matcher
150
class SortIncludes(object):
    """Callable that rewrites the lines of a file so that each run of
    include statements is grouped by type and sorted.

    Calling an instance with (lines, filename, language) returns a
    generator over the rewritten lines."""

    # Classification table, tried in order; the first matcher that
    # accepts a line decides its type. Each entry is
    # (type, delimiters used when the line is re-emitted, matcher).
    #
    # "<source base name>.<header ext>" - main header of the source file
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers
    # "*.h" / "*.hh"     - M5 headers
    # %import / %include - swig directives
    #
    # Within a block the normalized lines are ordered by plain string
    # comparison (see dump_blocks).
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""', _include_matcher_fname(r"^.*\.h{1,2}$",
                                                  delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Output order of the blocks. Types that share a tuple are merged
    # into a single block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Map every include type to the index of its block in
        # block_order.
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Create the include store right away so that dump_blocks and
        # dump_includes also work on an instance that has not been
        # called yet.
        self.reset()

    def reset(self):
        """Forget all include lines collected so far."""
        # include type -> list of normalized include lines
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge the includes of several block types into one sorted
        list without duplicates. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Block types without any collected include are skipped."""

        includes = []
        for block_type in block_types:
            includes.extend(self.includes.get(block_type, ()))

        return sorted(set(includes))

    def dump_includes(self):
        """Return all collected includes as one string and clear the
        store.

        Blocks appear in block_order, lines within a block are
        separated by a newline and blocks by one empty line. Empty
        blocks are left out."""

        blocks = []
        for types in self.block_order:
            block = "\n".join(self.dump_blocks(types))
            if block:
                blocks.append(block)

        self.reset()
        return "\n\n".join(blocks)

    def __call__(self, lines, filename, language):
        """Generate the content of a file with its includes sorted.

        lines    -- iterable of the lines of the file
        filename -- name of the file; handed to the matchers through
                    the context dictionary
        language -- language of the file; for 'C++' the C headers
                    listed in cpp_c_headers are replaced by their C++
                    names and filed as STL headers

        Lines outside of include runs are yielded unchanged. Each run
        of includes is yielded as a single string, which may contain
        newlines, and is followed by an empty line unless the run
        extends to the end of the input."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            # Returns (include type, normalized include line), or
            # (None, line) if the line is not a recognized include.
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                yield self.dump_includes()
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            yield self.dump_includes()
276
277
278
# Languages the sorting rules are applied to when -l is not given
default_languages = frozenset(['C', 'C++', 'isa', 'python', 'scons', 'swig'])

def options():
    """Create the option parser for the command line interface.

    Returns an optparse.OptionParser with three comma-separated list
    options (-d, -f, -l) and the -n/--dry-run flag. The -d and -f
    defaults come from default_dir_ignore and default_file_ignore,
    which are not defined in this file (presumably provided by the
    file_types star import)."""
    import optparse

    parser = optparse.OptionParser()

    # (short flag, long flag, metavar, default entries, help text)
    comma_list_options = (
        ('-d', '--dir_ignore', "DIR[,DIR]", default_dir_ignore,
         "ignore directories"),
        ('-f', '--file_ignore', "FILE[,FILE]", default_file_ignore,
         "ignore files"),
        ('-l', '--languages', "LANG[,LANG]", default_languages,
         "languages"),
    )
    for short_flag, long_flag, meta, entries, text in comma_list_options:
        parser.add_option(short_flag, long_flag, metavar=meta,
                          type='string', default=','.join(entries),
                          help=text)

    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
299
def parse_args(parser):
    """Run the parser and turn its comma-separated options into sets.

    parser -- object whose parse_args() returns an (options,
              arguments) pair

    The dir_ignore, file_ignore and languages attributes of the
    options object are replaced in place by frozensets of their
    comma-separated entries. Returns the (options, arguments) pair."""
    opts, args = parser.parse_args()

    for name in ('dir_ignore', 'file_ignore', 'languages'):
        entries = getattr(opts, name).split(',')
        setattr(opts, name, frozenset(entries))

    return opts, args
308
if __name__ == '__main__':
    # Command line entry point: sort the includes of every matching
    # file found below each path given as an argument.
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        # find_files and update_file are not defined in this file
        # (presumably provided by the file_types star import).
        for filename, language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Dry run: only report what would be processed. The
                # parenthesized form with a single argument prints the
                # same text on Python 2 and is valid syntax on Python 3,
                # unlike the print statement.
                print("%s: %s" % (filename, language))
            else:
                # Rewrite the file in place (source and destination
                # are the same name).
                update_file(filename, filename, language, SortIncludes())
320