sort_includes.py revision 11397
13005Sstever@eecs.umich.edu#!/usr/bin/env python
23005Sstever@eecs.umich.edu
33005Sstever@eecs.umich.edu# Copyright (c) 2011 The Hewlett-Packard Development Company
43005Sstever@eecs.umich.edu# All rights reserved.
53005Sstever@eecs.umich.edu#
63005Sstever@eecs.umich.edu# Redistribution and use in source and binary forms, with or without
73005Sstever@eecs.umich.edu# modification, are permitted provided that the following conditions are
83005Sstever@eecs.umich.edu# met: redistributions of source code must retain the above copyright
93005Sstever@eecs.umich.edu# notice, this list of conditions and the following disclaimer;
103005Sstever@eecs.umich.edu# redistributions in binary form must reproduce the above copyright
113005Sstever@eecs.umich.edu# notice, this list of conditions and the following disclaimer in the
123005Sstever@eecs.umich.edu# documentation and/or other materials provided with the distribution;
133005Sstever@eecs.umich.edu# neither the name of the copyright holders nor the names of its
143005Sstever@eecs.umich.edu# contributors may be used to endorse or promote products derived from
153005Sstever@eecs.umich.edu# this software without specific prior written permission.
163005Sstever@eecs.umich.edu#
173005Sstever@eecs.umich.edu# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
183005Sstever@eecs.umich.edu# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
193005Sstever@eecs.umich.edu# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
203005Sstever@eecs.umich.edu# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
213005Sstever@eecs.umich.edu# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
223005Sstever@eecs.umich.edu# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
233005Sstever@eecs.umich.edu# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
243005Sstever@eecs.umich.edu# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
253005Sstever@eecs.umich.edu# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
263005Sstever@eecs.umich.edu# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
273005Sstever@eecs.umich.edu# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
283005Sstever@eecs.umich.edu#
292889SN/A# Authors: Nathan Binkert
302889SN/A
312710SN/Aimport os
322710SN/Aimport re
332934SN/Aimport sys
342934SN/A
352549SN/Afrom file_types import *
362995SN/A
372549SN/Acpp_c_headers = {
383088Sstever@eecs.umich.edu    'assert.h' : 'cassert',
393088Sstever@eecs.umich.edu    'ctype.h'  : 'cctype',
403088Sstever@eecs.umich.edu    'errno.h'  : 'cerrno',
412889SN/A    'float.h'  : 'cfloat',
422710SN/A    'limits.h' : 'climits',
432917SN/A    'locale.h' : 'clocale',
442710SN/A    'math.h'   : 'cmath',
452917SN/A    'setjmp.h' : 'csetjmp',
462948SN/A    'signal.h' : 'csignal',
472995SN/A    'stdarg.h' : 'cstdarg',
482995SN/A    'stddef.h' : 'cstddef',
492995SN/A    'stdio.h'  : 'cstdio',
502995SN/A    'stdlib.h' : 'cstdlib',
512995SN/A    'string.h' : 'cstring',
523143Shsul@eecs.umich.edu    'time.h'   : 'ctime',
533025Ssaidi@eecs.umich.edu    'wchar.h'  : 'cwchar',
543143Shsul@eecs.umich.edu    'wctype.h' : 'cwctype',
553143Shsul@eecs.umich.edu}
563143Shsul@eecs.umich.edu
573143Shsul@eecs.umich.eduinclude_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
583183Shsul@eecs.umich.edudef include_key(line):
593183Shsul@eecs.umich.edu    '''Mark directories with a leading space so directories
602710SN/A    are sorted before files'''
612710SN/A
622710SN/A    match = include_re.match(line)
632710SN/A    assert match, line
642710SN/A    keyword = match.group(2)
652710SN/A    include = match.group(3)
662710SN/A
672934SN/A    # Everything but the file part needs to have a space prepended
682934SN/A    parts = include.split('/')
692934SN/A    if len(parts) == 2 and parts[0] == 'dnet':
702953SN/A        # Don't sort the dnet includes with respect to each other, but
712934SN/A        # make them sorted with respect to non dnet includes.  Python
722934SN/A        # guarantees that sorting is stable, so just clear the
732934SN/A        # basename part of the filename.
742953SN/A        parts[1] = ' '
752934SN/A    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]
762934SN/A    key = '/'.join(parts)
772934SN/A
782953SN/A    return key
792566SN/A
803005Sstever@eecs.umich.edu
813005Sstever@eecs.umich.edudef _include_matcher(keyword="#include", delim="<>"):
823180Sstever@eecs.umich.edu    """Match an include statement and return a (keyword, file, extra)
833180Sstever@eecs.umich.edu    duple, or a touple of None values if there isn't a match."""
842995SN/A
852995SN/A    rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
862995SN/A
872995SN/A    def matcher(context, line):
882995SN/A        m = rex.match(line)
892995SN/A        return m.groups() if m else (None, ) * 3
902995SN/A
912995SN/A    return matcher
922917SN/A
932995SN/Adef _include_matcher_fname(fname, **kwargs):
943005Sstever@eecs.umich.edu    """Match an include of a specific file name. Any keyword arguments
952995SN/A    are forwarded to _include_matcher, which is used to match the
963005Sstever@eecs.umich.edu    actual include line."""
973005Sstever@eecs.umich.edu
983005Sstever@eecs.umich.edu    rex = re.compile(fname)
993005Sstever@eecs.umich.edu    base_matcher = _include_matcher(**kwargs)
1003005Sstever@eecs.umich.edu
1013005Sstever@eecs.umich.edu    def matcher(context, line):
1023050Sstever@eecs.umich.edu        (keyword, fname, extra) = base_matcher(context, line)
1033005Sstever@eecs.umich.edu        if fname and rex.match(fname):
1043005Sstever@eecs.umich.edu            return (keyword, fname, extra)
1053005Sstever@eecs.umich.edu        else:
1063050Sstever@eecs.umich.edu            return (None, ) * 3
1073025Ssaidi@eecs.umich.edu
1083005Sstever@eecs.umich.edu    return matcher
1093005Sstever@eecs.umich.edu
1103005Sstever@eecs.umich.edu
1113005Sstever@eecs.umich.edudef _include_matcher_main():
1123005Sstever@eecs.umich.edu    """Match a C/C++ source file's primary header (i.e., a file with
1133050Sstever@eecs.umich.edu    the same base name, but a header extension)."""
1143005Sstever@eecs.umich.edu
1153005Sstever@eecs.umich.edu    base_matcher = _include_matcher(delim='""')
1163005Sstever@eecs.umich.edu    rex = re.compile(r"^src/(.*)\.([^.]+)$")
1172566SN/A    header_map = {
1182710SN/A        "c" : "h",
1192710SN/A        "cc" : "hh",
1203183Shsul@eecs.umich.edu        "cpp" : "hh",
1213183Shsul@eecs.umich.edu        }
1223183Shsul@eecs.umich.edu    def matcher(context, line):
1233183Shsul@eecs.umich.edu        m = rex.match(context["filename"])
1243183Shsul@eecs.umich.edu        if not m:
1253183Shsul@eecs.umich.edu            return (None, ) * 3
1263183Shsul@eecs.umich.edu        base, ext = m.groups()
1273183Shsul@eecs.umich.edu        (keyword, fname, extra) = base_matcher(context, line)
1283183Shsul@eecs.umich.edu        try:
1293183Shsul@eecs.umich.edu            if fname == "%s.%s" % (base, header_map[ext]):
1303183Shsul@eecs.umich.edu                return (keyword, fname, extra)
1313183Shsul@eecs.umich.edu        except KeyError:
1323183Shsul@eecs.umich.edu            pass
1333183Shsul@eecs.umich.edu
1343183Shsul@eecs.umich.edu        return (None, ) * 3
1353183Shsul@eecs.umich.edu
1363183Shsul@eecs.umich.edu    return matcher
1373183Shsul@eecs.umich.edu
1383183Shsul@eecs.umich.educlass SortIncludes(object):
1393183Shsul@eecs.umich.edu    # different types of includes for different sorting of headers
1403183Shsul@eecs.umich.edu    # <Python.h>         - Python header needs to be first if it exists
1413183Shsul@eecs.umich.edu    # <*.h>              - system headers (directories before files)
1423183Shsul@eecs.umich.edu    # <*>                - STL headers
1433183Shsul@eecs.umich.edu    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
1443183Shsul@eecs.umich.edu    # "*"                - M5 headers (directories before files)
1452917SN/A    includes_re = (
1463046Sstever@eecs.umich.edu        ('main', '""', _include_matcher_main()),
1472948SN/A        ('python', '<>', _include_matcher_fname("^Python\.h$")),
1482948SN/A        ('c', '<>', _include_matcher_fname("^.*\.h$")),
1492948SN/A        ('stl', '<>', _include_matcher_fname("^\w+$")),
1503046Sstever@eecs.umich.edu        ('cc', '<>', _include_matcher_fname("^.*\.(hh|hxx|hpp|H)$")),
1512917SN/A        ('m5header', '""', _include_matcher_fname("^.*\.h{1,2}$", delim='""')),
1523046Sstever@eecs.umich.edu        ('swig0', '<>', _include_matcher(keyword="%import")),
1533022Shsul@eecs.umich.edu        ('swig1', '<>', _include_matcher(keyword="%include")),
1543046Sstever@eecs.umich.edu        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
1553022Shsul@eecs.umich.edu        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
1563022Shsul@eecs.umich.edu        )
1573143Shsul@eecs.umich.edu
1583143Shsul@eecs.umich.edu    block_order = (
1593143Shsul@eecs.umich.edu        ('main', ),
1603143Shsul@eecs.umich.edu        ('python', ),
1613143Shsul@eecs.umich.edu        ('c', ),
1623133Shsul@eecs.umich.edu        ('stl', ),
1633133Shsul@eecs.umich.edu        ('cc', ),
1643133Shsul@eecs.umich.edu        ('m5header', ),
1653133Shsul@eecs.umich.edu        ('swig0', 'swig1', 'swig2', 'swig3', ),
1662710SN/A        )
1672740SN/A
168    def __init__(self):
169        self.block_priority = {}
170        for prio, keys in enumerate(self.block_order):
171            for key in keys:
172                self.block_priority[key] = prio
173
174    def reset(self):
175        # clear all stored headers
176        self.includes = {}
177
178    def dump_blocks(self, block_types):
179        """Merge includes of from several block types into one large
180        block of sorted includes. This is useful when we have multiple
181        include block types (e.g., swig includes) with the same
182        priority."""
183
184        includes = []
185        for block_type in block_types:
186            try:
187                includes += self.includes[block_type]
188            except KeyError:
189                pass
190
191        return sorted(set(includes))
192
193    def dump_includes(self):
194        blocks = []
195        # Create a list of blocks in the prescribed include
196        # order. Each entry in the list is a multi-line string with
197        # multiple includes.
198        for types in self.block_order:
199            block = "\n".join(self.dump_blocks(types))
200            if block:
201                blocks.append(block)
202
203        self.reset()
204        return "\n\n".join(blocks)
205
206    def __call__(self, lines, filename, language):
207        self.reset()
208
209        context = {
210            "filename" : filename,
211            "language" : language,
212            }
213
214        def match_line(line):
215            if not line:
216                return (None, line)
217
218            for include_type, (ldelim, rdelim), matcher in self.includes_re:
219                keyword, include, extra = matcher(context, line)
220                if keyword:
221                    # if we've got a match, clean up the #include line,
222                    # fix up stl headers and store it in the proper category
223                    if include_type == 'c' and language == 'C++':
224                        stl_inc = cpp_c_headers.get(include, None)
225                        if stl_inc:
226                            include = stl_inc
227                            include_type = 'stl'
228
229                    return (include_type,
230                            keyword + ' ' + ldelim + include + rdelim + extra)
231
232            return (None, line)
233
234        processing_includes = False
235        for line in lines:
236            include_type, line = match_line(line)
237            if include_type:
238                try:
239                    self.includes[include_type].append(line)
240                except KeyError:
241                    self.includes[include_type] = [ line ]
242
243                processing_includes = True
244            elif processing_includes and not line.strip():
245                # Skip empty lines while processing includes
246                pass
247            elif processing_includes:
248                # We are now exiting an include block
249                processing_includes = False
250
251                # Output pending includes, a new line between, and the
252                # current l.
253                yield self.dump_includes()
254                yield ''
255                yield line
256            else:
257                # We are not in an include block, so just emit the line
258                yield line
259
260        # We've reached EOF, so dump any pending includes
261        if processing_includes:
262            yield self.dump_includes()
263
264
265
266# default language types to try to apply our sorting rules to
267default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))
268
269def options():
270    import optparse
271    options = optparse.OptionParser()
272    add_option = options.add_option
273    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
274               default=','.join(default_dir_ignore),
275               help="ignore directories")
276    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
277               default=','.join(default_file_ignore),
278               help="ignore files")
279    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
280               default=','.join(default_languages),
281               help="languages")
282    add_option('-n', '--dry-run', action='store_true',
283               help="don't overwrite files")
284
285    return options
286
287def parse_args(parser):
288    opts,args = parser.parse_args()
289
290    opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
291    opts.file_ignore = frozenset(opts.file_ignore.split(','))
292    opts.languages = frozenset(opts.languages.split(','))
293
294    return opts,args
295
296if __name__ == '__main__':
297    parser = options()
298    opts, args = parse_args(parser)
299
300    for base in args:
301        for filename,language in find_files(base, languages=opts.languages,
302                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
303            if opts.dry_run:
304                print "%s: %s" % (filename, language)
305            else:
306                update_file(filename, filename, language, SortIncludes())
307