sort_includes.py revision 11402
#!/usr/bin/env python
#
# Copyright (c) 2014-2015 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder.  You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2011 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
#          Andreas Sandberg

import os
import re
import sys

from file_types import *

# Maps a C standard library header name to the C++ wrapper header that
# replaces it when the file being processed is C++ (see the lookup in
# SortIncludes.__call__).
cpp_c_headers = {
    'assert.h' : 'cassert',
    'ctype.h'  : 'cctype',
    'errno.h'  : 'cerrno',
    'float.h'  : 'cfloat',
    'limits.h' : 'climits',
    'locale.h' : 'clocale',
    'math.h'   : 'cmath',
    'setjmp.h' : 'csetjmp',
    'signal.h' : 'csignal',
    'stdarg.h' : 'cstdarg',
    'stddef.h' : 'cstddef',
    'stdio.h'  : 'cstdio',
    'stdlib.h' : 'cstdlib',
    'string.h' : 'cstring',
    'time.h'   : 'ctime',
    'wchar.h'  : 'cwchar',
    'wctype.h' : 'cwctype',
}

# Matches '#include'/'%include'/'#import'/'%import' lines; group 3 is the
# text between the <...> or "..." delimiters.
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')


def include_key(line):
    '''Return a sort key for an include line.

    Every directory component of the included path gets a leading
    space so that, when keys are compared as strings, directories sort
    before plain files.

    line -- an include/import statement matched by include_re.

    Raises AssertionError (carrying the line) if the line is not an
    include statement.
    '''

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # keep them sorted with respect to non-dnet includes.  Python
        # guarantees that sorting is stable, so it is enough to blank
        # out the basename part of the filename.
        parts[1] = ' '

    # Everything but the file part needs to have a space prepended.
    parts[:-1] = [' ' + part for part in parts[:-1]]
    return '/'.join(parts)


9412087Sspwilson2@wisc.edudef _include_matcher(keyword="#include", delim="<>"):
9512087Sspwilson2@wisc.edu    """Match an include statement and return a (keyword, file, extra)
9612087Sspwilson2@wisc.edu    duple, or a touple of None values if there isn't a match."""
971156SN/A
981156SN/A    rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
991156SN/A
1004981SN/A    def matcher(context, line):
1011156SN/A        m = rex.match(line)
1021156SN/A        return m.groups() if m else (None, ) * 3
1031156SN/A
1041156SN/A    return matcher
1051156SN/A
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher for includes whose file name matches a pattern.

    fname  -- regular expression applied (with re.match) to the included
              file name.
    kwargs -- forwarded to _include_matcher, which is used to match the
              actual include line.

    Returns a function matcher(context, line) with the same result
    convention as _include_matcher: (keyword, file, extra) on a match,
    three None values otherwise.
    """

    fname_re = re.compile(fname)
    base_matcher = _include_matcher(**kwargs)
    no_match = (None, ) * 3

    def matcher(context, line):
        keyword, included, extra = base_matcher(context, line)
        # An empty or missing file name never counts as a match.
        if included and fname_re.match(included):
            return (keyword, included, extra)
        return no_match

    return matcher


def _include_matcher_main():
    """Build a matcher for a C/C++ source file's primary header.

    The primary header is the quoted include whose path equals the
    source file's path relative to "src/", with the source extension
    replaced by the corresponding header extension (c -> h, cc -> hh,
    cpp -> hh).

    Returns a function matcher(context, line).  It reads
    context["filename"], which must start with "src/" and carry one of
    the known source extensions for anything to match.  The result is
    (keyword, file, extra) on a match and three None values otherwise.
    """

    base_matcher = _include_matcher(delim='""')
    rex = re.compile(r"^src/(.*)\.([^.]+)$")
    header_map = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
        }
    no_match = (None, ) * 3

    def matcher(context, line):
        m = rex.match(context["filename"])
        if not m:
            return no_match

        base, ext = m.groups()
        header_ext = header_map.get(ext)
        if header_ext is None:
            # Not a source file type that has a primary header.
            return no_match

        keyword, fname, extra = base_matcher(context, line)
        if fname == "%s.%s" % (base, header_ext):
            return (keyword, fname, extra)
        return no_match

    return matcher

class SortIncludes(object):
    """Callable that regroups and sorts the include blocks of a file.

    Calling an instance with the lines of a file returns a generator
    producing the same lines, except that each run of include statements
    is replaced by the same statements split into categories (in the
    order given by block_order), de-duplicated and sorted within each
    category, with an empty line between non-empty categories.
    """

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (category, delimiters used when re-emitting the
    # line, matcher).  Entries are tried in order and the first matcher
    # that recognises a line decides its category.  The patterns are raw
    # strings so that '\.' and '\w' reach the re module untouched.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""',
         _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Output order of the categories.  Categories listed in the same
    # tuple are merged into a single sorted block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Map every category name to the index of its block in
        # block_order.
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Make sure self.includes exists even before the first call.
        self.reset()

    def reset(self):
        """Forget all include lines collected so far."""
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns a sorted list of the distinct include lines stored for
        the given block types; types with no stored lines are skipped."""

        includes = []
        for block_type in block_types:
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all collected includes as a list of output lines,
        block by block in block_order, with an empty string between
        consecutive non-empty blocks.  The collected includes are
        cleared afterwards."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its include blocks sorted.

        lines    -- iterable of the file's lines.  An empty string is
                    emitted as the separator after an include block, so
                    lines are presumably passed without line terminators
                    (confirm against the caller).
        filename -- name of the file; handed to the matchers through the
                    context dictionary.
        language -- language of the file; C headers listed in
                    cpp_c_headers are rewritten to their C++ names only
                    when this is 'C++'.
        """
        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            # Classify a line: returns (category, rewritten include
            # line) for an include, or (None, line) for anything else.
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if not keyword:
                    continue

                # We've got a match: clean up the #include line, fix up
                # stl headers and store it in the proper category.
                if include_type == 'c' and language == 'C++':
                    stl_inc = cpp_c_headers.get(include)
                    if stl_inc:
                        include = stl_inc
                        include_type = 'stl'

                return (include_type,
                        keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include

# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))

def options():
    """Build and return the command line option parser.

    The ignore/language options take comma-separated lists.  Their
    defaults come from default_dir_ignore, default_file_ignore and
    default_languages; the first two are not defined in this file and
    are presumably provided by 'from file_types import *' (confirm).
    """
    import optparse

    parser = optparse.OptionParser()
    add_option = parser.add_option
    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
               default=','.join(default_dir_ignore),
               help="ignore directories")
    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
               default=','.join(default_file_ignore),
               help="ignore files")
    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
               default=','.join(default_languages),
               help="languages")
    add_option('-n', '--dry-run', action='store_true',
               help="don't overwrite files")

    return parser

def parse_args(parser):
    """Parse the command line with the given parser.

    parser -- option parser defining the dir_ignore, file_ignore and
              languages options (as built by options()).

    Returns (opts, args) as produced by parser.parse_args(), with the
    three comma-separated list options converted to frozensets.
    """
    opts, args = parser.parse_args()

    opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
    opts.file_ignore = frozenset(opts.file_ignore.split(','))
    opts.languages = frozenset(opts.languages.split(','))

    return opts, args

if __name__ == '__main__':
    # Sort the includes of every matching file below each path given on
    # the command line.  find_files and update_file are not defined in
    # this file; presumably they come from 'from file_types import *'.
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename, language in find_files(
                base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Dry run: only report which files would be processed.
                # A single parenthesised argument prints the same text
                # whether print is a statement or a function.
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())
3181939SN/A