util/style/sort_includes.py

#!/usr/bin/env python
#
# Copyright (c) 2014-2015 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder.  You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2011 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
#          Andreas Sandberg

import os
import re
import sys

from file_types import *

# Maps a C standard library header name to the name of the corresponding
# C++ header. Lookups are by the exact include path, so only bare names
# such as 'stdio.h' (no directory component) can match.
cpp_c_headers = {
    'assert.h' : 'cassert',
    'ctype.h'  : 'cctype',
    'errno.h'  : 'cerrno',
    'float.h'  : 'cfloat',
    'limits.h' : 'climits',
    'locale.h' : 'clocale',
    'math.h'   : 'cmath',
    'setjmp.h' : 'csetjmp',
    'signal.h' : 'csignal',
    'stdarg.h' : 'cstdarg',
    'stddef.h' : 'cstddef',
    'stdio.h'  : 'cstdio',
    'stdlib.h' : 'cstdlib',
    'string.h' : 'cstring',
    'time.h'   : 'ctime',
    'wchar.h'  : 'cwchar',
    'wctype.h' : 'cwctype',
}

# Matches '#include', '#import', '%include' and '%import' lines. Group 3
# is the text between the opening ('<' or '"') and closing ('>' or '"')
# delimiter.
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')


def include_key(line):
    """Return a sort key for an include line.

    Every directory component of the included path is prefixed with a
    space, so that, when keys are compared as strings, entries inside a
    directory sort before plain files whose name starts with the same
    characters.

    line -- a line that must match include_re; an AssertionError
            carrying the line is raised otherwise.
    """

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[:-1] = [' ' + part for part in parts[:-1]]

    return '/'.join(parts)


def _include_matcher(keyword="#include", delim="<>"):
    """Build a matcher for include statements.

    keyword -- text of the directive to match (e.g. "#include" or
               "%import"). It is inserted into a regular expression
               verbatim, so regex metacharacters are not escaped.
    delim   -- two-character string holding the opening and closing
               delimiter of the file name (e.g. "<>" or '""').

    Returns a function matcher(context, line) that returns a
    (keyword, file, extra) tuple, where extra is whatever follows the
    closing delimiter, or a tuple of three None values if the line is
    not such an include statement. The context argument is accepted for
    interface compatibility with the other matchers and is not used.
    """

    # The file name group is non-greedy so that it stops at the first
    # closing delimiter; anything after it (e.g. a trailing comment that
    # itself contains a delimiter character) ends up in 'extra'.
    rex = re.compile(r'^(%s)\s*%s(.*?)%s(.*)$' %
                     (keyword, delim[0], delim[1]))

    def matcher(context, line):
        m = rex.match(line)
        return m.groups() if m else (None, ) * 3

    return matcher

def _include_matcher_fname(fname, **kwargs):
    """Build a matcher for includes of files whose name matches a pattern.

    fname  -- regular expression applied with re.match (i.e. anchored at
              the start) to the included file name.
    kwargs -- forwarded unchanged to _include_matcher, which is used to
              match the actual include line.

    Returns a function matcher(context, line) that returns the
    (keyword, file, extra) tuple produced by the underlying matcher when
    the included file name matches the pattern, and a tuple of three
    None values otherwise.
    """

    rex = re.compile(fname)
    base_matcher = _include_matcher(**kwargs)

    def matcher(context, line):
        # Use a distinct name for the matched file so the 'fname'
        # pattern argument of the enclosing function is not shadowed.
        (keyword, included, extra) = base_matcher(context, line)
        if included and rex.match(included):
            return (keyword, included, extra)
        return (None, ) * 3

    return matcher


def _include_matcher_main():
    """Build a matcher for a C/C++ source file's primary header (i.e.,
    a file with the same base name, but a header extension).

    The returned function matcher(context, line) reads
    context["filename"], which must have the form "src/<base>.<ext>"
    for anything to match. A double-quoted #include of
    "<base>.<header ext>" yields a (keyword, file, extra) tuple; every
    other line, and every file whose extension has no known header
    extension, yields a tuple of three None values.
    """

    base_matcher = _include_matcher(delim='""')
    rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # Source file extension -> extension of the matching header
    header_map = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
        }
    no_match = (None, ) * 3

    def matcher(context, line):
        m = rex.match(context["filename"])
        if not m:
            return no_match
        base, ext = m.groups()

        header_ext = header_map.get(ext)
        if header_ext is None:
            # Not a source file type that has a primary header
            return no_match

        (keyword, fname, extra) = base_matcher(context, line)
        if fname == "%s.%s" % (base, header_ext):
            return (keyword, fname, extra)

        return no_match

    return matcher

class SortIncludes(object):
    """Regroup and sort the include statements of a source file.

    An instance is callable: given the lines of a file it yields the
    lines back, with every run of include statements replaced by the
    same includes grouped by type (see block_order), de-duplicated and
    sorted as plain strings within each group, and with a single empty
    line between groups.
    """

    # different types of includes for different sorting of headers
    # "<source base name>" - the source file's own header comes first
    # <Python.h>           - Python header needs to be first if it exists
    # <*.h>                - system headers
    # <*>                  - STL headers
    # <*.(hh|hxx|hpp|H)>   - C++ Headers
    # "*"                  - M5 headers
    #
    # Each entry is (block type, delimiter pair, matcher). The matchers
    # are tried in order and the first one that accepts a line decides
    # the line's block type.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""',
         _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Output order of the include blocks. Block types listed in the same
    # tuple are merged into a single sorted block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Map every block type to the index of its group in block_order
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Make sure the include storage exists even before the first
        # call, so dump_blocks()/dump_includes() are always usable.
        self.reset()

    def reset(self):
        """Forget all include lines collected so far."""
        # clear all stored headers
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        block_types -- iterable of block type names; types without any
                       stored include are skipped.

        Returns a sorted list of the distinct stored include lines.
        """

        includes = []
        for block_type in block_types:
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all stored includes as a list of output lines, one
        block per entry of block_order with an empty string between
        non-empty blocks, and clear the stored includes."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its include blocks sorted.

        lines    -- iterable of the file's lines. Presumably without
                    trailing newlines; confirm against the caller.
        filename -- name of the file, made available to the matchers
                    as context["filename"].
        language -- language of the file; when it is 'C++', C library
                    headers listed in cpp_c_headers are replaced by
                    their C++ counterparts.

        Empty lines inside a run of includes are dropped. When the run
        ends, the sorted blocks are emitted followed by one empty line
        and the line that ended the run.
        """
        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            """Return (block type, cleaned-up include line) for an
            include statement, or (None, line) for any other line."""
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include

# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))


def options():
    """Create the command line option parser for this script.

    Returns an optparse.OptionParser that accepts comma separated
    lists of directories to ignore (-d), files to ignore (-f) and
    languages to process (-l), plus a dry-run flag (-n). The list
    defaults are built from default_dir_ignore, default_file_ignore
    and default_languages.
    """
    import optparse
    parser = optparse.OptionParser()
    add_option = parser.add_option
    # The defaults are sets; sort them so the default strings (and the
    # generated help text) are the same on every run.
    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
               default=','.join(sorted(default_dir_ignore)),
               help="ignore directories")
    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
               default=','.join(sorted(default_file_ignore)),
               help="ignore files")
    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
               default=','.join(sorted(default_languages)),
               help="languages")
    add_option('-n', '--dry-run', action='store_true',
               help="don't overwrite files")

    return parser

def parse_args(parser):
    """Parse the command line and turn the list options into sets.

    parser -- object whose parse_args() method returns an
              (options, arguments) pair, with the options carrying the
              comma separated strings dir_ignore, file_ignore and
              languages.

    Returns the (options, arguments) pair, with those three options
    replaced by frozensets of their entries. Whitespace around entries
    is removed and empty entries are dropped, so an empty option value
    gives an empty set.
    """
    opts, args = parser.parse_args()

    def split_list(value):
        entries = (entry.strip() for entry in value.split(','))
        return frozenset(entry for entry in entries if entry)

    opts.dir_ignore = split_list(opts.dir_ignore)
    opts.file_ignore = split_list(opts.file_ignore)
    opts.languages = split_list(opts.languages)

    return opts, args

# Script entry point: sort the includes of every matching file found
# under each base path given on the command line.
if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename, language in find_files(
                base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Only report what would be processed. A single
                # parenthesized argument keeps this valid both as a
                # Python 2 print statement and a Python 3 call.
                print("%s: %s" % (filename, language))
            else:
                # A fresh SortIncludes is passed for each file, with the
                # same name given as both filename arguments.
                update_file(filename, filename, language, SortIncludes())