sort_includes.py revision 11406:dd204e5baba7
#!/usr/bin/env python
#
# Copyright (c) 2014-2015 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder.  You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2011 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
#          Andreas Sandberg
436376Sgblack@eecs.umich.edu
446376Sgblack@eecs.umich.eduimport os
456376Sgblack@eecs.umich.eduimport re
466376Sgblack@eecs.umich.eduimport sys
476376Sgblack@eecs.umich.edu
484661Sksewell@umich.edufrom file_types import *
496376Sgblack@eecs.umich.edu
# Maps a C standard library header name to the name of its C++ wrapper
# header.  SortIncludes uses it to rewrite <foo.h> as <cfoo> when the
# file being processed is C++.
cpp_c_headers = {
    'assert.h' : 'cassert',
    'ctype.h'  : 'cctype',
    'errno.h'  : 'cerrno',
    'float.h'  : 'cfloat',
    'limits.h' : 'climits',
    'locale.h' : 'clocale',
    'math.h'   : 'cmath',
    'setjmp.h' : 'csetjmp',
    'signal.h' : 'csignal',
    'stdarg.h' : 'cstdarg',
    'stddef.h' : 'cstddef',
    'stdio.h'  : 'cstdio',
    'stdlib.h' : 'cstdlib',
    'string.h' : 'cstring',
    'time.h'   : 'ctime',
    'wchar.h'  : 'cwchar',
    'wctype.h' : 'cwctype',
}
694661Sksewell@umich.edu
# Matches a C/C++ '#include' / '#import' or swig '%include' / '%import'
# line; group 3 is the text between the <> or "" delimiters.
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')


def include_key(line):
    '''Return a sort key for an include line.

    Every directory component of the included path is marked with a
    leading space so that, when the key is used for sorting,
    directories are sorted before files.

    line -- the include statement; it must match include_re.

    Raises AssertionError (carrying the line) if the line is not an
    include statement.'''

    match = include_re.match(line)
    if match is None:
        # Raised explicitly rather than with an assert statement so the
        # check is not stripped when Python runs with -O.  The exception
        # type is the one the assert used to raise.
        raise AssertionError(line)
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[:-1] = [' ' + part for part in parts[:-1]]

    return '/'.join(parts)
926376Sgblack@eecs.umich.edu
936376Sgblack@eecs.umich.edu
946376Sgblack@eecs.umich.edudef _include_matcher(keyword="#include", delim="<>"):
954661Sksewell@umich.edu    """Match an include statement and return a (keyword, file, extra)
966376Sgblack@eecs.umich.edu    duple, or a touple of None values if there isn't a match."""
976376Sgblack@eecs.umich.edu
986376Sgblack@eecs.umich.edu    rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
996376Sgblack@eecs.umich.edu
1006376Sgblack@eecs.umich.edu    def matcher(context, line):
1016376Sgblack@eecs.umich.edu        m = rex.match(line)
1026376Sgblack@eecs.umich.edu        return m.groups() if m else (None, ) * 3
1034661Sksewell@umich.edu
1046376Sgblack@eecs.umich.edu    return matcher
1056376Sgblack@eecs.umich.edu
def _include_matcher_fname(fname, **kwargs):
    """Match an include of a specific file name.

    fname  -- regular expression that the included file name has to
              match (applied with re.match, i.e., anchored at the start
              of the name).
    kwargs -- forwarded to _include_matcher, which is used to match the
              actual include line.

    The returned function takes (context, line) and returns the
    (keyword, file, extra) tuple from the underlying matcher if the
    line is an include statement whose file name is non-empty and
    matches fname, or a tuple of three None values otherwise."""

    rex = re.compile(fname)
    base_matcher = _include_matcher(**kwargs)

    def matcher(context, line):
        # Named 'included' so it does not shadow the fname pattern of
        # the enclosing function.
        (keyword, included, extra) = base_matcher(context, line)
        if included and rex.match(included):
            return (keyword, included, extra)

        return (None, ) * 3

    return matcher
1226376Sgblack@eecs.umich.edu
1236379Sgblack@eecs.umich.edu
def _include_matcher_main():
    """Match a C/C++ source file's primary header (i.e., a file with
    the same base name, but a header extension).

    The returned function takes (context, line).  context["filename"]
    has to be of the form src/<base>.<ext> with <ext> being one of the
    known source extensions; the line then matches if it is a
    '#include "..."' statement of <base>.<header extension>.  The
    result is the (keyword, file, extra) tuple of the include, or a
    tuple of three None values if there isn't a match."""

    base_matcher = _include_matcher(delim='""')
    rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # Source file extension -> extension of its primary header
    header_map = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
        }

    def matcher(context, line):
        no_match = (None, ) * 3

        m = rex.match(context["filename"])
        if not m:
            return no_match

        base, ext = m.groups()
        header_ext = header_map.get(ext)
        if header_ext is None:
            # Not a source file type that has a primary header
            return no_match

        (keyword, fname, extra) = base_matcher(context, line)
        if fname == "%s.%s" % (base, header_ext):
            return (keyword, fname, extra)

        return no_match

    return matcher
1506376Sgblack@eecs.umich.edu
class SortIncludes(object):
    """Group and sort the include statements in a stream of lines.

    An instance is called as sorter(lines, filename, language) and
    returns a generator over the rewritten lines: every run of include
    statements is replaced by the same includes grouped by type (see
    block_order), with each group de-duplicated and sorted and an empty
    line between groups.  All other lines are passed through."""

    # Different types of includes for different sorting of headers.
    # Each entry is (type, delimiters, matcher); the entries are tried
    # in order and the first matcher that hits decides the type.
    # "<main>"             - header belonging to the source file itself
    # <Python.h>           - Python header, placed right after the main
    #                        header
    # <*.h>                - system headers
    # <*>                  - STL headers
    # <*.(hh|hxx|hpp|H)>   - C++ Headers
    # "*.h" / "*.hh"       - M5 headers
    # %import / %include   - swig imports and includes
    #
    # The file name patterns are raw strings so that '\.' and '\w'
    # reach the re module unchanged instead of being treated as
    # (invalid) string escape sequences.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""',
         _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Output order of the include groups.  The types listed in one
    # entry are merged into a single sorted group.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Include type -> index of its group in block_order
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Start with an empty include store so that dump_blocks() and
        # dump_includes() can be used before the first call.
        self.reset()

    def reset(self):
        """Clear all stored headers."""
        # Include type -> list of normalized include lines
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns a sorted list of the stored include lines of the given
        types with duplicates removed.  Types without stored includes
        are skipped."""

        includes = []
        for block_type in block_types:
            includes += self.includes.get(block_type, [])

        # NOTE(review): this is plain string order; include_key() is not
        # applied here -- confirm that this is the intended ordering.
        return sorted(set(includes))

    def dump_includes(self):
        """Return all stored includes as a list of lines, group by
        group in block_order with an empty line between two non-empty
        groups, and clear the stored includes."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its includes sorted.

        lines    -- iterable of the lines to process.
        filename -- name of the file the lines belong to; handed to the
                    matchers, which use it to find the main header.
        language -- language of the file; for 'C++', C headers listed
                    in cpp_c_headers are replaced by their C++
                    counterpart and treated as STL headers.

        Empty lines within a run of includes are dropped.  When a run
        is ended by another line, the sorted includes are emitted
        followed by an empty line and that line; a run that lasts
        until the end of the input is emitted without a trailing empty
        line."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            """Return (include type, normalized include line), or
            (None, line) if the line is not an include statement."""
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if not keyword:
                    continue

                # We've got a match: fix up stl headers, clean up the
                # include line and report its category.
                if include_type == 'c' and language == 'C++':
                    stl_inc = cpp_c_headers.get(include)
                    if stl_inc:
                        include = stl_inc
                        include_type = 'stl'

                return (include_type,
                        keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif not processing_includes:
                # We are not in an include block, so just emit the line
                yield line
            elif line.strip():
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            # Otherwise this is an empty line within an include block,
            # which is skipped.

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include
2766376Sgblack@eecs.umich.edu
# Default language types to try to apply our sorting rules to; used as
# the default of the --languages command line option.
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))
2796376Sgblack@eecs.umich.edu
def options():
    """Create the command line option parser.

    Returns an optparse.OptionParser with the options
    -d/--dir_ignore, -f/--file_ignore and -l/--languages, which each
    take a comma separated list, and the flag -n/--dry-run."""

    import optparse

    # Not called 'options' so the function's own name isn't shadowed.
    parser = optparse.OptionParser()
    add_option = parser.add_option

    # default_dir_ignore and default_file_ignore are not defined in this
    # file; presumably they are provided by the file_types star import.
    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
               default=','.join(default_dir_ignore),
               help="ignore directories")
    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
               default=','.join(default_file_ignore),
               help="ignore files")
    # Sorted, since the iteration order of a frozenset is arbitrary and
    # the default string should be the same on every run.
    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
               default=','.join(sorted(default_languages)),
               help="languages")
    add_option('-n', '--dry-run', action='store_true',
               help="don't overwrite files")

    return parser
2976376Sgblack@eecs.umich.edu
def parse_args(parser):
    """Parse the command line with the given parser.

    parser -- object whose parse_args() method returns an
              (options, arguments) pair, e.g. the parser from options().

    The comma separated dir_ignore, file_ignore and languages options
    are converted to frozensets.  Empty entries (from an empty option
    value or a stray comma) are dropped.

    Returns the (options, arguments) pair."""

    opts, args = parser.parse_args()

    for name in ('dir_ignore', 'file_ignore', 'languages'):
        entries = getattr(opts, name).split(',')
        setattr(opts, name, frozenset(entry for entry in entries if entry))

    return opts, args
3064661Sksewell@umich.edu
if __name__ == '__main__':
    # Sort the includes of all files of the selected languages found
    # below the paths given on the command line.
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        # find_files and update_file are not defined in this file;
        # presumably they are provided by the file_types star import.
        for filename, language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Dry run: only report what would be processed.  The
                # call form of print is valid in both Python 2 and 3.
                print("%s: %s" % (filename, language))
            else:
                # Rewrite the file in place (source and destination are
                # the same name).
                update_file(filename, filename, language, SortIncludes())
3184661Sksewell@umich.edu