#!/usr/bin/env python
#
# sort_includes.py revision 11402
#
# Copyright (c) 2014-2015 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2011 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
#          Andreas Sandberg

import os
import re
import sys

from file_types import *

# Maps a C standard header name to the C++ header that replaces it.
# Used when a '<...h>' include is found in a file whose language is 'C++'.
cpp_c_headers = {
    'assert.h' : 'cassert',
    'ctype.h'  : 'cctype',
    'errno.h'  : 'cerrno',
    'float.h'  : 'cfloat',
    'limits.h' : 'climits',
    'locale.h' : 'clocale',
    'math.h'   : 'cmath',
    'setjmp.h' : 'csetjmp',
    'signal.h' : 'csignal',
    'stdarg.h' : 'cstdarg',
    'stddef.h' : 'cstddef',
    'stdio.h'  : 'cstdio',
    'stdlib.h' : 'cstdlib',
    'string.h' : 'cstring',
    'time.h'   : 'ctime',
    'wchar.h'  : 'cwchar',
    'wctype.h' : 'cwctype',
}

# Groups: (1) '#' or '%', (2) 'include' or 'import', (3) the included path.
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
def include_key(line):
    '''Return a sort key for an include line.

    Directory components of the included path are marked with a
    leading space so directories are sorted before files.

    The line must match include_re; this is checked with an assert,
    so a non-matching line raises AssertionError (unless asserts are
    disabled).'''

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes. Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]

    return '/'.join(parts)


def _include_matcher(keyword="#include", delim="<>"):
    """Build a matcher for include statements.

    keyword -- the directive to match (e.g. "#include" or "%import");
               it is inserted into the regular expression verbatim.
    delim   -- two characters: the opening and the closing delimiter
               around the file name.

    Returns a function matcher(context, line) that returns a
    (keyword, file, extra) tuple, or a tuple of three None values if
    there isn't a match. 'extra' is whatever follows the closing
    delimiter on the line."""

    # The file name is captured non-greedily so that it ends at the
    # first closing delimiter. A greedy capture would swallow trailing
    # text that happens to contain the delimiter, e.g.
    # '#include <a.h> // vector<int>'.
    rex = re.compile(r'^(%s)\s*%s(.*?)%s(.*)$' % (keyword, delim[0], delim[1]))

    def matcher(context, line):
        m = rex.match(line)
        return m.groups() if m else (None, ) * 3

    return matcher

def _include_matcher_fname(fname, **kwargs):
    """Match an include of a specific file name.

    fname is a regular expression that the included file name has to
    match (from its start). Any keyword arguments are forwarded to
    _include_matcher, which is used to match the actual include line.

    Returns a matcher with the same calling convention and return
    value as the one built by _include_matcher."""

    rex = re.compile(fname)
    base_matcher = _include_matcher(**kwargs)

    def matcher(context, line):
        (keyword, fname, extra) = base_matcher(context, line)
        if fname and rex.match(fname):
            return (keyword, fname, extra)
        else:
            return (None, ) * 3

    return matcher


def _include_matcher_main():
    """Match a C/C++ source file's primary header (i.e., a file with
    the same base name, but a header extension).

    The returned matcher reads context["filename"], which has to look
    like 'src/<base>.<ext>'; only quoted includes ("...") of
    '<base>.<header ext>' are reported as a match."""

    base_matcher = _include_matcher(delim='""')
    rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # Source file extension -> extension of its primary header
    header_map = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
        }
    def matcher(context, line):
        m = rex.match(context["filename"])
        if not m:
            return (None, ) * 3
        base, ext = m.groups()
        (keyword, fname, extra) = base_matcher(context, line)
        try:
            if fname == "%s.%s" % (base, header_map[ext]):
                return (keyword, fname, extra)
        except KeyError:
            # Not a source extension we know a header extension for
            pass

        return (None, ) * 3

    return matcher

class SortIncludes(object):
    """Callable line filter that regroups and sorts include statements.

    Calling an instance with the lines of a file returns a generator
    that yields the same lines, except that every run of consecutive
    include statements is replaced by blocks of sorted, de-duplicated
    includes in the order given by block_order."""

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (include type, output delimiters, matcher). The
    # entries are tried in order and the first matcher that accepts a
    # line decides its type.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""', _include_matcher_fname(r"^.*\.h{1,2}$",
                                                  delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
    )

    # Output order of the include blocks. Types listed in the same
    # tuple are merged into a single sorted block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Map every include type to the index of its block in
        # block_order.
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

    def reset(self):
        """Forget all headers collected so far."""
        # include type -> list of normalized include lines
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns a sorted list of the include lines without duplicates;
        block types with no collected includes are skipped."""

        includes = []
        for block_type in block_types:
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all collected includes as a list of lines, one block
        per block_order entry with an empty line between non-empty
        blocks, and forget the collected includes."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its include blocks sorted.

        lines    -- iterable of the lines of the file; the code expects
                    them without a trailing newline (an include line is
                    matched up to its end, and empty lines are
                    recognized with strip()).
        filename -- name of the file, used to recognize the include of
                    a source file's primary header.
        language -- language of the file; C headers are replaced by
                    their C++ counterparts when this is 'C++'.

        Lines that are not includes are yielded unchanged. Empty lines
        inside a run of includes are dropped, and one empty line is
        yielded between the sorted includes and the line that ends the
        run."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            """Return (include type, normalized include line), or
            (None, line) if the line is not an include."""
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include

# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))

def options():
    """Build and return the optparse.OptionParser for the command line.

    default_dir_ignore and default_file_ignore are not defined in this
    file; presumably they are provided by 'from file_types import *'."""

    import optparse
    options = optparse.OptionParser()
    add_option = options.add_option
    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
               default=','.join(default_dir_ignore),
               help="ignore directories")
    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
               default=','.join(default_file_ignore),
               help="ignore files")
    # Sorted so that the default string does not depend on the
    # iteration order of the frozenset; parse_args turns it back into
    # a frozenset either way.
    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
               default=','.join(sorted(default_languages)),
               help="languages")
    add_option('-n', '--dry-run', action='store_true',
               help="don't overwrite files")

    return options

def parse_args(parser):
    """Parse the command line with the given parser.

    Returns (opts, args), where the comma separated dir_ignore,
    file_ignore and languages options have been split into
    frozensets."""

    opts,args = parser.parse_args()

    opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
    opts.file_ignore = frozenset(opts.file_ignore.split(','))
    opts.languages = frozenset(opts.languages.split(','))

    return opts,args

if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    # NOTE(review): find_files and update_file are not defined in this
    # file; presumably they come from 'from file_types import *'.
    for base in args:
        for filename,language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Single parenthesized argument: prints the same text
                # under Python 2 and Python 3.
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())