#!/usr/bin/env python
# sort_includes.py revision 11406:dd204e5baba7
#
# Copyright (c) 2014-2015 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder.  You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2011 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
#          Andreas Sandberg

"""Group and sort the include/import directives of source files.

The SortIncludes class consumes the lines of one file and yields them
back with every run of include directives regrouped by category and
sorted within each group.
"""

import os
import re
import sys

# NOTE(review): find_files, update_file, default_dir_ignore and
# default_file_ignore are used below but are not defined in this file;
# presumably they are provided by this star import -- confirm.
from file_types import *

# C standard headers and the C++ wrapper headers that replace them when
# the file being processed is C++.
cpp_c_headers = {
    'assert.h' : 'cassert',
    'ctype.h'  : 'cctype',
    'errno.h'  : 'cerrno',
    'float.h'  : 'cfloat',
    'limits.h' : 'climits',
    'locale.h' : 'clocale',
    'math.h'   : 'cmath',
    'setjmp.h' : 'csetjmp',
    'signal.h' : 'csignal',
    'stdarg.h' : 'cstdarg',
    'stddef.h' : 'cstddef',
    'stdio.h'  : 'cstdio',
    'stdlib.h' : 'cstdlib',
    'string.h' : 'cstring',
    'time.h'   : 'ctime',
    'wchar.h'  : 'cwchar',
    'wctype.h' : 'cwctype',
}

include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
def include_key(line):
    '''Mark directories with a leading space so directories
    are sorted before files.

    line must be an include/import directive matched by include_re; an
    AssertionError carrying the line is raised otherwise. Returns the
    sort key string built from the included path.'''

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]

    return '/'.join(parts)


def _include_matcher(keyword="#include", delim="<>"):
    """Build a matcher for include statements.

    keyword is inserted into the regular expression as-is; delim is a
    two character string holding the opening and closing delimiter.

    The returned function takes (context, line) and returns a
    (keyword, file, extra) tuple, or a tuple of three None values if
    the line isn't a matching include statement. extra is whatever
    follows the closing delimiter on the line."""

    opener = re.escape(delim[0])
    closer = re.escape(delim[1])
    # Stop the file name at the first closing delimiter so that a
    # trailing comment containing another delimiter is left in 'extra'
    # instead of being swallowed into the file name.
    rex = re.compile(r'^(%s)\s*%s([^%s]*)%s(.*)$' %
                     (keyword, opener, closer, closer))

    def matcher(context, line):
        m = rex.match(line)
        return m.groups() if m else (None, ) * 3

    return matcher

def _include_matcher_fname(fname, **kwargs):
    """Match an include of a specific file name. Any keyword arguments
    are forwarded to _include_matcher, which is used to match the
    actual include line.

    fname is a regular expression that is matched (anchored at the
    start) against the included file name."""

    rex = re.compile(fname)
    base_matcher = _include_matcher(**kwargs)

    def matcher(context, line):
        (keyword, included, extra) = base_matcher(context, line)
        if included and rex.match(included):
            return (keyword, included, extra)

        return (None, ) * 3

    return matcher


def _include_matcher_main():
    """Match a C/C++ source file's primary header (i.e., a file with
    the same base name, but a header extension).

    The returned matcher reads context["filename"], which has to start
    with 'src/' for anything to match."""

    base_matcher = _include_matcher(delim='""')
    rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # Source file extension -> extension of its primary header
    header_map = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
    }
    def matcher(context, line):
        m = rex.match(context["filename"])
        if not m:
            return (None, ) * 3
        base, ext = m.groups()

        header_ext = header_map.get(ext)
        if header_ext is None:
            # Not a source file type with a known primary header
            return (None, ) * 3

        (keyword, fname, extra) = base_matcher(context, line)
        if fname == "%s.%s" % (base, header_ext):
            return (keyword, fname, extra)

        return (None, ) * 3

    return matcher

class SortIncludes(object):
    """Callable that regroups and sorts the include blocks of a file.

    Calling an instance with the lines of a file returns a generator
    yielding the lines of the rewritten file."""

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (include type, delimiters used on output, matcher).
    # The entries are tried in order and the first match wins.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""',
         _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
    )

    # Order in which the blocks are emitted. Include types that share a
    # tuple are merged into a single block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
    )

    def __init__(self):
        # Map every include type to the index of its block in block_order
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Make sure self.includes exists even before the first call
        self.reset()

    def reset(self):
        """Forget all include lines collected so far."""
        # clear all stored headers
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns a sorted list of the include lines with duplicates
        removed."""

        includes = []
        for block_type in block_types:
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all pending include lines, block by block in
        block_order with an empty line between non-empty blocks, and
        clear the pending includes."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Yield the lines of a file with its include blocks sorted.

        lines is an iterable of the file's lines, filename is used to
        identify the file's primary header, and language selects
        language specific rewrites (C headers are replaced by their
        C++ counterparts when language is 'C++')."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
        }

        def match_line(line):
            # Classify a line. Returns (include type, normalized line)
            # for an include and (None, unmodified line) otherwise.
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)

                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include

# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))

def options():
    """Create the command line option parser for this script."""
    import optparse
    options = optparse.OptionParser()
    add_option = options.add_option
    # NOTE(review): default_dir_ignore and default_file_ignore are not
    # defined in this file; presumably they come from file_types --
    # confirm, otherwise this raises a NameError.
    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
               default=','.join(default_dir_ignore),
               help="ignore directories")
    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
               default=','.join(default_file_ignore),
               help="ignore files")
    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
               default=','.join(default_languages),
               help="languages")
    add_option('-n', '--dry-run', action='store_true',
               help="don't overwrite files")

    return options

def parse_args(parser):
    """Parse the command line with parser and return (opts, args), with
    the comma separated list options converted to frozensets."""
    opts,args = parser.parse_args()

    opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
    opts.file_ignore = frozenset(opts.file_ignore.split(','))
    opts.languages = frozenset(opts.languages.split(','))

    return opts,args

if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename,language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Single-argument call form prints the same text on
                # both Python 2 and Python 3.
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())