sort_includes.py revision 11397
13005Sstever@eecs.umich.edu#!/usr/bin/env python 23005Sstever@eecs.umich.edu 33005Sstever@eecs.umich.edu# Copyright (c) 2011 The Hewlett-Packard Development Company 43005Sstever@eecs.umich.edu# All rights reserved. 53005Sstever@eecs.umich.edu# 63005Sstever@eecs.umich.edu# Redistribution and use in source and binary forms, with or without 73005Sstever@eecs.umich.edu# modification, are permitted provided that the following conditions are 83005Sstever@eecs.umich.edu# met: redistributions of source code must retain the above copyright 93005Sstever@eecs.umich.edu# notice, this list of conditions and the following disclaimer; 103005Sstever@eecs.umich.edu# redistributions in binary form must reproduce the above copyright 113005Sstever@eecs.umich.edu# notice, this list of conditions and the following disclaimer in the 123005Sstever@eecs.umich.edu# documentation and/or other materials provided with the distribution; 133005Sstever@eecs.umich.edu# neither the name of the copyright holders nor the names of its 143005Sstever@eecs.umich.edu# contributors may be used to endorse or promote products derived from 153005Sstever@eecs.umich.edu# this software without specific prior written permission. 163005Sstever@eecs.umich.edu# 173005Sstever@eecs.umich.edu# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 183005Sstever@eecs.umich.edu# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 193005Sstever@eecs.umich.edu# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 203005Sstever@eecs.umich.edu# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 213005Sstever@eecs.umich.edu# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 223005Sstever@eecs.umich.edu# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 233005Sstever@eecs.umich.edu# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 243005Sstever@eecs.umich.edu# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 253005Sstever@eecs.umich.edu# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 263005Sstever@eecs.umich.edu# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 273005Sstever@eecs.umich.edu# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 283005Sstever@eecs.umich.edu# 292889SN/A# Authors: Nathan Binkert 302889SN/A 312710SN/Aimport os 322710SN/Aimport re 332934SN/Aimport sys 342934SN/A 352549SN/Afrom file_types import * 362995SN/A 372549SN/Acpp_c_headers = { 383088Sstever@eecs.umich.edu 'assert.h' : 'cassert', 393088Sstever@eecs.umich.edu 'ctype.h' : 'cctype', 403088Sstever@eecs.umich.edu 'errno.h' : 'cerrno', 412889SN/A 'float.h' : 'cfloat', 422710SN/A 'limits.h' : 'climits', 432917SN/A 'locale.h' : 'clocale', 442710SN/A 'math.h' : 'cmath', 452917SN/A 'setjmp.h' : 'csetjmp', 462948SN/A 'signal.h' : 'csignal', 472995SN/A 'stdarg.h' : 'cstdarg', 482995SN/A 'stddef.h' : 'cstddef', 492995SN/A 'stdio.h' : 'cstdio', 502995SN/A 'stdlib.h' : 'cstdlib', 512995SN/A 'string.h' : 'cstring', 523143Shsul@eecs.umich.edu 'time.h' : 'ctime', 533025Ssaidi@eecs.umich.edu 'wchar.h' : 'cwchar', 543143Shsul@eecs.umich.edu 'wctype.h' : 'cwctype', 553143Shsul@eecs.umich.edu} 563143Shsul@eecs.umich.edu 573143Shsul@eecs.umich.eduinclude_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]') 583183Shsul@eecs.umich.edudef include_key(line): 593183Shsul@eecs.umich.edu '''Mark directories with a leading space so directories 602710SN/A are sorted before files''' 612710SN/A 622710SN/A match = include_re.match(line) 632710SN/A assert match, line 642710SN/A keyword = match.group(2) 652710SN/A include = match.group(3) 662710SN/A 672934SN/A # Everything but the file part needs to have a space prepended 682934SN/A parts = include.split('/') 692934SN/A if len(parts) == 2 and parts[0] == 'dnet': 702953SN/A # Don't sort the dnet includes with respect to each other, but 712934SN/A # make them sorted with respect to non dnet includes. Python 722934SN/A # guarantees that sorting is stable, so just clear the 732934SN/A # basename part of the filename. 742953SN/A parts[1] = ' ' 752934SN/A parts[0:-1] = [ ' ' + s for s in parts[0:-1] ] 762934SN/A key = '/'.join(parts) 772934SN/A 782953SN/A return key 792566SN/A 803005Sstever@eecs.umich.edu 813005Sstever@eecs.umich.edudef _include_matcher(keyword="#include", delim="<>"): 823180Sstever@eecs.umich.edu """Match an include statement and return a (keyword, file, extra) 833180Sstever@eecs.umich.edu duple, or a touple of None values if there isn't a match.""" 842995SN/A 852995SN/A rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1])) 862995SN/A 872995SN/A def matcher(context, line): 882995SN/A m = rex.match(line) 892995SN/A return m.groups() if m else (None, ) * 3 902995SN/A 912995SN/A return matcher 922917SN/A 932995SN/Adef _include_matcher_fname(fname, **kwargs): 943005Sstever@eecs.umich.edu """Match an include of a specific file name. Any keyword arguments 952995SN/A are forwarded to _include_matcher, which is used to match the 963005Sstever@eecs.umich.edu actual include line.""" 973005Sstever@eecs.umich.edu 983005Sstever@eecs.umich.edu rex = re.compile(fname) 993005Sstever@eecs.umich.edu base_matcher = _include_matcher(**kwargs) 1003005Sstever@eecs.umich.edu 1013005Sstever@eecs.umich.edu def matcher(context, line): 1023050Sstever@eecs.umich.edu (keyword, fname, extra) = base_matcher(context, line) 1033005Sstever@eecs.umich.edu if fname and rex.match(fname): 1043005Sstever@eecs.umich.edu return (keyword, fname, extra) 1053005Sstever@eecs.umich.edu else: 1063050Sstever@eecs.umich.edu return (None, ) * 3 1073025Ssaidi@eecs.umich.edu 1083005Sstever@eecs.umich.edu return matcher 1093005Sstever@eecs.umich.edu 1103005Sstever@eecs.umich.edu 1113005Sstever@eecs.umich.edudef _include_matcher_main(): 1123005Sstever@eecs.umich.edu """Match a C/C++ source file's primary header (i.e., a file with 1133050Sstever@eecs.umich.edu the same base name, but a header extension).""" 1143005Sstever@eecs.umich.edu 1153005Sstever@eecs.umich.edu base_matcher = _include_matcher(delim='""') 1163005Sstever@eecs.umich.edu rex = re.compile(r"^src/(.*)\.([^.]+)$") 1172566SN/A header_map = { 1182710SN/A "c" : "h", 1192710SN/A "cc" : "hh", 1203183Shsul@eecs.umich.edu "cpp" : "hh", 1213183Shsul@eecs.umich.edu } 1223183Shsul@eecs.umich.edu def matcher(context, line): 1233183Shsul@eecs.umich.edu m = rex.match(context["filename"]) 1243183Shsul@eecs.umich.edu if not m: 1253183Shsul@eecs.umich.edu return (None, ) * 3 1263183Shsul@eecs.umich.edu base, ext = m.groups() 1273183Shsul@eecs.umich.edu (keyword, fname, extra) = base_matcher(context, line) 1283183Shsul@eecs.umich.edu try: 1293183Shsul@eecs.umich.edu if fname == "%s.%s" % (base, header_map[ext]): 1303183Shsul@eecs.umich.edu return (keyword, fname, extra) 1313183Shsul@eecs.umich.edu except KeyError: 1323183Shsul@eecs.umich.edu pass 1333183Shsul@eecs.umich.edu 1343183Shsul@eecs.umich.edu return (None, ) * 3 1353183Shsul@eecs.umich.edu 1363183Shsul@eecs.umich.edu return matcher 1373183Shsul@eecs.umich.edu 1383183Shsul@eecs.umich.educlass SortIncludes(object): 1393183Shsul@eecs.umich.edu # different types of includes for different sorting of headers 1403183Shsul@eecs.umich.edu # <Python.h> - Python header needs to be first if it exists 1413183Shsul@eecs.umich.edu # <*.h> - system headers (directories before files) 1423183Shsul@eecs.umich.edu # <*> - STL headers 1433183Shsul@eecs.umich.edu # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files) 1443183Shsul@eecs.umich.edu # "*" - M5 headers (directories before files) 1452917SN/A includes_re = ( 1463046Sstever@eecs.umich.edu ('main', '""', _include_matcher_main()), 1472948SN/A ('python', '<>', _include_matcher_fname("^Python\.h$")), 1482948SN/A ('c', '<>', _include_matcher_fname("^.*\.h$")), 1492948SN/A ('stl', '<>', _include_matcher_fname("^\w+$")), 1503046Sstever@eecs.umich.edu ('cc', '<>', _include_matcher_fname("^.*\.(hh|hxx|hpp|H)$")), 1512917SN/A ('m5header', '""', _include_matcher_fname("^.*\.h{1,2}$", delim='""')), 1523046Sstever@eecs.umich.edu ('swig0', '<>', _include_matcher(keyword="%import")), 1533022Shsul@eecs.umich.edu ('swig1', '<>', _include_matcher(keyword="%include")), 1543046Sstever@eecs.umich.edu ('swig2', '""', _include_matcher(keyword="%import", delim='""')), 1553022Shsul@eecs.umich.edu ('swig3', '""', _include_matcher(keyword="%include", delim='""')), 1563022Shsul@eecs.umich.edu ) 1573143Shsul@eecs.umich.edu 1583143Shsul@eecs.umich.edu block_order = ( 1593143Shsul@eecs.umich.edu ('main', ), 1603143Shsul@eecs.umich.edu ('python', ), 1613143Shsul@eecs.umich.edu ('c', ), 1623133Shsul@eecs.umich.edu ('stl', ), 1633133Shsul@eecs.umich.edu ('cc', ), 1643133Shsul@eecs.umich.edu ('m5header', ), 1653133Shsul@eecs.umich.edu ('swig0', 'swig1', 'swig2', 'swig3', ), 1662710SN/A ) 1672740SN/A 168 def __init__(self): 169 self.block_priority = {} 170 for prio, keys in enumerate(self.block_order): 171 for key in keys: 172 self.block_priority[key] = prio 173 174 def reset(self): 175 # clear all stored headers 176 self.includes = {} 177 178 def dump_blocks(self, block_types): 179 """Merge includes of from several block types into one large 180 block of sorted includes. This is useful when we have multiple 181 include block types (e.g., swig includes) with the same 182 priority.""" 183 184 includes = [] 185 for block_type in block_types: 186 try: 187 includes += self.includes[block_type] 188 except KeyError: 189 pass 190 191 return sorted(set(includes)) 192 193 def dump_includes(self): 194 blocks = [] 195 # Create a list of blocks in the prescribed include 196 # order. Each entry in the list is a multi-line string with 197 # multiple includes. 198 for types in self.block_order: 199 block = "\n".join(self.dump_blocks(types)) 200 if block: 201 blocks.append(block) 202 203 self.reset() 204 return "\n\n".join(blocks) 205 206 def __call__(self, lines, filename, language): 207 self.reset() 208 209 context = { 210 "filename" : filename, 211 "language" : language, 212 } 213 214 def match_line(line): 215 if not line: 216 return (None, line) 217 218 for include_type, (ldelim, rdelim), matcher in self.includes_re: 219 keyword, include, extra = matcher(context, line) 220 if keyword: 221 # if we've got a match, clean up the #include line, 222 # fix up stl headers and store it in the proper category 223 if include_type == 'c' and language == 'C++': 224 stl_inc = cpp_c_headers.get(include, None) 225 if stl_inc: 226 include = stl_inc 227 include_type = 'stl' 228 229 return (include_type, 230 keyword + ' ' + ldelim + include + rdelim + extra) 231 232 return (None, line) 233 234 processing_includes = False 235 for line in lines: 236 include_type, line = match_line(line) 237 if include_type: 238 try: 239 self.includes[include_type].append(line) 240 except KeyError: 241 self.includes[include_type] = [ line ] 242 243 processing_includes = True 244 elif processing_includes and not line.strip(): 245 # Skip empty lines while processing includes 246 pass 247 elif processing_includes: 248 # We are now exiting an include block 249 processing_includes = False 250 251 # Output pending includes, a new line between, and the 252 # current l. 253 yield self.dump_includes() 254 yield '' 255 yield line 256 else: 257 # We are not in an include block, so just emit the line 258 yield line 259 260 # We've reached EOF, so dump any pending includes 261 if processing_includes: 262 yield self.dump_includes() 263 264 265 266# default language types to try to apply our sorting rules to 267default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig')) 268 269def options(): 270 import optparse 271 options = optparse.OptionParser() 272 add_option = options.add_option 273 add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string', 274 default=','.join(default_dir_ignore), 275 help="ignore directories") 276 add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string', 277 default=','.join(default_file_ignore), 278 help="ignore files") 279 add_option('-l', '--languages', metavar="LANG[,LANG]", type='string', 280 default=','.join(default_languages), 281 help="languages") 282 add_option('-n', '--dry-run', action='store_true', 283 help="don't overwrite files") 284 285 return options 286 287def parse_args(parser): 288 opts,args = parser.parse_args() 289 290 opts.dir_ignore = frozenset(opts.dir_ignore.split(',')) 291 opts.file_ignore = frozenset(opts.file_ignore.split(',')) 292 opts.languages = frozenset(opts.languages.split(',')) 293 294 return opts,args 295 296if __name__ == '__main__': 297 parser = options() 298 opts, args = parse_args(parser) 299 300 for base in args: 301 for filename,language in find_files(base, languages=opts.languages, 302 file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore): 303 if opts.dry_run: 304 print "%s: %s" % (filename, language) 305 else: 306 update_file(filename, filename, language, SortIncludes()) 307