#!/usr/bin/env python
# sort_includes.py revision 10674
"""Sort and normalise the include blocks of source files.

Include lines are grouped into categories (primary header, Python,
C system headers, STL headers, C++ headers, project headers, SWIG
imports/includes), each category is sorted and de-duplicated, and the
categories are emitted in a fixed order separated by blank lines.
"""

import os
import re
import sys

from file_types import *

# Map from a C standard header to the C++ header that replaces it when
# the file being processed is C++.
cpp_c_headers = {
    'assert.h' : 'cassert',
    'ctype.h'  : 'cctype',
    'errno.h'  : 'cerrno',
    'float.h'  : 'cfloat',
    'limits.h' : 'climits',
    'locale.h' : 'clocale',
    'math.h'   : 'cmath',
    'setjmp.h' : 'csetjmp',
    'signal.h' : 'csignal',
    'stdarg.h' : 'cstdarg',
    'stddef.h' : 'cstddef',
    'stdio.h'  : 'cstdio',
    'stdlib.h' : 'cstdlib',
    'string.h' : 'cstring',
    'time.h'   : 'ctime',
    'wchar.h'  : 'cwchar',
    'wctype.h' : 'cwctype',
}

include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
def include_key(line):
    '''Return a sort key for an include line.

    Directories are marked with a leading space so directories are
    sorted before files.  The line must match include_re; otherwise
    the assertion fails with the offending line as its message.'''

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]

    return '/'.join(parts)


def _include_matcher(keyword="#include", delim="<>"):
    """Build a matcher for include statements.

    keyword -- the directive to look for; it is interpolated into the
               regular expression unescaped.
    delim   -- two-character string holding the opening and closing
               delimiter around the file name.

    The returned callable takes (context, line) and returns a
    (keyword, file, extra) tuple, or a tuple of three None values if
    the line isn't a match.  'extra' is whatever follows the closing
    delimiter (e.g. a trailing comment)."""

    # The file name is captured non-greedily so that it stops at the
    # first closing delimiter.  A greedy capture would swallow trailing
    # text that happens to contain the delimiter again, for example
    #     #include "a.hh" // see "b.hh"
    # and the line would then not be recognised as an include at all.
    rex = re.compile(r'^(%s)\s*%s(.*?)%s(.*)$' %
                     (keyword, delim[0], delim[1]))

    def matcher(context, line):
        m = rex.match(line)
        return m.groups() if m else (None, ) * 3

    return matcher

def _include_matcher_fname(fname, **kwargs):
    """Match an include of a specific file name.

    fname is a regular expression that is matched against the start of
    the included file name.  Any keyword arguments are forwarded to
    _include_matcher, which is used to match the actual include
    line."""

    rex = re.compile(fname)
    base_matcher = _include_matcher(**kwargs)

    def matcher(context, line):
        (keyword, fname, extra) = base_matcher(context, line)
        if fname and rex.match(fname):
            return (keyword, fname, extra)

        return (None, ) * 3

    return matcher


def _include_matcher_main():
    """Match a C/C++ source file's primary header (i.e., a file with
    the same base name, but a header extension).

    The file being processed is taken from context["filename"] and
    must look like 'src/<base>.<ext>'; only the extensions listed in
    header_map below have a primary header."""

    base_matcher = _include_matcher(delim='""')
    rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # source extension -> extension of the corresponding header
    header_map = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
        }

    def matcher(context, line):
        m = rex.match(context["filename"])
        if not m:
            return (None, ) * 3

        base, ext = m.groups()
        (keyword, fname, extra) = base_matcher(context, line)
        header_ext = header_map.get(ext)
        if header_ext is not None and \
                fname == "%s.%s" % (base, header_ext):
            return (keyword, fname, extra)

        return (None, ) * 3

    return matcher

class SortIncludes(object):
    """Callable that rewrites the include blocks of a file.

    Calling an instance with the lines of a file yields the lines back
    with every run of include statements replaced by sorted,
    de-duplicated blocks in the order given by block_order."""

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (block type, delimiters used when re-emitting the
    # line, matcher).  Entries are tried in order and the first match
    # wins.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""',
         _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Order in which blocks are written out.  Block types listed in
    # the same tuple are merged into a single sorted block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Map from block type to its position in block_order.
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Make sure self.includes exists even before the first call.
        self.reset()

    def reset(self):
        """Clear all stored headers."""
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes.  This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns a sorted list of the unique include lines stored for
        the given block types."""

        includes = []
        for block_type in block_types:
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all pending includes as one string and forget them.

        Blocks appear in the prescribed include order; includes within
        a block are separated by a newline and blocks by an empty
        line.  Empty blocks are omitted."""

        blocks = []
        # Create a list of blocks in the prescribed include
        # order.  Each entry in the list is a multi-line string with
        # multiple includes.
        for types in self.block_order:
            block = "\n".join(self.dump_blocks(types))
            if block:
                blocks.append(block)

        self.reset()
        return "\n\n".join(blocks)

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its includes sorted.

        lines    -- iterable of the file's lines
        filename -- name of the file, used to find its primary header
        language -- language of the file; for 'C++', C headers listed
                    in cpp_c_headers are replaced by their C++
                    counterparts

        Lines outside include blocks are yielded unchanged.  Empty
        lines inside an include block are dropped, and a single empty
        line is emitted after each rewritten block that is followed by
        other content."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            """Return (block type, cleaned up line) for an include
            line, or (None, line) for anything else."""
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if not keyword:
                    continue

                # if we've got a match, clean up the #include line,
                # fix up stl headers and store it in the proper category
                if include_type == 'c' and language == 'C++':
                    stl_inc = cpp_c_headers.get(include, None)
                    if stl_inc:
                        include = stl_inc
                        include_type = 'stl'

                return (include_type,
                        keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                yield self.dump_includes()
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            yield self.dump_includes()



# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))

def options():
    """Create the command line option parser for this script."""
    import optparse
    parser = optparse.OptionParser()
    add_option = parser.add_option
    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
               default=','.join(default_dir_ignore),
               help="ignore directories")
    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
               default=','.join(default_file_ignore),
               help="ignore files")
    # Sorted so that the default string does not depend on set ordering.
    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
               default=','.join(sorted(default_languages)),
               help="languages")
    add_option('-n', '--dry-run', action='store_true',
               help="don't overwrite files")

    return parser

def parse_args(parser):
    """Parse the command line and return (opts, args).

    The comma separated dir_ignore, file_ignore and languages options
    are converted into frozensets."""
    opts,args = parser.parse_args()

    opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
    opts.file_ignore = frozenset(opts.file_ignore.split(','))
    opts.languages = frozenset(opts.languages.split(','))

    return opts,args

if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename,language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Parenthesised single argument: valid as both the
                # Python 2 print statement and the Python 3 function.
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())