sort_includes.py revision 11397
#!/usr/bin/env python

# Copyright (c) 2011 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert

import os
import re
import sys

from file_types import *

# C standard headers and the C++ header each one is rewritten to when
# the file being processed is C++ (see SortIncludes.__call__).
cpp_c_headers = {
    'assert.h' : 'cassert',
    'ctype.h' : 'cctype',
    'errno.h' : 'cerrno',
    'float.h' : 'cfloat',
    'limits.h' : 'climits',
    'locale.h' : 'clocale',
    'math.h' : 'cmath',
    'setjmp.h' : 'csetjmp',
    'signal.h' : 'csignal',
    'stdarg.h' : 'cstdarg',
    'stddef.h' : 'cstddef',
    'stdio.h' : 'cstdio',
    'stdlib.h' : 'cstdlib',
    'string.h' : 'cstring',
    'time.h' : 'ctime',
    'wchar.h' : 'cwchar',
    'wctype.h' : 'cwctype',
}

include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
def include_key(line):
    '''Return a sort key for an include line.

    Directory components of the included path are marked with a
    leading space so that directories sort before files.  The line
    must match include_re; otherwise the assert below fails.'''

    # NOTE(review): as far as visible in this file, nothing calls
    # include_key; SortIncludes.dump_blocks sorts the raw lines.
    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]
    key = '/'.join(parts)

    return key


def _include_matcher(keyword="#include", delim="<>"):
    """Build a matcher for include statements.

    keyword -- include keyword; it is inserted verbatim into the
               regular expression
    delim   -- two-character string holding the opening and closing
               delimiter of the file name

    The returned callable takes (context, line) and returns a
    (keyword, file, extra) triple, or a triple of None values if the
    line isn't a match."""

    # The file name is captured non-greedily so that it stops at the
    # first closing delimiter; anything after it (e.g. a trailing
    # comment that itself contains the delimiter) ends up in 'extra'
    # instead of polluting the file name.
    rex = re.compile(r'^(%s)\s*%s(.*?)%s(.*)$' % (keyword, delim[0], delim[1]))

    def matcher(context, line):
        m = rex.match(line)
        return m.groups() if m else (None, ) * 3

    return matcher

def _include_matcher_fname(fname, **kwargs):
    """Match an include of a specific file name.

    fname is a regular expression that the included file name must
    match (anchored at the start, since re.match is used).  Any
    keyword arguments are forwarded to _include_matcher, which is
    used to match the actual include line.

    The returned callable takes (context, line) and returns a
    (keyword, file, extra) triple, or a triple of None values."""

    rex = re.compile(fname)
    base_matcher = _include_matcher(**kwargs)

    def matcher(context, line):
        (keyword, fname, extra) = base_matcher(context, line)
        if fname and rex.match(fname):
            return (keyword, fname, extra)
        else:
            return (None, ) * 3

    return matcher


def _include_matcher_main():
    """Match a C/C++ source file's primary header (i.e., a file with
    the same base name, but a header extension).

    The returned callable reads context["filename"]; only file names
    of the form src/<base>.<ext> with a known source extension can
    match."""

    base_matcher = _include_matcher(delim='""')
    rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # source file extension -> extension of its primary header
    header_map = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
        }
    def matcher(context, line):
        m = rex.match(context["filename"])
        if not m:
            return (None, ) * 3
        base, ext = m.groups()
        header_ext = header_map.get(ext)
        if header_ext is None:
            # Not a source file type that has a primary header
            return (None, ) * 3

        (keyword, fname, extra) = base_matcher(context, line)
        if fname == "%s.%s" % (base, header_ext):
            return (keyword, fname, extra)

        return (None, ) * 3

    return matcher

class SortIncludes(object):
    """Callable that regroups and sorts the include lines of a file.

    Calling an instance with (lines, filename, language) returns a
    generator over the rewritten lines."""

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (block type, delimiters used when the line is
    # rewritten, matcher).  The first matcher that accepts a line wins.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""', _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Order in which the blocks are written out.  Block types that
    # share a tuple are merged into a single sorted block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Map every block type to the index of its group in block_order
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Make sure self.includes exists even before the first call,
        # so dump_blocks()/dump_includes() work on a fresh instance.
        self.reset()

    def reset(self):
        # clear all stored headers
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns a sorted list of the stored include lines with
        duplicates removed."""

        includes = []
        for block_type in block_types:
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all stored includes as a single string and clear
        them.  Lines within a block are separated by a newline, blocks
        by an empty line."""

        blocks = []
        # Create a list of blocks in the prescribed include
        # order. Each entry in the list is a multi-line string with
        # multiple includes.
        for types in self.block_order:
            block = "\n".join(self.dump_blocks(types))
            if block:
                blocks.append(block)

        self.reset()
        return "\n\n".join(blocks)

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its include blocks sorted.

        lines    -- iterable of the lines of the file
        filename -- name of the file; used to recognize the primary
                    header of a source file
        language -- language of the file; for 'C++', C headers listed
                    in cpp_c_headers are replaced by their C++ names

        Lines outside include blocks are yielded unchanged.  Each
        include block is yielded as one multi-line string."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)

                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                yield self.dump_includes()
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            yield self.dump_includes()



# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))

def options():
    """Build and return the optparse parser for the command line."""

    import optparse
    parser = optparse.OptionParser()
    add_option = parser.add_option
    # default_dir_ignore and default_file_ignore are not defined in
    # this file; presumably they come from the file_types star import.
    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
               default=','.join(default_dir_ignore),
               help="ignore directories")
    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
               default=','.join(default_file_ignore),
               help="ignore files")
    # Sorted so that the default string is the same on every run;
    # parse_args turns it back into a frozenset anyway.
    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
               default=','.join(sorted(default_languages)),
               help="languages")
    add_option('-n', '--dry-run', action='store_true',
               help="don't overwrite files")

    return parser

def parse_args(parser):
    """Parse the command line with the given parser.

    Returns (opts, args), where the comma separated dir_ignore,
    file_ignore and languages options have been split into
    frozensets."""

    opts,args = parser.parse_args()

    opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
    opts.file_ignore = frozenset(opts.file_ignore.split(','))
    opts.languages = frozenset(opts.languages.split(','))

    return opts,args

if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename,language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Single parenthesized argument: prints the same text
                # under both Python 2 and Python 3.
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())