#!/usr/bin/env python2
#
# Copyright (c) 2014-2015 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2011 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
#          Andreas Sandberg

# sort_includes.py revision 12009:3345827969f5

"""Group and sort the include/import statements found in a stream of
source lines.

The SortIncludes class below is a line filter: it collects consecutive
include statements, classifies them into categories, and re-emits them
as sorted blocks separated by blank lines.
"""

import os
import re
import sys

# NOTE(review): find_files, update_file, default_dir_ignore and
# default_file_ignore are not defined in this file; presumably they are
# provided by this star import -- confirm against file_types.
from file_types import *

# C standard headers and the C++ header that replaces each of them when
# the file being processed is C++ (see SortIncludes.__call__).
cpp_c_headers = {
    'assert.h' : 'cassert',
    'ctype.h' : 'cctype',
    'errno.h' : 'cerrno',
    'float.h' : 'cfloat',
    'limits.h' : 'climits',
    'locale.h' : 'clocale',
    'math.h' : 'cmath',
    'setjmp.h' : 'csetjmp',
    'signal.h' : 'csignal',
    'stdarg.h' : 'cstdarg',
    'stddef.h' : 'cstddef',
    'stdio.h' : 'cstdio',
    'stdlib.h' : 'cstdlib',
    'string.h' : 'cstring',
    'time.h' : 'ctime',
    'wchar.h' : 'cwchar',
    'wctype.h' : 'cwctype',
}

include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
def include_key(line):
    '''Build a sort key for an include line in which every directory
    component is marked with a leading space, so directories are
    sorted before files.

    The line must match include_re; otherwise the assertion below
    fails with the offending line as its message.'''

    # NOTE(review): nothing in this file passes include_key to a sort;
    # SortIncludes.dump_blocks sorts the raw lines. Confirm whether
    # that is intended.
    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]
    key = '/'.join(parts)

    return key


def _include_matcher(keyword="#include", delim="<>"):
    """Create a matcher for include statements.

    keyword is the statement keyword and delim the pair of opening and
    closing delimiter characters; both are inserted into the regular
    expression verbatim.

    The returned function takes (context, line) and returns a
    (keyword, file, extra) tuple, or a tuple of three None values if
    the line isn't a match. context is not used by this matcher."""

    rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))

    def matcher(context, line):
        m = rex.match(line)
        return m.groups() if m else (None, ) * 3

    return matcher

def _include_matcher_fname(fname, **kwargs):
    """Match an include of a specific file name. fname is a regular
    expression that is matched against the start of the included file
    name. Any keyword arguments are forwarded to _include_matcher,
    which is used to match the actual include line."""

    rex = re.compile(fname)
    base_matcher = _include_matcher(**kwargs)

    def matcher(context, line):
        (keyword, fname, extra) = base_matcher(context, line)
        if fname and rex.match(fname):
            return (keyword, fname, extra)
        else:
            return (None, ) * 3

    return matcher


def _include_matcher_main():
    """Match a C/C++ source file's primary header (i.e., a file with
    the same base name, but a header extension).

    The returned matcher reads context["filename"] and only matches
    for file names of the form src/<base>.<ext>, where <ext> is one of
    the source extensions in header_map."""

    base_matcher = _include_matcher(delim='""')
    rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # Source file extension -> extension of its primary header
    header_map = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
        }
    def matcher(context, line):
        m = rex.match(context["filename"])
        if not m:
            return (None, ) * 3
        base, ext = m.groups()

        header_ext = header_map.get(ext)
        if header_ext is None:
            # Not a source file type with a known header extension
            return (None, ) * 3

        (keyword, fname, extra) = base_matcher(context, line)
        if fname == "%s.%s" % (base, header_ext):
            return (keyword, fname, extra)

        return (None, ) * 3

    return matcher

class SortIncludes(object):
    """Line filter that collects runs of include statements and
    re-emits them grouped by category and sorted within each group.

    Instances are callable; see __call__."""

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (include type, delimiters used when the line is
    # rewritten, matcher). Entries are tried in order and the first
    # matcher that returns a keyword wins.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('pybind', '""', _include_matcher_fname(r"^pybind11/.*\.h$",
                                                delim='""')),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""', _include_matcher_fname(r"^.*\.h{1,2}$",
                                                  delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Order in which the blocks are written out. Include types listed
    # in the same tuple are merged into a single sorted block.
    block_order = (
        ('python', ),
        ('pybind', ),
        ('main', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Map each include type to the index of its block in block_order
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Create the (empty) include store up front so dump_blocks and
        # dump_includes also work on an instance that hasn't been
        # called yet.
        self.reset()

    def reset(self):
        # clear all stored headers
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns a sorted list of the stored include lines for the
        given block types with duplicates removed."""

        includes = []
        for block_type in block_types:
            # Block types without any stored includes contribute nothing
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all stored includes as a list of lines, one block per
        block_order entry with an empty line between non-empty blocks,
        and clear the stored includes."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Generator that yields the lines of a file with every run of
        include statements regrouped and sorted.

        lines is an iterable of line strings (presumably without
        trailing newlines, since blank lines are emitted as '' --
        confirm against the caller), filename is the name the primary
        header matcher compares against, and language selects C++
        specific handling when it equals 'C++'."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            """Classify a line. Returns (include type, rewritten
            line), or (None, line) if it isn't an include."""
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)

                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include

# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))

def options():
    """Build and return the optparse.OptionParser for the command line
    interface."""
    import optparse
    options = optparse.OptionParser()
    add_option = options.add_option
    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
               default=','.join(default_dir_ignore),
               help="ignore directories")
    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
               default=','.join(default_file_ignore),
               help="ignore files")
    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
               default=','.join(default_languages),
               help="languages")
    add_option('-n', '--dry-run', action='store_true',
               help="don't overwrite files")

    return options

def parse_args(parser):
    """Parse the command line with parser and return (opts, args). The
    comma separated dir_ignore, file_ignore and languages options are
    converted to frozensets."""
    opts,args = parser.parse_args()

    opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
    opts.file_ignore = frozenset(opts.file_ignore.split(','))
    opts.languages = frozenset(opts.languages.split(','))

    return opts,args

if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename,language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Single-argument parenthesised form: prints the same
                # text under Python 2 and is also valid Python 3.
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())