# sort_includes.py revision 11409:72f80dd8b194
#!/usr/bin/env python
#
# Copyright (c) 2014-2015 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2011 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
#          Andreas Sandberg

import os
import re
import sys

from file_types import *

# C standard headers and the C++ wrapper header that replaces each of
# them when the file being processed is C++ (see SortIncludes.__call__).
cpp_c_headers = {
    'assert.h' : 'cassert',
    'ctype.h'  : 'cctype',
    'errno.h'  : 'cerrno',
    'float.h'  : 'cfloat',
    'limits.h' : 'climits',
    'locale.h' : 'clocale',
    'math.h'   : 'cmath',
    'setjmp.h' : 'csetjmp',
    'signal.h' : 'csignal',
    'stdarg.h' : 'cstdarg',
    'stddef.h' : 'cstddef',
    'stdio.h'  : 'cstdio',
    'stdlib.h' : 'cstdlib',
    'string.h' : 'cstring',
    'time.h'   : 'ctime',
    'wchar.h'  : 'cwchar',
    'wctype.h' : 'cwctype',
}

# Groups: (1) the '#' or '%' prefix, (2) 'include' or 'import',
# (3) the text between the delimiters.
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
def include_key(line):
    """Return a sort key for an include line.

    Every path component except the last one gets a leading space so
    that directories sort before files.

    line -- an include/import statement matched by include_re; the
            assertion fails (with the line as message) if it does not
            match.
    """

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes. Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]

    return '/'.join(parts)


def _include_matcher(keyword="#include", delim="<>"):
    """Build a matcher for include statements.

    keyword -- the statement keyword, inserted verbatim into the regex.
    delim   -- two characters: the opening and closing delimiter.

    The returned function takes (context, line) and returns a
    (keyword, file, extra) tuple, or a tuple of three None values if
    the line does not match. The context argument is ignored here.
    """

    rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))

    def matcher(context, line):
        m = rex.match(line)
        return m.groups() if m else (None, ) * 3

    return matcher

def _include_matcher_fname(fname, **kwargs):
    """Build a matcher for includes of a specific file name.

    fname -- regular expression that the included file name has to
             match (anchored at the start, as with re.match).

    Any keyword arguments are forwarded to _include_matcher, which is
    used to match the actual include line. The returned function has
    the same signature and return convention as the one returned by
    _include_matcher.
    """

    rex = re.compile(fname)
    base_matcher = _include_matcher(**kwargs)

    def matcher(context, line):
        (keyword, fname, extra) = base_matcher(context, line)
        if fname and rex.match(fname):
            return (keyword, fname, extra)
        else:
            return (None, ) * 3

    return matcher


def _include_matcher_main():
    """Build a matcher for a C/C++ source file's primary header (i.e.,
    a file with the same base name, but a header extension).

    The returned function reads context["filename"], which has to be
    of the form src/<base>.<ext>; the include has to be a quoted
    include of <base>.<header extension for ext>.
    """

    base_matcher = _include_matcher(delim='""')
    rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # Source file extension -> extension of its primary header
    header_map = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
        }
    def matcher(context, line):
        m = rex.match(context["filename"])
        if not m:
            return (None, ) * 3
        base, ext = m.groups()
        (keyword, fname, extra) = base_matcher(context, line)
        try:
            if fname == "%s.%s" % (base, header_map[ext]):
                return (keyword, fname, extra)
        except KeyError:
            # Not a source extension we know a header extension for
            pass

        return (None, ) * 3

    return matcher

class SortIncludes(object):
    """Callable that regroups and sorts the include blocks of a file.

    Calling an instance with (lines, filename, language) returns a
    generator over the rewritten lines.
    """

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # NOTE(review): dump_blocks() sorts the include lines with a plain
    # sorted() and never applies include_key(), so the "directories
    # before files" ordering described above is not enforced by this
    # class -- confirm whether that is intended.
    #
    # Each entry is (include type, delimiter pair used when the line
    # is rewritten, matcher). The first matcher that matches wins.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""', _include_matcher_fname(r"^.*\.h{1,2}$", delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Output order of the include blocks. Include types that share a
    # tuple are merged into a single block.
    block_order = (
        ('main', ),
        ('python', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Map every include type to the index of its group in
        # block_order
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Make sure self.includes exists so that dump_includes() and
        # dump_blocks() work on a freshly constructed instance.
        self.reset()

    def reset(self):
        # clear all stored headers
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Returns a sorted list of the stored include lines with
        duplicates removed; block types without stored includes are
        skipped."""

        includes = []
        for block_type in block_types:
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all stored includes as a list of lines, ordered by
        block_order with an empty line between non-empty blocks, and
        clear the stored includes."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its include blocks sorted.

        lines    -- iterable of the lines to process.
        filename -- name of the file, made available to the matchers
                    as context["filename"].
        language -- language of the file; when it is 'C++', C headers
                    listed in cpp_c_headers are replaced by their C++
                    counterparts.
        """

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            """Return (include type, rewritten include line), or
            (None, line) if the line is not a recognized include."""

            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include

# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))

def options():
    """Create and return the command line option parser.

    The defaults for the ignore lists come from default_dir_ignore and
    default_file_ignore, which are expected to be provided by the
    file_types star import.
    """

    import optparse
    parser = optparse.OptionParser()
    add_option = parser.add_option
    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
               default=','.join(default_dir_ignore),
               help="ignore directories")
    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
               default=','.join(default_file_ignore),
               help="ignore files")
    # Sorted so that the default string is deterministic; parse_args
    # turns it back into a frozenset.
    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
               default=','.join(sorted(default_languages)),
               help="languages")
    add_option('-n', '--dry-run', action='store_true',
               help="don't overwrite files")

    return parser

def parse_args(parser):
    """Parse the command line with parser and return (opts, args).

    The comma separated dir_ignore, file_ignore and languages options
    are converted to frozensets.
    """

    opts,args = parser.parse_args()

    opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
    opts.file_ignore = frozenset(opts.file_ignore.split(','))
    opts.languages = frozenset(opts.languages.split(','))

    return opts,args

if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename,language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Single-argument parenthesised form prints the same
                # text under Python 2 and Python 3.
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())