sort_includes.py (11828:36b064696175) sort_includes.py (12009:3345827969f5)
1#!/usr/bin/env python2
2#
3# Copyright (c) 2014-2015 ARM Limited
4# All rights reserved
5#
6# The license below extends only to copyright in the software and shall
7# not be construed as granting a license to any other intellectual
8# property including but not limited to intellectual property relating
9# to a hardware implementation of the functionality of the software
10# licensed hereunder. You may use the software subject to the license
11# terms below provided that you ensure that this notice is replicated
12# unmodified and in its entirety in all distributions of the software,
13# modified or unmodified, in source code or in binary form.
14#
15# Copyright (c) 2011 The Hewlett-Packard Development Company
16# All rights reserved.
17#
18# Redistribution and use in source and binary forms, with or without
19# modification, are permitted provided that the following conditions are
20# met: redistributions of source code must retain the above copyright
21# notice, this list of conditions and the following disclaimer;
22# redistributions in binary form must reproduce the above copyright
23# notice, this list of conditions and the following disclaimer in the
24# documentation and/or other materials provided with the distribution;
25# neither the name of the copyright holders nor the names of its
26# contributors may be used to endorse or promote products derived from
27# this software without specific prior written permission.
28#
29# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40#
41# Authors: Nathan Binkert
42# Andreas Sandberg
43
44import os
45import re
46import sys
47
48from file_types import *
49
# Map each C standard library header to the C++ header that replaces
# it. Every entry has the form 'X.h' -> 'cX', so the table is derived
# from the list of header stems.
cpp_c_headers = {
    stem + '.h': 'c' + stem
    for stem in (
        'assert', 'ctype', 'errno', 'float', 'limits', 'locale',
        'math', 'setjmp', 'signal', 'stdarg', 'stddef', 'stdio',
        'stdlib', 'string', 'time', 'wchar', 'wctype',
    )
}
69
# Matches '#include'/'#import'/'%include'/'%import' lines; group 3 is
# the text between the <...> or "..." delimiters.
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
def include_key(line):
    '''Return a sort key for an include line.

    Every directory component of the included path gets a leading
    space so that directories are sorted before files.

    line -- an include/import statement accepted by include_re.

    Raises AssertionError (carrying the line) if the line is not an
    include statement.
    '''

    match = include_re.match(line)
    if match is None:
        # Raised explicitly rather than with an assert statement so
        # the check is not stripped when Python runs with -O.
        raise AssertionError(line)
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes. Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]

    return '/'.join(parts)
92
93
def _include_matcher(keyword="#include", delim="<>"):
    """Build a matcher for include statements.

    keyword -- the include keyword; it is interpolated into the
               regular expression as-is.
    delim   -- two characters: the opening and closing delimiter
               around the file name.

    The returned matcher(context, line) yields a (keyword, file,
    extra) 3-tuple, or a tuple of three None values if the line does
    not match. `extra` is whatever follows the closing delimiter.
    `context` is accepted so that all matchers share one signature; it
    is not used here.
    """

    # The file name is captured non-greedily so that it ends at the
    # first closing delimiter. Otherwise trailing text that contains
    # the delimiter again (e.g. '#include <a.h> // needs <b.h>') would
    # be swallowed into the file name.
    rex = re.compile(r'^(%s)\s*%s(.*?)%s(.*)$' %
                     (keyword, re.escape(delim[0]), re.escape(delim[1])))

    def matcher(context, line):
        m = rex.match(line)
        return m.groups() if m else (None, ) * 3

    return matcher
105
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher for includes of files whose name matches the
    regular expression `fname`.

    Keyword arguments are passed on to _include_matcher, which does
    the matching of the include line itself. The returned matcher has
    the same result shape: (keyword, file, extra), or three None
    values when the line is not an include or the file name does not
    match.
    """

    name_rex = re.compile(fname)
    match_include = _include_matcher(**kwargs)

    def matcher(context, line):
        keyword, included, extra = match_include(context, line)
        if not included or not name_rex.match(included):
            return (None, ) * 3
        return (keyword, included, extra)

    return matcher
122
123
def _include_matcher_main():
    """Build a matcher for the primary header of a C/C++ source file,
    i.e. the quoted include with the same base name as the source
    file but a header extension.

    The returned matcher reads context["filename"]. Only file names
    of the form 'src/<base>.<ext>' are considered, and the include
    must name '<base>.<header ext>' (without the 'src/' prefix).
    """

    match_quoted = _include_matcher(delim='""')
    source_rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # Source file extension -> extension of its primary header.
    header_exts = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
    }

    def matcher(context, line):
        source = source_rex.match(context["filename"])
        if source is None:
            return (None, ) * 3

        stem, suffix = source.groups()
        keyword, fname, extra = match_quoted(context, line)
        header_ext = header_exts.get(suffix)
        # Unknown source extensions never have a primary header.
        if header_ext is not None and fname == "%s.%s" % (stem, header_ext):
            return (keyword, fname, extra)

        return (None, ) * 3

    return matcher
150
151class SortIncludes(object):
152 # different types of includes for different sorting of headers
153 # <Python.h> - Python header needs to be first if it exists
154 # <*.h> - system headers (directories before files)
155 # <*> - STL headers
156 # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
157 # "*" - M5 headers (directories before files)
158 includes_re = (
159 ('main', '""', _include_matcher_main()),
160 ('python', '<>', _include_matcher_fname("^Python\.h$")),
1#!/usr/bin/env python2
2#
3# Copyright (c) 2014-2015 ARM Limited
4# All rights reserved
5#
6# The license below extends only to copyright in the software and shall
7# not be construed as granting a license to any other intellectual
8# property including but not limited to intellectual property relating
9# to a hardware implementation of the functionality of the software
10# licensed hereunder. You may use the software subject to the license
11# terms below provided that you ensure that this notice is replicated
12# unmodified and in its entirety in all distributions of the software,
13# modified or unmodified, in source code or in binary form.
14#
15# Copyright (c) 2011 The Hewlett-Packard Development Company
16# All rights reserved.
17#
18# Redistribution and use in source and binary forms, with or without
19# modification, are permitted provided that the following conditions are
20# met: redistributions of source code must retain the above copyright
21# notice, this list of conditions and the following disclaimer;
22# redistributions in binary form must reproduce the above copyright
23# notice, this list of conditions and the following disclaimer in the
24# documentation and/or other materials provided with the distribution;
25# neither the name of the copyright holders nor the names of its
26# contributors may be used to endorse or promote products derived from
27# this software without specific prior written permission.
28#
29# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40#
41# Authors: Nathan Binkert
42# Andreas Sandberg
43
44import os
45import re
46import sys
47
48from file_types import *
49
# Map each C standard library header to the C++ header that replaces
# it. Every entry has the form 'X.h' -> 'cX', so the table is derived
# from the list of header stems.
cpp_c_headers = {
    stem + '.h': 'c' + stem
    for stem in (
        'assert', 'ctype', 'errno', 'float', 'limits', 'locale',
        'math', 'setjmp', 'signal', 'stdarg', 'stddef', 'stdio',
        'stdlib', 'string', 'time', 'wchar', 'wctype',
    )
}
69
# Matches '#include'/'#import'/'%include'/'%import' lines; group 3 is
# the text between the <...> or "..." delimiters.
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
def include_key(line):
    '''Return a sort key for an include line.

    Every directory component of the included path gets a leading
    space so that directories are sorted before files.

    line -- an include/import statement accepted by include_re.

    Raises AssertionError (carrying the line) if the line is not an
    include statement.
    '''

    match = include_re.match(line)
    if match is None:
        # Raised explicitly rather than with an assert statement so
        # the check is not stripped when Python runs with -O.
        raise AssertionError(line)
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes. Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]

    return '/'.join(parts)
92
93
def _include_matcher(keyword="#include", delim="<>"):
    """Build a matcher for include statements.

    keyword -- the include keyword; it is interpolated into the
               regular expression as-is.
    delim   -- two characters: the opening and closing delimiter
               around the file name.

    The returned matcher(context, line) yields a (keyword, file,
    extra) 3-tuple, or a tuple of three None values if the line does
    not match. `extra` is whatever follows the closing delimiter.
    `context` is accepted so that all matchers share one signature; it
    is not used here.
    """

    # The file name is captured non-greedily so that it ends at the
    # first closing delimiter. Otherwise trailing text that contains
    # the delimiter again (e.g. '#include <a.h> // needs <b.h>') would
    # be swallowed into the file name.
    rex = re.compile(r'^(%s)\s*%s(.*?)%s(.*)$' %
                     (keyword, re.escape(delim[0]), re.escape(delim[1])))

    def matcher(context, line):
        m = rex.match(line)
        return m.groups() if m else (None, ) * 3

    return matcher
105
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher for includes of files whose name matches the
    regular expression `fname`.

    Keyword arguments are passed on to _include_matcher, which does
    the matching of the include line itself. The returned matcher has
    the same result shape: (keyword, file, extra), or three None
    values when the line is not an include or the file name does not
    match.
    """

    name_rex = re.compile(fname)
    match_include = _include_matcher(**kwargs)

    def matcher(context, line):
        keyword, included, extra = match_include(context, line)
        if not included or not name_rex.match(included):
            return (None, ) * 3
        return (keyword, included, extra)

    return matcher
122
123
def _include_matcher_main():
    """Build a matcher for the primary header of a C/C++ source file,
    i.e. the quoted include with the same base name as the source
    file but a header extension.

    The returned matcher reads context["filename"]. Only file names
    of the form 'src/<base>.<ext>' are considered, and the include
    must name '<base>.<header ext>' (without the 'src/' prefix).
    """

    match_quoted = _include_matcher(delim='""')
    source_rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # Source file extension -> extension of its primary header.
    header_exts = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
    }

    def matcher(context, line):
        source = source_rex.match(context["filename"])
        if source is None:
            return (None, ) * 3

        stem, suffix = source.groups()
        keyword, fname, extra = match_quoted(context, line)
        header_ext = header_exts.get(suffix)
        # Unknown source extensions never have a primary header.
        if header_ext is not None and fname == "%s.%s" % (stem, header_ext):
            return (keyword, fname, extra)

        return (None, ) * 3

    return matcher
150
151class SortIncludes(object):
152 # different types of includes for different sorting of headers
153 # <Python.h> - Python header needs to be first if it exists
154 # <*.h> - system headers (directories before files)
155 # <*> - STL headers
156 # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
157 # "*" - M5 headers (directories before files)
158 includes_re = (
159 ('main', '""', _include_matcher_main()),
160 ('python', '<>', _include_matcher_fname("^Python\.h$")),
161 ('pybind', '""', _include_matcher_fname("^pybind11/.*\.h$",
162 delim='""')),
161 ('c', '<>', _include_matcher_fname("^.*\.h$")),
162 ('stl', '<>', _include_matcher_fname("^\w+$")),
163 ('cc', '<>', _include_matcher_fname("^.*\.(hh|hxx|hpp|H)$")),
164 ('m5header', '""', _include_matcher_fname("^.*\.h{1,2}$", delim='""')),
165 ('swig0', '<>', _include_matcher(keyword="%import")),
166 ('swig1', '<>', _include_matcher(keyword="%include")),
167 ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
168 ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
169 )
170
171 block_order = (
172 ('python', ),
163 ('c', '<>', _include_matcher_fname("^.*\.h$")),
164 ('stl', '<>', _include_matcher_fname("^\w+$")),
165 ('cc', '<>', _include_matcher_fname("^.*\.(hh|hxx|hpp|H)$")),
166 ('m5header', '""', _include_matcher_fname("^.*\.h{1,2}$", delim='""')),
167 ('swig0', '<>', _include_matcher(keyword="%import")),
168 ('swig1', '<>', _include_matcher(keyword="%include")),
169 ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
170 ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
171 )
172
173 block_order = (
174 ('python', ),
175 ('pybind', ),
173 ('main', ),
174 ('c', ),
175 ('stl', ),
176 ('cc', ),
177 ('m5header', ),
178 ('swig0', 'swig1', 'swig2', 'swig3', ),
179 )
180
181 def __init__(self):
182 self.block_priority = {}
183 for prio, keys in enumerate(self.block_order):
184 for key in keys:
185 self.block_priority[key] = prio
186
def reset(self):
    """Forget all collected includes by rebinding self.includes to a
    fresh, empty dictionary."""
    self.includes = dict()
190
def dump_blocks(self, block_types):
    """Combine the includes stored for several block types into one
    sorted list without duplicates.

    This is used for block types that share a priority (e.g., the
    swig includes). Block types with no stored includes are ignored.
    """

    merged = set()
    for block_type in block_types:
        merged.update(self.includes.get(block_type, ()))

    return sorted(merged)
205
206 def dump_includes(self):
207 includes = []
208 for types in self.block_order:
209 block = self.dump_blocks(types)
210 if includes and block:
211 includes.append("")
212 includes += block
213
214 self.reset()
215 return includes
216
def __call__(self, lines, filename, language):
    """Generate `lines` with every block of includes sorted.

    lines    -- iterable of source lines.
    filename -- name of the file the lines come from; handed to the
                matchers through the context dictionary.
    language -- language of the file; handed to the matchers and
                used to decide whether C headers become C++ headers.

    Consecutive include lines (empty lines between them are dropped)
    are collected and emitted as sorted blocks. When a block ends
    before another line, an empty line is emitted between the block
    and that line. Includes still pending at the end of the input are
    emitted without a trailing empty line. All other lines pass
    through unchanged.
    """
    self.reset()

    context = {
        "filename" : filename,
        "language" : language,
    }

    def classify(text):
        """Return (include type, rewritten include line), or
        (None, text) if `text` is not a recognized include."""
        if text:
            for kind, (opener, closer), matcher in self.includes_re:
                keyword, include, extra = matcher(context, text)
                if not keyword:
                    continue

                # A C header with a C++ counterpart is replaced by it
                # in C++ files and filed with the stl headers.
                if kind == 'c' and language == 'C++':
                    replacement = cpp_c_headers.get(include, None)
                    if replacement:
                        include, kind = replacement, 'stl'

                # Rebuild the line in a normalized form.
                rebuilt = ''.join(
                    (keyword, ' ', opener, include, closer, extra))
                return (kind, rebuilt)

        return (None, text)

    in_include_block = False
    for raw in lines:
        kind, text = classify(raw)
        if kind:
            self.includes.setdefault(kind, []).append(text)
            in_include_block = True
            continue

        if not in_include_block:
            # Not inside an include block, so just emit the line
            yield text
            continue

        if not text.strip():
            # Empty lines inside an include block are dropped
            continue

        # A non-include line ends the include block: emit the pending
        # includes, an empty separator line, and the current line.
        in_include_block = False
        for include in self.dump_includes():
            yield include
        yield ''
        yield text

    # End of input: emit whatever includes are still pending
    if in_include_block:
        for include in self.dump_includes():
            yield include
276
# Languages the sorting rules are applied to by default
default_languages = frozenset([
    'C', 'C++', 'isa', 'python', 'scons', 'swig',
])
279
def options():
    """Create and return the optparse.OptionParser for the command
    line of this script.

    The -d, -f and -l options take comma separated lists; their
    defaults are the comma-joined default_dir_ignore,
    default_file_ignore and default_languages collections.
    (default_dir_ignore and default_file_ignore are not defined in
    this file; presumably they come from file_types -- confirm.)
    """
    import optparse

    parser = optparse.OptionParser()

    comma_lists = (
        ('-d', '--dir_ignore', "DIR[,DIR]", default_dir_ignore,
         "ignore directories"),
        ('-f', '--file_ignore', "FILE[,FILE]", default_file_ignore,
         "ignore files"),
        ('-l', '--languages', "LANG[,LANG]", default_languages,
         "languages"),
    )
    for short_name, long_name, metavar, defaults, text in comma_lists:
        parser.add_option(short_name, long_name, metavar=metavar,
                          type='string', default=','.join(defaults),
                          help=text)

    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
297
def parse_args(parser):
    """Parse the command line with `parser` and return (opts, args).

    The comma separated dir_ignore, file_ignore and languages option
    values are replaced by frozensets of their items.
    """
    opts, args = parser.parse_args()

    for name in ('dir_ignore', 'file_ignore', 'languages'):
        items = getattr(opts, name).split(',')
        setattr(opts, name, frozenset(items))

    return opts, args
306
if __name__ == '__main__':
    # Script entry point: sort the includes of every matching file
    # below each directory given on the command line.
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        # find_files and update_file are not defined in this file;
        # presumably they come from file_types -- confirm.
        for filename,language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # A single parenthesized argument prints the same text
                # with the Python 2 print statement and the Python 3
                # print function.
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())
176 ('main', ),
177 ('c', ),
178 ('stl', ),
179 ('cc', ),
180 ('m5header', ),
181 ('swig0', 'swig1', 'swig2', 'swig3', ),
182 )
183
184 def __init__(self):
185 self.block_priority = {}
186 for prio, keys in enumerate(self.block_order):
187 for key in keys:
188 self.block_priority[key] = prio
189
def reset(self):
    """Forget all collected includes by rebinding self.includes to a
    fresh, empty dictionary."""
    self.includes = dict()
193
def dump_blocks(self, block_types):
    """Combine the includes stored for several block types into one
    sorted list without duplicates.

    This is used for block types that share a priority (e.g., the
    swig includes). Block types with no stored includes are ignored.
    """

    merged = set()
    for block_type in block_types:
        merged.update(self.includes.get(block_type, ()))

    return sorted(merged)
208
209 def dump_includes(self):
210 includes = []
211 for types in self.block_order:
212 block = self.dump_blocks(types)
213 if includes and block:
214 includes.append("")
215 includes += block
216
217 self.reset()
218 return includes
219
def __call__(self, lines, filename, language):
    """Generate `lines` with every block of includes sorted.

    lines    -- iterable of source lines.
    filename -- name of the file the lines come from; handed to the
                matchers through the context dictionary.
    language -- language of the file; handed to the matchers and
                used to decide whether C headers become C++ headers.

    Consecutive include lines (empty lines between them are dropped)
    are collected and emitted as sorted blocks. When a block ends
    before another line, an empty line is emitted between the block
    and that line. Includes still pending at the end of the input are
    emitted without a trailing empty line. All other lines pass
    through unchanged.
    """
    self.reset()

    context = {
        "filename" : filename,
        "language" : language,
    }

    def classify(text):
        """Return (include type, rewritten include line), or
        (None, text) if `text` is not a recognized include."""
        if text:
            for kind, (opener, closer), matcher in self.includes_re:
                keyword, include, extra = matcher(context, text)
                if not keyword:
                    continue

                # A C header with a C++ counterpart is replaced by it
                # in C++ files and filed with the stl headers.
                if kind == 'c' and language == 'C++':
                    replacement = cpp_c_headers.get(include, None)
                    if replacement:
                        include, kind = replacement, 'stl'

                # Rebuild the line in a normalized form.
                rebuilt = ''.join(
                    (keyword, ' ', opener, include, closer, extra))
                return (kind, rebuilt)

        return (None, text)

    in_include_block = False
    for raw in lines:
        kind, text = classify(raw)
        if kind:
            self.includes.setdefault(kind, []).append(text)
            in_include_block = True
            continue

        if not in_include_block:
            # Not inside an include block, so just emit the line
            yield text
            continue

        if not text.strip():
            # Empty lines inside an include block are dropped
            continue

        # A non-include line ends the include block: emit the pending
        # includes, an empty separator line, and the current line.
        in_include_block = False
        for include in self.dump_includes():
            yield include
        yield ''
        yield text

    # End of input: emit whatever includes are still pending
    if in_include_block:
        for include in self.dump_includes():
            yield include
279
# Languages the sorting rules are applied to by default
default_languages = frozenset([
    'C', 'C++', 'isa', 'python', 'scons', 'swig',
])
282
def options():
    """Create and return the optparse.OptionParser for the command
    line of this script.

    The -d, -f and -l options take comma separated lists; their
    defaults are the comma-joined default_dir_ignore,
    default_file_ignore and default_languages collections.
    (default_dir_ignore and default_file_ignore are not defined in
    this file; presumably they come from file_types -- confirm.)
    """
    import optparse

    parser = optparse.OptionParser()

    comma_lists = (
        ('-d', '--dir_ignore', "DIR[,DIR]", default_dir_ignore,
         "ignore directories"),
        ('-f', '--file_ignore', "FILE[,FILE]", default_file_ignore,
         "ignore files"),
        ('-l', '--languages', "LANG[,LANG]", default_languages,
         "languages"),
    )
    for short_name, long_name, metavar, defaults, text in comma_lists:
        parser.add_option(short_name, long_name, metavar=metavar,
                          type='string', default=','.join(defaults),
                          help=text)

    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
300
def parse_args(parser):
    """Parse the command line with `parser` and return (opts, args).

    The comma separated dir_ignore, file_ignore and languages option
    values are replaced by frozensets of their items.
    """
    opts, args = parser.parse_args()

    for name in ('dir_ignore', 'file_ignore', 'languages'):
        items = getattr(opts, name).split(',')
        setattr(opts, name, frozenset(items))

    return opts, args
309
if __name__ == '__main__':
    # Script entry point: sort the includes of every matching file
    # below each directory given on the command line.
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        # find_files and update_file are not defined in this file;
        # presumably they come from file_types -- confirm.
        for filename,language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # A single parenthesized argument prints the same text
                # with the Python 2 print statement and the Python 3
                # print function.
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())
320 update_file(filename, filename, language, SortIncludes())