sort_includes.py revision 13540
15081Sgblack@eecs.umich.edu#!/usr/bin/env python2.7
25081Sgblack@eecs.umich.edu#
35081Sgblack@eecs.umich.edu# Copyright (c) 2014-2015 ARM Limited
47087Snate@binkert.org# All rights reserved
57087Snate@binkert.org#
67087Snate@binkert.org# The license below extends only to copyright in the software and shall
77087Snate@binkert.org# not be construed as granting a license to any other intellectual
87087Snate@binkert.org# property including but not limited to intellectual property relating
97087Snate@binkert.org# to a hardware implementation of the functionality of the software
107087Snate@binkert.org# licensed hereunder.  You may use the software subject to the license
117087Snate@binkert.org# terms below provided that you ensure that this notice is replicated
125081Sgblack@eecs.umich.edu# unmodified and in its entirety in all distributions of the software,
137087Snate@binkert.org# modified or unmodified, in source code or in binary form.
147087Snate@binkert.org#
157087Snate@binkert.org# Copyright (c) 2011 The Hewlett-Packard Development Company
167087Snate@binkert.org# All rights reserved.
177087Snate@binkert.org#
187087Snate@binkert.org# Redistribution and use in source and binary forms, with or without
197087Snate@binkert.org# modification, are permitted provided that the following conditions are
207087Snate@binkert.org# met: redistributions of source code must retain the above copyright
215081Sgblack@eecs.umich.edu# notice, this list of conditions and the following disclaimer;
227087Snate@binkert.org# redistributions in binary form must reproduce the above copyright
235081Sgblack@eecs.umich.edu# notice, this list of conditions and the following disclaimer in the
245081Sgblack@eecs.umich.edu# documentation and/or other materials provided with the distribution;
255081Sgblack@eecs.umich.edu# neither the name of the copyright holders nor the names of its
265081Sgblack@eecs.umich.edu# contributors may be used to endorse or promote products derived from
275081Sgblack@eecs.umich.edu# this software without specific prior written permission.
285081Sgblack@eecs.umich.edu#
295081Sgblack@eecs.umich.edu# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
305081Sgblack@eecs.umich.edu# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
315081Sgblack@eecs.umich.edu# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
325081Sgblack@eecs.umich.edu# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
335081Sgblack@eecs.umich.edu# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
345081Sgblack@eecs.umich.edu# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
355081Sgblack@eecs.umich.edu# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
365081Sgblack@eecs.umich.edu# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
375081Sgblack@eecs.umich.edu# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
385081Sgblack@eecs.umich.edu# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
395081Sgblack@eecs.umich.edu# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
405081Sgblack@eecs.umich.edu#
415081Sgblack@eecs.umich.edu# Authors: Nathan Binkert
425081Sgblack@eecs.umich.edu#          Andreas Sandberg
435081Sgblack@eecs.umich.edu
445081Sgblack@eecs.umich.eduimport os
455081Sgblack@eecs.umich.eduimport re
465081Sgblack@eecs.umich.eduimport sys
475081Sgblack@eecs.umich.edu
485081Sgblack@eecs.umich.edufrom file_types import *
495081Sgblack@eecs.umich.edu
# Map each C standard library header to the C++ wrapper header that
# replaces it (e.g., <stdio.h> is replaced by <cstdio>).
cpp_c_headers = {
    'assert.h': 'cassert',
    'ctype.h': 'cctype',
    'errno.h': 'cerrno',
    'float.h': 'cfloat',
    'limits.h': 'climits',
    'locale.h': 'clocale',
    'math.h': 'cmath',
    'setjmp.h': 'csetjmp',
    'signal.h': 'csignal',
    'stdarg.h': 'cstdarg',
    'stddef.h': 'cstddef',
    'stdio.h': 'cstdio',
    'stdlib.h': 'cstdlib',
    'string.h': 'cstring',
    'time.h': 'ctime',
    'wchar.h': 'cwchar',
    'wctype.h': 'cwctype',
}
695081Sgblack@eecs.umich.edu
# Matches a C/C++ ('#') or SWIG ('%') include/import statement.  Group 1
# is the directive character, group 2 the keyword and group 3 the
# included path.  The path is the first <...> or "..." delimited token
# after the keyword; the non-greedy prefix and the negated character
# class keep quotes or angle brackets in a trailing comment from being
# mistaken for the path.
include_re = re.compile(r'([#%])(include|import).*?[<"]([^">]*)[">]')


def include_key(line):
    '''Return a sort key for an include line.

    Every directory component of the included path is marked with a
    leading space so that, when the key is used for sorting, includes
    inside a directory are sorted before plain files.

    line -- the include statement, which must match include_re

    Raises AssertionError (carrying the line) if the line is not an
    include statement.'''

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[:-1] = [' ' + part for part in parts[:-1]]

    return '/'.join(parts)
92
93
94def _include_matcher(keyword="#include", delim="<>"):
95    """Match an include statement and return a (keyword, file, extra)
96    duple, or a touple of None values if there isn't a match."""
97
98    rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
99
100    def matcher(context, line):
101        m = rex.match(line)
102        return m.groups() if m else (None, ) * 3
103
104    return matcher
105
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher that only accepts includes of certain files.

    fname is a regular expression that is applied, anchored at the
    start, to the included file name. All keyword arguments are handed
    to _include_matcher, which does the matching of the include line
    itself."""

    name_rex = re.compile(fname)
    match_include = _include_matcher(**kwargs)

    def matcher(context, line):
        keyword, included, extra = match_include(context, line)
        # Reject lines that aren't includes, includes with an empty
        # file name and includes of files we aren't interested in.
        if not included or not name_rex.match(included):
            return (None, ) * 3

        return (keyword, included, extra)

    return matcher
122
123
def _include_matcher_main():
    """Build a matcher for the primary header of a C/C++ source file,
    i.e., the quoted include of a file with the same base name as the
    source file, but with a header extension. Only source files whose
    name starts with "src/" have a primary header, and the header is
    named relative to that directory."""

    # Header extension that goes with each source file extension
    header_exts = {"c": "h", "cc": "hh", "cpp": "hh"}
    source_rex = re.compile(r"^src/(.*)\.([^.]+)$")
    quoted_include = _include_matcher(delim='""')
    no_match = (None, ) * 3

    def matcher(context, line):
        source = source_rex.match(context["filename"])
        if source is None:
            return no_match

        base, ext = source.groups()
        keyword, fname, extra = quoted_include(context, line)

        header_ext = header_exts.get(ext)
        if header_ext is None:
            # Not a source file type that has a primary header
            return no_match

        if fname != "%s.%s" % (base, header_ext):
            return no_match

        return (keyword, fname, extra)

    return matcher
150
class SortIncludes(object):
    """Sort, group and de-duplicate the include statements of a file.

    An instance is a callable: given the lines of a file it returns a
    generator that yields the same lines, except that every run of
    include statements is replaced by the includes grouped into blocks
    (ordered according to block_order), sorted within each block and
    with one empty line between two blocks."""

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers
    # "*"                - M5 headers
    #
    # Each entry is (include type, delimiters used when the include is
    # written back, matcher). The first matcher that accepts a line
    # decides its type, so the order of the entries matters. The
    # patterns are raw strings so that the regex escapes (\. and \w)
    # are not interpreted as string escape sequences.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('pybind', '""', _include_matcher_fname(r"^pybind11/.*\.h$",
                                                delim='""')),
        ('m5shared', '<>', _include_matcher_fname(r"^gem5/")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""', _include_matcher_fname(r"^.*\.h{1,2}$",
                                                  delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Order in which the blocks of includes are written. Include types
    # listed in the same tuple are merged into a single block.
    block_order = (
        ('python', ),
        ('pybind', ),
        ('main', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5shared', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Map every include type to the index of its block in
        # block_order
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Make sure self.includes exists so the dump methods can be
        # called on an instance that hasn't processed a file yet.
        self.reset()

    def reset(self):
        """Forget all includes collected so far."""
        # include type -> list of (cleaned up) include lines
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge the includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        block_types -- iterable of include type names

        Returns the sorted list of include lines, without duplicates.
        Types for which no include has been stored are skipped."""

        includes = []
        for block_type in block_types:
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all stored includes as a list of lines, one block
        after the other in block_order with an empty line between two
        non-empty blocks, and forget the stored includes."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its includes sorted.

        lines -- iterable of the lines of the file; the handling of
                 empty lines suggests that the line terminators have
                 already been removed (TODO confirm against callers)
        filename -- name of the file, made available to the matchers
        language -- language of the file; C headers listed in
                    cpp_c_headers are replaced by their C++ wrapper
                    when this is 'C++'

        Lines outside of include blocks are yielded unchanged. Empty
        lines inside an include block are dropped and a single empty
        line is yielded after each block that is followed by more
        lines."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            # Returns (include type, rewritten include line), or
            # (None, line) if the line isn't an include.
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if keyword:
                    # if we've got a match, clean up the #include line,
                    # fix up stl headers and store it in the proper category
                    if include_type == 'c' and language == 'C++':
                        stl_inc = cpp_c_headers.get(include, None)
                        if stl_inc:
                            include = stl_inc
                            include_type = 'stl'

                    return (include_type,
                            keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include
281
# Languages whose files get their includes sorted unless a different
# set of languages is requested.
default_languages = frozenset([
    'C',
    'C++',
    'isa',
    'python',
    'scons',
    'swig',
])
284
def options():
    """Create the parser for the command line options of this script.

    The options that take a list expect a comma separated string and
    default to the corresponding default_* set joined with commas."""
    import optparse

    parser = optparse.OptionParser()

    # (short flag, long flag, metavar, default values, help text)
    list_options = (
        ('-d', '--dir_ignore', "DIR[,DIR]", default_dir_ignore,
         "ignore directories"),
        ('-f', '--file_ignore', "FILE[,FILE]", default_file_ignore,
         "ignore files"),
        ('-l', '--languages', "LANG[,LANG]", default_languages,
         "languages"),
        )
    for short_flag, long_flag, metavar, values, help_text in list_options:
        parser.add_option(short_flag, long_flag, metavar=metavar,
                          type='string', default=','.join(values),
                          help=help_text)

    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
302
def parse_args(parser):
    """Parse the command line with the given parser.

    Returns an (options, positional arguments) pair in which the comma
    separated dir_ignore, file_ignore and languages options have been
    converted into frozensets of strings."""
    opts, args = parser.parse_args()

    # The list valued options arrive as comma separated strings
    for name in ('dir_ignore', 'file_ignore', 'languages'):
        setattr(opts, name, frozenset(getattr(opts, name).split(',')))

    return opts, args
311
if __name__ == '__main__':
    # Sort the includes of the files found below each path given on the
    # command line, or only list those files when doing a dry run.
    # NOTE(review): find_files and update_file are not defined in this
    # file; presumably they come from the file_types star import.
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename, language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # The parenthesised single argument form gives the same
                # output as a Python 2 print statement and is also valid
                # Python 3, where the statement form is a syntax error.
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())
323