1#!/usr/bin/env python2.7
2#
3# Copyright (c) 2014-2015 ARM Limited
4# All rights reserved
5#
6# The license below extends only to copyright in the software and shall
7# not be construed as granting a license to any other intellectual
8# property including but not limited to intellectual property relating
9# to a hardware implementation of the functionality of the software
10# licensed hereunder.  You may use the software subject to the license
11# terms below provided that you ensure that this notice is replicated
12# unmodified and in its entirety in all distributions of the software,
13# modified or unmodified, in source code or in binary form.
14#
15# Copyright (c) 2011 The Hewlett-Packard Development Company
16# All rights reserved.
17#
18# Redistribution and use in source and binary forms, with or without
19# modification, are permitted provided that the following conditions are
20# met: redistributions of source code must retain the above copyright
21# notice, this list of conditions and the following disclaimer;
22# redistributions in binary form must reproduce the above copyright
23# notice, this list of conditions and the following disclaimer in the
24# documentation and/or other materials provided with the distribution;
25# neither the name of the copyright holders nor the names of its
26# contributors may be used to endorse or promote products derived from
27# this software without specific prior written permission.
28#
29# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40#
41# Authors: Nathan Binkert
42#          Andreas Sandberg
43
44import os
45import re
46import sys
47
48from file_types import *
49
# Map each C standard library header name to the name of the C++
# header that replaces it ('foo.h' -> 'cfoo').
cpp_c_headers = dict(
    (base + '.h', 'c' + base)
    for base in (
        'assert', 'ctype', 'errno', 'float', 'limits', 'locale',
        'math', 'setjmp', 'signal', 'stdarg', 'stddef', 'stdio',
        'stdlib', 'string', 'time', 'wchar', 'wctype',
    )
)
69
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
def include_key(line):
    '''Build the sort key for an include line.

    Every path component except the last one is prefixed with a space
    so that directories are sorted before files. The line must be an
    include/import statement; anything else trips the assertion.'''

    found = include_re.match(line)
    assert found, line

    # Group 3 is the path between the include delimiters.
    components = found.group(3).split('/')
    if len(components) == 2 and components[0] == 'dnet':
        # Give all dnet includes the same key so they are not sorted
        # with respect to each other but still sort relative to the
        # non dnet includes.  Python guarantees that sorting is stable,
        # so blanking the basename is enough.
        components[1] = ' '

    # Only the directory components get the leading space; the final
    # component is left untouched.
    directories = [ ' ' + name for name in components[:-1] ]
    return '/'.join(directories + components[-1:])
92
93
94def _include_matcher(keyword="#include", delim="<>"):
95    """Match an include statement and return a (keyword, file, extra)
96    duple, or a touple of None values if there isn't a match."""
97
98    rex = re.compile(r'^(%s)\s*%s(.*)%s(.*)$' % (keyword, delim[0], delim[1]))
99
100    def matcher(context, line):
101        m = rex.match(line)
102        return m.groups() if m else (None, ) * 3
103
104    return matcher
105
def _include_matcher_fname(fname, **kwargs):
    """Build a matcher that only accepts includes of particular files.

    fname is a regular expression that is matched against the start of
    the included file name. All other keyword arguments are handed to
    _include_matcher, which parses the include line itself. The
    returned function yields a (keyword, file, extra) tuple on success
    and a tuple of three None values otherwise."""

    name_rex = re.compile(fname)
    parse_include = _include_matcher(**kwargs)
    no_match = (None, ) * 3

    def matcher(context, line):
        keyword, included, extra = parse_include(context, line)
        # Reject lines that are not includes at all as well as
        # includes of files the pattern does not cover.
        if not included or not name_rex.match(included):
            return no_match
        return (keyword, included, extra)

    return matcher
122
123
def _include_matcher_main():
    """Build a matcher for a C/C++ source file's primary header, i.e.
    a quoted include of the file that has the same name as the source
    file (relative to src/) but with a header extension."""

    parse_include = _include_matcher(delim='""')
    source_rex = re.compile(r"^src/(.*)\.([^.]+)$")
    # Source file extension -> extension of the corresponding header.
    header_ext = {
        "c" : "h",
        "cc" : "hh",
        "cpp" : "hh",
        }
    no_match = (None, ) * 3

    def matcher(context, line):
        # Files outside of src/ never have a primary header.
        source = source_rex.match(context["filename"])
        if source is None:
            return no_match

        stem, suffix = source.groups()
        keyword, included, extra = parse_include(context, line)
        if suffix not in header_ext:
            return no_match
        if included != "%s.%s" % (stem, header_ext[suffix]):
            return no_match

        return (keyword, included, extra)

    return matcher
150
class SortIncludes(object):
    """Sort and regroup the include statements of a source file.

    An instance is callable: given the lines of a file it yields the
    same lines with every run of include statements replaced by sorted
    blocks, one block per entry of block_order, separated by empty
    lines."""

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers
    # "*"                - M5 headers
    #
    # Each entry is (include type, delimiters used when the line is
    # written back, matcher). The entries are tried in order and the
    # first matcher that accepts a line decides its type.
    includes_re = (
        ('main', '""', _include_matcher_main()),
        ('python', '<>', _include_matcher_fname(r"^Python\.h$")),
        ('pybind', '""', _include_matcher_fname(r"^pybind11/.*\.h$",
                                                delim='""')),
        ('m5shared', '<>', _include_matcher_fname(r"^gem5/")),
        ('c', '<>', _include_matcher_fname(r"^.*\.h$")),
        ('stl', '<>', _include_matcher_fname(r"^\w+$")),
        ('cc', '<>', _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
        ('m5header', '""', _include_matcher_fname(r"^.*\.h{1,2}$",
                                                  delim='""')),
        ('swig0', '<>', _include_matcher(keyword="%import")),
        ('swig1', '<>', _include_matcher(keyword="%include")),
        ('swig2', '""', _include_matcher(keyword="%import", delim='""')),
        ('swig3', '""', _include_matcher(keyword="%include", delim='""')),
        )

    # Order in which the blocks are written out. Include types that
    # share a tuple are merged into a single block.
    block_order = (
        ('python', ),
        ('pybind', ),
        ('main', ),
        ('c', ),
        ('stl', ),
        ('cc', ),
        ('m5shared', ),
        ('m5header', ),
        ('swig0', 'swig1', 'swig2', 'swig3', ),
        )

    def __init__(self):
        # Map every include type to the index of its block in
        # block_order.
        self.block_priority = {}
        for prio, keys in enumerate(self.block_order):
            for key in keys:
                self.block_priority[key] = prio

        # Start out with empty storage so that dump_blocks() and
        # dump_includes() also work on a fresh instance.
        self.reset()

    def reset(self):
        # clear all stored headers
        self.includes = {}

    def dump_blocks(self, block_types):
        """Merge includes from several block types into one large
        block of sorted includes. This is useful when we have multiple
        include block types (e.g., swig includes) with the same
        priority.

        Duplicate lines are dropped and the result is sorted by plain
        string comparison of the complete include lines."""

        includes = []
        for block_type in block_types:
            includes += self.includes.get(block_type, [])

        return sorted(set(includes))

    def dump_includes(self):
        """Return all stored includes as a list of lines, one block per
        block_order entry with an empty line between non-empty blocks,
        and clear the stored includes."""

        includes = []
        for types in self.block_order:
            block = self.dump_blocks(types)
            if includes and block:
                includes.append("")
            includes += block

        self.reset()
        return includes

    def __call__(self, lines, filename, language):
        """Generate the lines of a file with its include blocks sorted.

        lines    -- iterable of the lines of the file
        filename -- name of the file; made available to the matchers
                    through the context dictionary
        language -- language of the file; for 'C++' the C headers
                    listed in cpp_c_headers are replaced by their C++
                    counterparts

        Empty lines inside a run of includes are dropped. When a run is
        followed by another line, the sorted includes, one empty line
        and then that line are emitted; a run that lasts until the end
        of the input is emitted without a trailing empty line."""

        self.reset()

        context = {
            "filename" : filename,
            "language" : language,
            }

        def match_line(line):
            """Classify a line. Returns (include type, normalized
            include line), or (None, line) for anything that is not an
            include."""
            if not line:
                return (None, line)

            for include_type, (ldelim, rdelim), matcher in self.includes_re:
                keyword, include, extra = matcher(context, line)
                if not keyword:
                    continue

                # we've got a match: fix up stl headers, clean up the
                # #include line and report its category
                if include_type == 'c' and language == 'C++':
                    stl_inc = cpp_c_headers.get(include)
                    if stl_inc:
                        include = stl_inc
                        include_type = 'stl'

                return (include_type,
                        keyword + ' ' + ldelim + include + rdelim + extra)

            return (None, line)

        processing_includes = False
        for line in lines:
            include_type, line = match_line(line)
            if include_type:
                self.includes.setdefault(include_type, []).append(line)
                processing_includes = True
            elif processing_includes and not line.strip():
                # Skip empty lines while processing includes
                pass
            elif processing_includes:
                # We are now exiting an include block
                processing_includes = False

                # Output pending includes, a new line between, and the
                # current line.
                for include in self.dump_includes():
                    yield include
                yield ''
                yield line
            else:
                # We are not in an include block, so just emit the line
                yield line

        # We've reached EOF, so dump any pending includes
        if processing_includes:
            for include in self.dump_includes():
                yield include
281
# language types that the sorting rules are applied to by default
default_languages = frozenset('C C++ isa python scons swig'.split())
284
def options():
    """Create the optparse command line parser.

    The parser knows three comma separated list options (directories
    to ignore, files to ignore, languages) whose defaults are the
    joined default sets, plus a dry-run flag."""
    import optparse

    parser = optparse.OptionParser()

    # The list options all share one shape, so describe them as data:
    # (short flag, long flag, metavar, default values, help text)
    list_options = (
        ('-d', '--dir_ignore', "DIR[,DIR]", default_dir_ignore,
         "ignore directories"),
        ('-f', '--file_ignore', "FILE[,FILE]", default_file_ignore,
         "ignore files"),
        ('-l', '--languages', "LANG[,LANG]", default_languages,
         "languages"),
        )
    for short_flag, long_flag, metavar, defaults, text in list_options:
        parser.add_option(short_flag, long_flag, metavar=metavar,
                          type='string', default=','.join(defaults),
                          help=text)

    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
302
def parse_args(parser):
    """Parse the command line with parser and post-process the result.

    The comma separated dir_ignore, file_ignore and languages option
    strings are replaced by frozensets of their elements. Returns the
    (options, positional arguments) pair."""
    opts, args = parser.parse_args()

    for name in ('dir_ignore', 'file_ignore', 'languages'):
        setattr(opts, name, frozenset(getattr(opts, name).split(',')))

    return opts, args
311
if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    # Every positional argument is a base path that is searched for
    # files of the selected languages.
    for base in args:
        for filename, language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Only report the file. The parenthesised single
                # argument prints the same text under Python 2 and
                # keeps the file parseable by Python 3.
                print("%s: %s" % (filename, language))
            else:
                # The same path is passed as both file arguments --
                # presumably source and destination, i.e. an in-place
                # rewrite; confirm against update_file.
                update_file(filename, filename, language, SortIncludes())
323