sort_includes.py revision 10275
12292SN/A#!/usr/bin/env python
213590Srekai.gonzalezalberquilla@arm.com
310239Sbinhpham@cs.rutgers.eduimport os
48707Sandreas.hansson@arm.comimport re
58707Sandreas.hansson@arm.comimport sys
68707Sandreas.hansson@arm.com
78707Sandreas.hansson@arm.comfrom file_types import *
88707Sandreas.hansson@arm.com
# Maps each C standard library header to the C++ wrapper header that
# replaces it.  SortIncludes consults this table when it processes a file
# whose language is 'C++', rewriting e.g. <stdio.h> to <cstdio>.
cpp_c_headers = {
    'assert.h': 'cassert',
    'ctype.h': 'cctype',
    'errno.h': 'cerrno',
    'float.h': 'cfloat',
    'limits.h': 'climits',
    'locale.h': 'clocale',
    'math.h': 'cmath',
    'setjmp.h': 'csetjmp',
    'signal.h': 'csignal',
    'stdarg.h': 'cstdarg',
    'stddef.h': 'cstddef',
    'stdio.h': 'cstdio',
    'stdlib.h': 'cstdlib',
    'string.h': 'cstring',
    'time.h': 'ctime',
    'wchar.h': 'cwchar',
    'wctype.h': 'cwctype',
}
282292SN/A
# Loose matcher used to pull the header path out of an include/import
# line.  Both wildcards are non-greedy so that the FIRST delimited token
# on the line is captured; a greedy match would instead pick up a quoted
# or bracketed word from a trailing comment.
include_re = re.compile(r'([#%])(include|import).*?[<"](.*?)[">]')


def include_key(line):
    '''Return the sort key for an include/import line.

    Every path component except the final one (the file name) gets a
    leading space, so that directories are sorted before files.

    line -- a '#include', '%include' or '%import' directive.

    Raises ValueError if the line is not such a directive.
    '''

    match = include_re.match(line)
    if not match:
        raise ValueError('not an include/import line: %r' % (line,))

    parts = match.group(3).split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '

    # Everything but the file part needs to have a space prepended
    parts[:-1] = [' ' + part for part in parts[:-1]]
    return '/'.join(parts)
5113449Sgabeblack@google.com
class SortIncludes(object):
    '''Callable line filter that sorts blocks of include directives.

    Calling an instance with the lines of a file yields the same lines
    back, except that every run of consecutive include/import
    directives (an "include block") is regrouped by category, sorted
    with include_key() inside each category, stripped of duplicate
    lines and normalised to 'keyword <delim>path<delim>extra'.
    '''

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (category, delimiter pair, pattern).  The order is
    # significant twice over: the first pattern that matches a line
    # wins, and dump_block() emits the categories in this order.  In
    # every pattern group 1 is the keyword, group 2 the path and the
    # last group whatever trails the closing delimiter.  The path is
    # never allowed to run across a closing delimiter, so a trailing
    # comment containing <...> or "..." cannot be swallowed into it.
    includes_re = (
        ('python', '<>', r'^(#include)[ \t]+<(Python[^>]*\.h)>(.*)'),
        ('c', '<>', r'^(#include)[ \t]+<([^>]+\.h)>(.*)'),
        ('stl', '<>', r'^(#include)[ \t]+<([0-9A-Za-z_]+)>(.*)'),
        ('cc', '<>',
         r'^(#include)[ \t]+<([0-9A-Za-z_]+\.(hh|hxx|hpp|H))>(.*)'),
        ('m5cc', '""', r'^(#include)[ \t]+"([^"]+\.h{1,2})"(.*)'),
        ('swig0', '<>', r'^(%import)[ \t]+<([^>]+)>(.*)'),
        ('swig1', '<>', r'^(%include)[ \t]+<([^>]+)>(.*)'),
        ('swig2', '""', r'^(%import)[ \t]+"([^"]+)"(.*)'),
        ('swig3', '""', r'^(%include)[ \t]+"([^"]+)"(.*)'),
        )

    # compile the regexes
    includes_re = tuple((a, b, re.compile(c)) for a, b, c in includes_re)

    def __init__(self):
        # start with empty categories so that dump_block() is usable
        # even before the instance has been called
        self.reset()

    def reset(self):
        '''clear all stored headers'''
        self.includes = dict(
            (include_type, []) for include_type, _, _ in self.includes_re)

    def dump_block(self):
        '''dump the includes

        Yields the stored lines category by category, with one empty
        line between non-empty categories.
        '''
        first = True
        for include_type, _, _ in self.includes_re:
            group = self.includes[include_type]
            if not group:
                continue

            if not first:
                # print a newline between groups of
                # include types
                yield ''
            first = False

            # print out the includes in the current group and sort
            # them according to include_key().  Identical lines are
            # emitted once; a set is used because lines sharing a key
            # (e.g. all dnet headers) need not end up adjacent.
            seen = set()
            for line in sorted(group, key=include_key):
                if line not in seen:
                    seen.add(line)
                    yield line

    def _classify(self, line, language):
        '''Return (category, cleaned up line) for an include directive,
        or None if the line matches none of the known patterns.'''
        for include_type, (ldelim, rdelim), matcher in self.includes_re:
            match = matcher.match(line)
            if not match:
                continue

            # if we've got a match, clean up the #include line and
            # fix up stl headers
            groups = match.groups()
            keyword = groups[0]
            include = groups[1]
            extra = groups[-1]
            if include_type == 'c' and language == 'C++':
                stl_inc = cpp_c_headers.get(include)
                if stl_inc:
                    include = stl_inc
                    include_type = 'stl'

            return (include_type,
                    keyword + ' ' + ldelim + include + rdelim + extra)

        return None

    def __call__(self, lines, filename, language):
        '''Yield the given lines with every include block sorted.

        lines    -- iterable of lines without line terminators; an
                    empty string is treated as a blank line.
        filename -- not used; kept for interface compatibility.
        language -- language name of the file; 'C++' enables the
                    rewriting of C headers to their C++ equivalents.
        '''
        self.reset()
        blanks = 0
        block = False

        for line in lines:
            if not line:
                blanks += 1
                if not block:
                    # if we're not in an include block, spit out the
                    # newline otherwise, skip it since we're going to
                    # control newlines within the include block
                    yield ''
                continue

            entry = self._classify(line, language)
            if entry is not None:
                # store the line in the proper category
                include_type, cleaned = entry
                self.includes[include_type].append(cleaned)

                # We've entered a block, don't keep track of blank
                # lines while in a block
                block = True
                blanks = 0
                continue

            # if we're not in a block and we didn't match an include
            # to enter a block, just emit the line and continue
            if not block:
                yield line
                continue

            # We've exited an include block.
            for block_line in self.dump_block():
                yield block_line

            # if there are any newlines after the include block,
            # emit a single newline (removing extras)
            if blanks:
                yield ''

            blanks = 0
            block = False
            self.reset()

            # emit the line that ended the block
            yield line

        if block:
            # The input ended while still inside an include block.
            for block_line in self.dump_block():
                yield block_line
17813590Srekai.gonzalezalberquilla@arm.com
1792292SN/A
18013590Srekai.gonzalezalberquilla@arm.com
# default language types to try to apply our sorting rules to
default_languages = frozenset((
    'C',
    'C++',
    'isa',
    'python',
    'scons',
    'swig',
))
1832292SN/A
def options():
    '''Build and return the command line option parser.

    The returned optparse.OptionParser accepts comma separated lists
    of directories to ignore (-d), files to ignore (-f) and languages
    to process (-l), plus a dry-run flag (-n).
    '''
    import optparse

    parser = optparse.OptionParser()
    add_option = parser.add_option
    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
               default=','.join(default_dir_ignore),
               help="ignore directories")
    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
               default=','.join(default_file_ignore),
               help="ignore files")
    # default_languages is a frozenset; sort it so that the default
    # string is the same on every run
    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
               default=','.join(sorted(default_languages)),
               help="languages")
    add_option('-n', '--dry-run', action='store_true',
               help="don't overwrite files")

    return parser
20113590Srekai.gonzalezalberquilla@arm.com
def parse_args(parser):
    '''Parse the command line with the given parser.

    parser -- an option parser as returned by options().

    Returns (opts, args) where the dir_ignore, file_ignore and
    languages attributes of opts have been converted from comma
    separated strings to frozensets.
    '''
    opts, args = parser.parse_args()

    def as_set(value):
        # Drop empty entries: ''.split(',') is [''], and a stray or
        # trailing comma would otherwise add '' to the set as well.
        items = (item.strip() for item in value.split(','))
        return frozenset(item for item in items if item)

    opts.dir_ignore = as_set(opts.dir_ignore)
    opts.file_ignore = as_set(opts.file_ignore)
    opts.languages = as_set(opts.languages)

    return opts, args
2102292SN/A
# Script entry point: every positional argument is a file or directory to
# scan with find_files(); each file found is rewritten in place with its
# include blocks sorted, or merely listed when --dry-run is given.
if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename, language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # a single parenthesised argument is valid both as the
                # Python 2 print statement and the Python 3 function
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())
2222292SN/A