sort_includes.py revision 10275
#!/usr/bin/env python

import os
import re
import sys

# NOTE(review): default_dir_ignore, default_file_ignore, find_files and
# update_file are not defined in this file; presumably they are provided
# by this wildcard import -- confirm against file_types.
from file_types import *

# Map from C standard library header names to the equivalent C++ header
# names. Used to rewrite '<foo.h>' includes when sorting C++ sources.
cpp_c_headers = {
    'assert.h' : 'cassert',
    'ctype.h' : 'cctype',
    'errno.h' : 'cerrno',
    'float.h' : 'cfloat',
    'limits.h' : 'climits',
    'locale.h' : 'clocale',
    'math.h' : 'cmath',
    'setjmp.h' : 'csetjmp',
    'signal.h' : 'csignal',
    'stdarg.h' : 'cstdarg',
    'stddef.h' : 'cstddef',
    'stdio.h' : 'cstdio',
    'stdlib.h' : 'cstdlib',
    'string.h' : 'cstring',
    'time.h' : 'ctime',
    'wchar.h' : 'cwchar',
    'wctype.h' : 'cwctype',
}

# Loose pattern used only to pull the included path out of an already
# recognised '#include' / '%include' / '%import' line when building sort
# keys. Group 2 is the keyword, group 3 is the path between the delimiters.
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')

def include_key(line):
    '''Return the sort key for an include line.

    Every path component except the last one is prefixed with a space so
    that directories are sorted before files.

    line -- an include line that include_re matches from its first column.

    Raises AssertionError (carrying the offending line) if the line is
    not matched by include_re.
    '''

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]

    return '/'.join(parts)

class SortIncludes(object):
    '''Callable that rewrites the include blocks of a sequence of lines.

    Consecutive include lines (optionally separated by blank lines) form
    a block. When a block ends, its lines are emitted grouped by include
    type in the order given by includes_re, one blank line between
    groups, each group sorted with include_key() and with adjacent
    duplicate lines removed.
    '''

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (type name, delimiter pair, pattern). In every
    # pattern group 1 is the keyword, group 2 the included path and the
    # last group whatever trails the closing delimiter. Order matters:
    # the first pattern that matches a line decides its type.
    includes_re = (
        ('python', '<>', r'^(#include)[ \t]+<(Python.*\.h)>(.*)'),
        ('c', '<>', r'^(#include)[ \t]+<(.+\.h)>(.*)'),
        ('stl', '<>', r'^(#include)[ \t]+<([0-9A-Za-z_]+)>(.*)'),
        ('cc', '<>',
         r'^(#include)[ \t]+<([0-9A-Za-z_]+\.(hh|hxx|hpp|H))>(.*)'),
        ('m5cc', '""', r'^(#include)[ \t]+"(.+\.h{1,2})"(.*)'),
        ('swig0', '<>', r'^(%import)[ \t]+<(.+)>(.*)'),
        ('swig1', '<>', r'^(%include)[ \t]+<(.+)>(.*)'),
        ('swig2', '""', r'^(%import)[ \t]+"(.+)"(.*)'),
        ('swig3', '""', r'^(%include)[ \t]+"(.+)"(.*)'),
        )

    # compile the regexes
    includes_re = tuple((a, b, re.compile(c)) for a,b,c in includes_re)

    def __init__(self):
        # Start with empty per-type lists so dump_block() is usable even
        # before the first __call__().
        self.reset()

    def reset(self):
        '''Forget all stored include lines.'''
        self.includes = {}
        for include_type,_,_ in self.includes_re:
            self.includes[include_type] = []

    def dump_block(self):
        '''Yield the stored include lines as one formatted block.

        Non-empty groups are produced in includes_re order, separated by
        a single empty line.
        '''
        first = True
        for include_type,_,_ in self.includes_re:
            group = self.includes[include_type]
            if not group:
                continue

            if not first:
                # print a newline between groups of include types
                yield ''
            first = False

            # print out the includes in the current group, sorted
            # according to include_key(); identical lines end up
            # adjacent after sorting and are emitted only once
            prev = None
            for l in sorted(group, key=include_key):
                if l != prev:
                    yield l
                prev = l

    def __call__(self, lines, filename, language):
        '''Generate the lines of a file with its include blocks sorted.

        lines    -- iterable of lines; an empty (falsy) item counts as a
                    blank line.
        filename -- accepted for the caller's benefit; not used here.
        language -- when equal to 'C++', C headers listed in
                    cpp_c_headers are rewritten to their C++ names and
                    grouped with the STL headers.

        Lines outside include blocks are passed through unchanged.
        Blank lines inside a block are dropped and the blank lines that
        follow a block are collapsed into a single one. Blank lines that
        trail a block at the very end of the input are not emitted.
        '''
        self.reset()
        blanks = 0
        block = False

        for line in lines:
            if not line:
                blanks += 1
                if not block:
                    # if we're not in an include block, spit out the
                    # newline; otherwise, skip it since we're going to
                    # control newlines within the include block
                    yield ''
                continue

            # Try to match each of the include types. The loop variable
            # is deliberately not called include_re so that it does not
            # shadow the module level pattern of that name.
            for include_type,(ldelim,rdelim),pattern in self.includes_re:
                match = pattern.match(line)
                if not match:
                    continue

                # if we've got a match, clean up the #include line,
                # fix up stl headers and store it in the proper category
                groups = match.groups()
                keyword = groups[0]
                include = groups[1]
                extra = groups[-1]
                if include_type == 'c' and language == 'C++':
                    stl_inc = cpp_c_headers.get(include, None)
                    if stl_inc:
                        include = stl_inc
                        include_type = 'stl'

                line = keyword + ' ' + ldelim + include + rdelim + extra

                self.includes[include_type].append(line)

                # We've entered a block, don't keep track of blank
                # lines while in a block
                block = True
                blanks = 0
                break
            else:
                # This line did not match any of the include types.
                # (An earlier revision asserted on a shadowed regex
                # here, which could never fail; it has been removed
                # rather than pointed at the module level include_re,
                # because that would reject ordinary lines such as
                # '#include "foo.cc"'.)

                # if we're not in a block and we didn't match an include
                # to enter a block, just emit the line and continue
                if not block:
                    yield line
                    continue

                # We've exited an include block.
                for block_line in self.dump_block():
                    yield block_line

                # if there are any newlines after the include block,
                # emit a single newline (removing extras)
                if blanks:
                    yield ''

                blanks = 0
                block = False
                self.reset()

                # emit the line that ended the block
                yield line

        if block:
            # The input ended while still inside an include block.
            for block_line in self.dump_block():
                yield block_line



# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))

def options():
    '''Build and return the optparse.OptionParser for the command line.'''
    import optparse
    parser = optparse.OptionParser()
    add_option = parser.add_option
    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
               default=','.join(default_dir_ignore),
               help="ignore directories")
    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
               default=','.join(default_file_ignore),
               help="ignore files")
    # sorted() only makes the default string deterministic; parse_args()
    # turns it back into a frozenset, so the order carries no meaning.
    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
               default=','.join(sorted(default_languages)),
               help="languages")
    add_option('-n', '--dry-run', action='store_true',
               help="don't overwrite files")

    return parser

def parse_args(parser):
    '''Parse the command line with parser.

    Returns (opts, args) as produced by parser.parse_args(), with the
    comma separated dir_ignore, file_ignore and languages options
    converted to frozensets of strings.
    '''
    opts,args = parser.parse_args()

    opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
    opts.file_ignore = frozenset(opts.file_ignore.split(','))
    opts.languages = frozenset(opts.languages.split(','))

    return opts,args

if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename,language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # Parenthesized single argument: valid as a print
                # statement on Python 2 and as a call on Python 3.
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())