# Copyright (c) 2010 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert

import io
import os

# language type for each file extension
lang_types = {
    '.c': "C",
    '.cl': "C",
    '.h': "C",
    '.cc': "C++",
    '.hh': "C++",
    '.cxx': "C++",
    '.hxx': "C++",
    '.cpp': "C++",
    '.hpp': "C++",
    '.C': "C++",
    '.H': "C++",
    '.i': "swig",
    '.py': "python",
    '.pl': "perl",
    '.pm': "perl",
    '.s': "asm",
    '.S': "asm",
    '.l': "lex",
    '.ll': "lex",
    '.y': "yacc",
    '.yy': "yacc",
    '.isa': "isa",
    '.sh': "shell",
    '.slicc': "slicc",
    '.sm': "slicc",
    '.awk': "awk",
    '.el': "lisp",
    '.txt': "text",
    '.tex': "tex",
    '.mk': "make",
    }

# languages based on file prefix
lang_prefixes = (
    ('SCons', 'scons'),
    ('Make', 'make'),
    ('make', 'make'),
    ('Doxyfile', 'doxygen'),
    )

# languages based on the #! line at the start of the file.  Entries are
# tried in order and matched as substrings, so 'sh' also matches
# interpreters such as bash or zsh.
hash_bang = (
    ('python', 'python'),
    ('perl', 'perl'),
    ('sh', 'shell'),
    )

# the set of all languages that we detect
all_languages = frozenset(lang_types.values())
all_languages |= frozenset(lang for start, lang in lang_prefixes)
all_languages |= frozenset(lang for start, lang in hash_bang)


def lang_type(filename, firstline=None, openok=True):
    '''Identify the language of a file.

    Detection is attempted in this order: by file extension
    (lang_types), by basename prefix (lang_prefixes), and finally by
    the interpreter named on a "#!" first line (hash_bang).

    filename  -- path of the file; only its basename is used for the
                 extension and prefix checks
    firstline -- first line of the file, if the caller already has it
    openok    -- if firstline is None and openok is true, the file is
                 opened and its first line read, but only when the
                 name alone was not enough to decide

    Returns the language name, or None if it could not be detected.
    An error from opening the file is propagated to the caller.'''

    basename = os.path.basename(filename)
    name, extension = os.path.splitext(basename)

    # first try to detect language based on file extension
    try:
        return lang_types[extension]
    except KeyError:
        pass

    # now try to detect language based on file prefix
    for start, lang in lang_prefixes:
        if basename.startswith(start):
            return lang

    # if a first line was not provided but the file is ok to open,
    # grab the first line of the file.  Undecodable bytes are replaced
    # rather than raised on: only a leading "#!" matters here, and a
    # binary file must not abort the detection.
    if firstline is None and openok:
        with io.open(filename, 'r', errors='replace') as handle:
            firstline = handle.readline()

    # try to detect language based on #! in first line
    if firstline and firstline.startswith('#!'):
        for string, lang in hash_bang:
            if string in firstline:
                return lang

    # sorry, we couldn't detect the language
    return None


# directories and files to ignore by default
default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext'))
default_file_ignore = frozenset(('parsetab.py', ))


def find_files(base, languages=all_languages,
               dir_ignore=default_dir_ignore,
               file_ignore=default_file_ignore):
    '''Find all files of the given languages below a directory.

    base        -- directory to walk (with or without a trailing
                   separator)
    languages   -- container of language names to report
    dir_ignore  -- directory names that are not descended into
    file_ignore -- file names that are skipped

    This is a generator yielding (path, language) pairs, where path is
    the file name joined onto base and language is the result of
    lang_type() for that file.  Files whose language cannot be detected
    are never yielded.'''

    # make sure base ends in a path separator; unlike indexing base[-1]
    # this leaves an empty base empty instead of raising IndexError
    base = os.path.join(base, '')

    # walk over base
    for root, dirs, files in os.walk(base):
        # strip ignored directories from the list.  The list has to be
        # modified in place so that os.walk does not descend into them.
        dirs[:] = [d for d in dirs if d not in dir_ignore]

        for filename in files:
            if filename in file_ignore:
                # skip ignored files
                continue

            # try to figure out the language of the specified file
            fullpath = os.path.join(root, filename)
            language = lang_type(fullpath)

            # if the file is one of the languages that we want return
            # its name and the language
            if language in languages:
                yield fullpath, language


def update_file(dst, src, language, mutator):
    '''Rewrite a file of the specified language through a mutator.

    dst      -- destination: a file name or a writable file object
    src      -- source: a file name or a readable file object that has
                a name attribute
    language -- language name, passed through to the mutator
    mutator  -- callable invoked as mutator(lines, name, language),
                where lines is the list of source lines without their
                line endings and name is the name of the source file;
                it returns an iterable of the new lines

    If dst and src compare equal the file is updated in place, and it
    is left untouched when the mutator did not change any line.
    Otherwise the new lines are written to dst.  Every line is written
    with a "\\n" line ending.

    Files opened by this function are closed before it returns; file
    objects supplied by the caller are left open.  Returns None.'''

    # if the source and destination are the same, we're updating in place
    inplace = dst == src

    # handles opened here (as opposed to handed in by the caller)
    opened = []
    try:
        if isinstance(src, str):
            # if a filename was provided, open the file
            src = open(src, 'r+' if inplace else 'r')
            opened.append(src)

        # grab all of the lines of the file and strip them of their
        # line ending
        old_lines = [line.rstrip('\r\n') for line in src]
        new_lines = list(mutator(old_lines, src.name, language))

        if inplace:
            # if we're updating in place and the file hasn't changed,
            # do nothing
            if old_lines == new_lines:
                return

            # otherwise, truncate the file and seek to the beginning.
            dst = src
            dst.truncate(0)
            dst.seek(0)
        elif isinstance(dst, str):
            # if we're not updating in place and a destination file name
            # was provided, create a file object
            dst = open(dst, 'w')
            opened.append(dst)

        for line in new_lines:
            dst.write(line)
            dst.write('\n')
    finally:
        # closing also flushes the output that was just written
        for handle in opened:
            handle.close()