# file_types.py revision 11403
import os

# language type for each file extension
lang_types = {
    '.c': "C",
    '.cl': "C",
    '.h': "C",
    '.cc': "C++",
    '.hh': "C++",
    '.cxx': "C++",
    '.hxx': "C++",
    '.cpp': "C++",
    '.hpp': "C++",
    '.C': "C++",
    '.H': "C++",
    '.i': "swig",
    '.py': "python",
    '.pl': "perl",
    '.pm': "perl",
    '.s': "asm",
    '.S': "asm",
    '.l': "lex",
    '.ll': "lex",
    '.y': "yacc",
    '.yy': "yacc",
    '.isa': "isa",
    '.sh': "shell",
    '.slicc': "slicc",
    '.sm': "slicc",
    '.awk': "awk",
    '.el': "lisp",
    '.txt': "text",
    '.tex': "tex",
    '.mk': "make",
}

# languages based on file prefix
lang_prefixes = (
    ('SCons', 'scons'),
    ('Make', 'make'),
    ('make', 'make'),
    ('Doxyfile', 'doxygen'),
)

# languages based on the #! line at the top of the file; entries are
# tried in order, so 'sh' must stay last or it would shadow nothing but
# would still match e.g. "bash"
hash_bang = (
    ('python', 'python'),
    ('perl', 'perl'),
    ('sh', 'shell'),
)

# the set of all languages that we can detect
all_languages = frozenset(lang_types.values())
all_languages |= frozenset(lang for _, lang in lang_prefixes)
all_languages |= frozenset(lang for _, lang in hash_bang)


def lang_type(filename, firstline=None, openok=True):
    """Identify the language of a file from its name and first line.

    Detection is attempted in this order: the file extension
    (lang_types), the start of the base name (lang_prefixes), and
    finally a '#!' line (hash_bang).

    filename  -- path of the file; only the base name is used for the
                 extension and prefix checks
    firstline -- first line of the file, if the caller already has it
    openok    -- if True and firstline is None, the file is opened to
                 read its first line when the name alone is not enough

    Returns the language name, or None if it could not be detected.
    Errors from opening or reading the file are propagated.
    """
    basename = os.path.basename(filename)
    extension = os.path.splitext(basename)[1]

    # first try to detect the language based on the file extension
    try:
        return lang_types[extension]
    except KeyError:
        pass

    # now try to detect the language based on the file prefix
    for start, lang in lang_prefixes:
        if basename.startswith(start):
            return lang

    # if a first line was not provided but the file is ok to open,
    # grab the first line of the file.  Read bytes and decode them as
    # latin-1 (which accepts every byte value) so that sniffing a
    # binary file cannot fail with a decoding error.
    if firstline is None and openok:
        with open(filename, 'rb') as handle:
            firstline = handle.readline().decode('latin-1')

    # try to detect the language based on a #! in the first line
    if firstline and firstline.startswith('#!'):
        for string, lang in hash_bang:
            if firstline.find(string) > 0:
                return lang

    # sorry, we couldn't detect the language
    return None


# directories and files to ignore by default
default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext'))
default_file_ignore = frozenset(('parsetab.py', ))


def find_files(base, languages=all_languages,
               dir_ignore=default_dir_ignore,
               file_ignore=default_file_ignore):
    """Find the files under a directory that are in a set of languages.

    Walks base and its subdirectories, skipping every directory whose
    name is in dir_ignore and every file whose name is in file_ignore.

    Yields (path, language) pairs for each file whose detected language
    is in languages.  The path is base joined with the file's location
    below it.  Files without a recognised name are opened by lang_type
    to inspect their first line.
    """
    for root, dirs, files in os.walk(base):
        # prune ignored directories in place so os.walk does not
        # descend into them
        dirs[:] = [d for d in dirs if d not in dir_ignore]

        for filename in files:
            if filename in file_ignore:
                # skip ignored files
                continue

            # try to figure out the language of the specified file
            fullpath = os.path.join(root, filename)
            language = lang_type(fullpath)

            # if the file is in one of the languages that we want,
            # return its name and the language
            if language in languages:
                yield fullpath, language


def update_file(dst, src, language, mutator):
    """Rewrite a file of the given language through a mutator.

    dst      -- destination: a file name, or a writable file-like object
    src      -- source: a file name, or a readable file-like object
                that has a name attribute
    language -- language name passed through to the mutator
    mutator  -- callable invoked as mutator(lines, name, language),
                where lines is the list of source lines without their
                line endings; it returns an iterable of output lines

    If dst and src compare equal, the file is updated in place, and is
    left untouched when the mutator changes nothing.  Otherwise the
    result is written to dst.  Every output line is terminated with
    a newline.  Files opened here are closed before returning; file
    objects supplied by the caller are left open.  Returns None.
    """
    # if the source and destination are the same, we're updating in place
    inplace = dst == src

    opened_src = isinstance(src, str)
    opened_dst = False
    if opened_src:
        # a filename was provided, so open the file ourselves
        src = open(src, 'r+' if inplace else 'r')

    try:
        # grab all of the lines of the file, stripped of their line ending
        old_lines = [line.rstrip('\r\n') for line in src]
        new_lines = list(mutator(old_lines, src.name, language))

        if inplace:
            # if the file hasn't changed, do nothing
            if old_lines == new_lines:
                return

            # otherwise, truncate the file and seek to the beginning
            dst = src
            dst.truncate(0)
            dst.seek(0)
        elif isinstance(dst, str):
            # not updating in place and a destination file name was
            # provided, so create the file object
            dst = open(dst, 'w')
            opened_dst = True

        for line in new_lines:
            dst.write(line)
            dst.write('\n')
    finally:
        # only close what was opened here; in the in-place case dst is
        # src, so closing src is enough
        if opened_dst:
            dst.close()
        if opened_src:
            src.close()