file_types.py revision 11403
111403Sandreas.sandberg@arm.comimport os
211403Sandreas.sandberg@arm.com
# language type for each file extension (the extension includes the
# leading dot and is matched case-sensitively, e.g. '.c' vs. '.C')
lang_types = {
    '.c'     : "C",
    '.cl'    : "C",
    '.h'     : "C",
    '.cc'    : "C++",
    '.hh'    : "C++",
    '.cxx'   : "C++",
    '.hxx'   : "C++",
    '.cpp'   : "C++",
    '.hpp'   : "C++",
    '.C'     : "C++",
    '.H'     : "C++",
    '.i'     : "swig",
    '.py'    : "python",
    '.pl'    : "perl",
    '.pm'    : "perl",
    '.s'     : "asm",
    '.S'     : "asm",
    '.l'     : "lex",
    '.ll'    : "lex",
    '.y'     : "yacc",
    '.yy'    : "yacc",
    '.isa'   : "isa",
    '.sh'    : "shell",
    '.slicc' : "slicc",
    '.sm'    : "slicc",
    '.awk'   : "awk",
    '.el'    : "lisp",
    '.txt'   : "text",
    '.tex'   : "tex",
    '.mk'    : "make",
    }

# languages based on file prefix, as (prefix, language) pairs
lang_prefixes = (
    ('SCons',    'scons'),
    ('Make',     'make'),
    ('make',     'make'),
    ('Doxyfile', 'doxygen'),
    )

# languages based on the #! first line of a file, as (keyword, language)
# pairs; the pairs are tried in order, so 'sh' must stay last
hash_bang = (
    ('python', 'python'),
    ('perl',   'perl'),
    ('sh',     'shell'),
    )

# the list of all languages that we detect
# (dict.values() rather than itervalues() so this works on Python 2 and 3)
all_languages = frozenset(lang_types.values())
all_languages |= frozenset(lang for start,lang in lang_prefixes)
all_languages |= frozenset(lang for start,lang in hash_bang)
5611403Sandreas.sandberg@arm.com
def lang_type(filename, firstline=None, openok=True):
    '''identify the language of a given filename and potentially the
    first line of the file.

    Detection is attempted in this order, stopping at the first match:
      1. the file extension, looked up in lang_types
      2. a prefix of the file's base name, from lang_prefixes
      3. a "#!" first line containing a keyword from hash_bang

    filename  -- path of the file; it is only opened if steps 1 and 2
                 fail, firstline is None and openok is True
    firstline -- the first line of the file, if the caller already has it
    openok    -- whether the file may be opened to read its first line

    Returns the language name, or None if it could not be detected.
    Errors raised while opening or reading the file are propagated.'''

    basename = os.path.basename(filename)
    extension = os.path.splitext(basename)[1]

    # first try to detect language based on file extension
    if extension in lang_types:
        return lang_types[extension]

    # now try to detect language based on file prefix
    for start,lang in lang_prefixes:
        if basename.startswith(start):
            return lang

    # if a first line was not provided but the file is ok to open,
    # grab the first line of the file.
    if firstline is None and openok:
        # Read bytes and decode as latin-1, which maps every byte value,
        # so a binary or non-UTF-8 file cannot raise a decode error; only
        # the ASCII "#!" prefix and keywords are inspected below.  The
        # with block closes the handle even if the read fails.
        with open(filename, 'rb') as handle:
            firstline = handle.readline().decode('latin-1')

    # try to detect language based on #! in first line
    if firstline and firstline.startswith('#!'):
        for keyword,lang in hash_bang:
            if keyword in firstline:
                return lang

    # sorry, we couldn't detect the language
    return None
9211403Sandreas.sandberg@arm.com
# directories and files to ignore by default; entries are compared
# against bare directory / file names, not against full paths
default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext'))
default_file_ignore = frozenset(('parsetab.py', ))
9611403Sandreas.sandberg@arm.com
def find_files(base, languages=all_languages,
               dir_ignore=default_dir_ignore,
               file_ignore=default_file_ignore):
    '''find all files in a directory and its subdirectories based on a
    set of languages, ignore directories specified in dir_ignore and
    files specified in file_ignore.

    base        -- directory to walk; a trailing '/' is added if missing.
                   An empty string yields nothing.
    languages   -- container of language names to report
    dir_ignore  -- directory names that are not descended into
    file_ignore -- file names that are skipped

    This is a generator yielding (path, language) tuples, where path
    is the file's path including base and language is the value
    returned by lang_type() for that path.'''

    # Leave an empty base alone: indexing it would raise IndexError and
    # appending '/' would turn it into the filesystem root.
    if base and not base.endswith('/'):
        base += '/'

    # walk over base
    for root,dirs,files in os.walk(base):
        # strip ignored directories from the list; this must modify the
        # list in place, since that is how os.walk is told not to
        # descend into them
        dirs[:] = [d for d in dirs if d not in dir_ignore]

        for filename in files:
            if filename in file_ignore:
                # skip ignored files
                continue

            # try to figure out the language of the specified file
            fullpath = os.path.join(root, filename)
            language = lang_type(fullpath)

            # if the file is one of the languages that we want return
            # its name and the language
            if language in languages:
                yield fullpath, language
13311403Sandreas.sandberg@arm.com
def update_file(dst, src, language, mutator):
    '''update a file of the specified language with the provided
    mutator generator.

    dst      -- destination: a file name or an object with a write()
                method.  If dst == src the file is updated in place.
    src      -- source: a file name or an open file object.  It must
                be iterable by line and have a name attribute; for an
                in-place update it must also support seek() and
                truncate().
    language -- language name, passed through to the mutator
    mutator  -- called as mutator(lines, name, language), where lines
                is the list of source lines without their line endings
                and name is src.name; it returns an iterable of output
                lines, also without line endings

    Each output line is written followed by a newline.  An in-place
    update leaves the file untouched if the mutator did not change
    any line.  Files opened here from a file name are closed before
    returning; file objects passed in by the caller are left open.
    Returns None.'''

    # if the source and destination are the same, we're updating in place
    inplace = dst == src

    # handles opened by this function, which it must close itself
    owned = []
    try:
        if isinstance(src, str):
            # if a filename was provided, open the file
            src = open(src, 'r+' if inplace else 'r')
            owned.append(src)

        # grab all of the lines of the file and strip them of their
        # line ending
        old_lines = [line.rstrip('\r\n') for line in src]
        new_lines = list(mutator(old_lines, src.name, language))

        if inplace:
            # if we're updating in place and the file hasn't changed,
            # do nothing
            if old_lines == new_lines:
                return

            # otherwise, rewind and discard the old contents
            dst = src
            dst.seek(0)
            dst.truncate()
        elif isinstance(dst, str):
            # if we're not updating in place and a destination file name
            # was provided, create a file object
            dst = open(dst, 'w')
            owned.append(dst)

        for line in new_lines:
            dst.write(line)
            dst.write('\n')
    finally:
        for handle in owned:
            handle.close()
177