# Copyright (c) 2010 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
2811508Sandreas.sandberg@arm.com
2911403Sandreas.sandberg@arm.comimport os
3011403Sandreas.sandberg@arm.com
# language type for each file extension; keys are the extension as
# returned by os.path.splitext (leading dot included, case sensitive)
lang_types = {
    '.c'     : "C",
    '.cl'    : "C",
    '.h'     : "C",
    '.cc'    : "C++",
    '.hh'    : "C++",
    '.cxx'   : "C++",
    '.hxx'   : "C++",
    '.cpp'   : "C++",
    '.hpp'   : "C++",
    '.C'     : "C++",
    '.H'     : "C++",
    '.i'     : "swig",
    '.py'    : "python",
    '.pl'    : "perl",
    '.pm'    : "perl",
    '.s'     : "asm",
    '.S'     : "asm",
    '.l'     : "lex",
    '.ll'    : "lex",
    '.y'     : "yacc",
    '.yy'    : "yacc",
    '.isa'   : "isa",
    '.sh'    : "shell",
    '.slicc' : "slicc",
    '.sm'    : "slicc",
    '.awk'   : "awk",
    '.el'    : "lisp",
    '.txt'   : "text",
    '.tex'   : "tex",
    '.mk'    : "make",
    '.dts'   : "dts",
    }
6511403Sandreas.sandberg@arm.com
# languages based on file prefix: (prefix, language) pairs, tried in
# order against the start of the file's basename
lang_prefixes = (
    ('SCons',    'scons'),
    ('Make',     'make'),
    ('make',     'make'),
    ('Doxyfile', 'doxygen'),
    )
7311403Sandreas.sandberg@arm.com
# languages based on the #! (first) line of the file: (substring,
# language) pairs, tried in order, so 'python' and 'perl' are checked
# before the much shorter 'sh'
hash_bang = (
    ('python', 'python'),
    ('perl',   'perl'),
    ('sh',     'shell'),
    )
8011403Sandreas.sandberg@arm.com
# the set of all languages that we detect, collected from the three
# detection tables above.  dict.values() is used rather than
# itervalues() so that this works on both Python 2 and Python 3.
all_languages = frozenset(lang_types.values())
all_languages |= frozenset(lang for start, lang in lang_prefixes)
all_languages |= frozenset(lang for start, lang in hash_bang)
8511403Sandreas.sandberg@arm.com
def lang_type(filename, firstline=None, openok=True):
    '''Identify the language of a file from its name and, if needed,
    its first line.

    Detection is attempted in this order: the file extension
    (lang_types), the start of the basename (lang_prefixes), and
    finally a '#!' first line (hash_bang).

    filename  -- path of the file; only its basename is used for the
                 extension and prefix checks
    firstline -- the first line of the file, if the caller has it
    openok    -- if True and firstline is None, the file is opened and
                 its first line read, but only when the name alone was
                 not enough to decide

    Returns the language name, or None if it could not be detected.
    Errors from opening or reading the file are not caught here.'''

    basename = os.path.basename(filename)
    extension = os.path.splitext(basename)[1]

    # first try to detect language based on file extension
    try:
        return lang_types[extension]
    except KeyError:
        pass

    # now try to detect language based on file prefix
    for start, lang in lang_prefixes:
        if basename.startswith(start):
            return lang

    # if a first line was not provided but the file is ok to open,
    # grab the first line of the file.  The file is read as bytes and
    # decoded as latin-1 (which accepts every byte value) so that a
    # non-text file cannot raise a decoding error; only the ASCII
    # '#!' marker and interpreter name are looked at below.
    if firstline is None and openok:
        with open(filename, 'rb') as handle:
            firstline = handle.readline().decode('latin-1')

    # try to detect language based on #! in first line
    if firstline and firstline.startswith('#!'):
        for string, lang in hash_bang:
            if string in firstline:
                return lang

    # sorry, we couldn't detect the language
    return None
12111403Sandreas.sandberg@arm.com
# directory names and file names (not paths) to ignore by default
default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext'))
default_file_ignore = frozenset(('parsetab.py', ))
12511403Sandreas.sandberg@arm.com
def find_files(base, languages=all_languages,
               dir_ignore=default_dir_ignore,
               file_ignore=default_file_ignore):
    '''Find all files of the given languages under a directory.

    base        -- directory to walk (subdirectories included)
    languages   -- container of language names to report
    dir_ignore  -- directory names that are not descended into
    file_ignore -- file names that are skipped

    This is a generator yielding (path, language) pairs, where path is
    the file's path including base and language is what lang_type()
    returned for it.  Files whose language cannot be detected are only
    yielded if None is in languages.  An empty base yields nothing.'''

    # make sure base ends in a separator.  An empty base is left alone:
    # indexing base[-1] would raise, and turning '' into '/' would walk
    # the whole file system.
    if base and not base.endswith('/'):
        base += '/'

    # walk over base
    for root, dirs, files in os.walk(base):
        # strip ignored directories from the list.  The list must be
        # modified in place (slice assignment) for os.walk to skip them.
        dirs[:] = [d for d in dirs if d not in dir_ignore]

        for filename in files:
            if filename in file_ignore:
                # skip ignored files
                continue

            # try to figure out the language of the specified file.
            # root already starts with base, so joining it with the
            # file name gives the full path.
            fullpath = os.path.join(root, filename)
            language = lang_type(fullpath)

            # if the file is one of the languages that we want, return
            # its name and the language
            if language in languages:
                yield fullpath, language
16211403Sandreas.sandberg@arm.com
def update_file(dst, src, language, mutator):
    '''Rewrite a file of the specified language through a mutator.

    dst      -- destination: a file name or an object with a write()
                method.  If dst equals src the source is updated in
                place.
    src      -- source: a file name or an open file object.  A file
                object must be iterable by line and have a name
                attribute; for an in-place update it must also support
                truncate() and seek().
    language -- passed through unchanged to the mutator
    mutator  -- called as mutator(old_lines, src.name, language) where
                old_lines is the list of source lines stripped of their
                line endings; it returns an iterable of the new lines,
                also without line endings

    Each new line is written to the destination followed by a newline.
    When updating in place and the mutator returns the lines unchanged,
    the file is not written at all.  Files opened by this function are
    closed before it returns; file objects passed in by the caller are
    left open.  Returns None.'''

    # if the source and destination are the same, we're updating in place
    inplace = dst == src

    # handles opened here (rather than passed in) that we must close
    opened = []
    try:
        if isinstance(src, str):
            # if a filename was provided, open the file; in-place
            # updates need it writable as well
            src = open(src, 'r+' if inplace else 'r')
            opened.append(src)

        # grab all of the lines of the file and strip them of their
        # line ending
        old_lines = [line.rstrip('\r\n') for line in src]
        new_lines = list(mutator(old_lines, src.name, language))

        if inplace:
            # if we're updating in place and the file hasn't changed,
            # do nothing
            if old_lines == new_lines:
                return

            # otherwise, truncate the file and seek to the beginning.
            dst = src
            dst.truncate(0)
            dst.seek(0)
        elif isinstance(dst, str):
            # if we're not updating in place and a destination file
            # name was provided, create a file object
            dst = open(dst, 'w')
            opened.append(dst)

        for line in new_lines:
            dst.write(line)
            dst.write('\n')
    finally:
        for handle in opened:
            handle.close()
206