# file_types.py revision 11541
# Copyright (c) 2010 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
2813521Sgabeblack@google.com
2913521Sgabeblack@google.comimport os
3013521Sgabeblack@google.com
# language type for each file extension (keys include the leading dot
# and are case sensitive: '.c' is C but '.C' is C++)
lang_types = {
    '.c'     : "C",
    '.cl'    : "C",
    '.h'     : "C",
    '.cc'    : "C++",
    '.hh'    : "C++",
    '.cxx'   : "C++",
    '.hxx'   : "C++",
    '.cpp'   : "C++",
    '.hpp'   : "C++",
    '.C'     : "C++",
    '.H'     : "C++",
    '.i'     : "swig",
    '.py'    : "python",
    '.pl'    : "perl",
    '.pm'    : "perl",
    '.s'     : "asm",
    '.S'     : "asm",
    '.l'     : "lex",
    '.ll'    : "lex",
    '.y'     : "yacc",
    '.yy'    : "yacc",
    '.isa'   : "isa",
    '.sh'    : "shell",
    '.slicc' : "slicc",
    '.sm'    : "slicc",
    '.awk'   : "awk",
    '.el'    : "lisp",
    '.txt'   : "text",
    '.tex'   : "tex",
    '.mk'    : "make",
    '.dts'   : "dts",
    }

# languages based on file prefix, as (prefix, language) pairs
lang_prefixes = (
    ('SCons',    'scons'),
    ('Make',     'make'),
    ('make',     'make'),
    ('Doxyfile', 'doxygen'),
    )

# languages based on the #! line at the top of a file, as
# (substring, language) pairs; pairs are tried in order, so the more
# specific names come before 'sh'
hash_bang = (
    ('python', 'python'),
    ('perl',   'perl'),
    ('sh',     'shell'),
    )

# the set of all languages that we detect; values() is used instead of
# itervalues() because it exists on both Python 2 and Python 3
all_languages = frozenset(lang_types.values())
all_languages |= frozenset(lang for _, lang in lang_prefixes)
all_languages |= frozenset(lang for _, lang in hash_bang)
8513521Sgabeblack@google.com
def lang_type(filename, firstline=None, openok=True):
    '''Identify the language of a file.

    Detection is attempted in this order, returning on the first hit:
      1. the extension of the base name, looked up in lang_types
      2. the start of the base name, matched against lang_prefixes
      3. a "#!" first line, searched for the substrings in hash_bang

    filename  -- path of the file to classify
    firstline -- first line of the file if the caller already has it
    openok    -- if True and firstline is None, the file is opened and
                 its first line read, but only when steps 1 and 2 fail

    Returns the language name, or None if it could not be detected.
    Errors raised while opening or reading the file are not caught.'''

    basename = os.path.basename(filename)
    extension = os.path.splitext(basename)[1]

    # first try to detect language based on file extension
    try:
        return lang_types[extension]
    except KeyError:
        pass

    # now try to detect language based on file prefix
    for start, lang in lang_prefixes:
        if basename.startswith(start):
            return lang

    # if a first line was not provided but the file is ok to open,
    # grab the first line of the file.  The with block closes the
    # handle even if the read raises.
    if firstline is None and openok:
        with open(filename, 'r') as handle:
            firstline = handle.readline()

    # try to detect language based on #! in first line
    if firstline and firstline.startswith('#!'):
        for string, lang in hash_bang:
            if firstline.find(string) > 0:
                return lang

    # sorry, we couldn't detect the language
    return None
121
# directories and files to ignore by default
default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext'))
default_file_ignore = frozenset(('parsetab.py', ))

def find_files(base, languages=all_languages,
               dir_ignore=default_dir_ignore,
               file_ignore=default_file_ignore):
    '''Generate (path, language) pairs for the files below a directory.

    base        -- directory to walk, with or without a trailing slash;
                   an empty string yields nothing
    languages   -- container of language names to report; files whose
                   detected language is not in it are skipped
    dir_ignore  -- directory names that are not descended into
    file_ignore -- file names that are skipped

    Each yielded path is base joined with the path of the file below
    it.  The language comes from lang_type(), which may open the file
    to read its first line.'''

    for root, dirs, files in os.walk(base):
        # prune in place: os.walk only skips a directory if it is
        # removed from the very list object that it handed out
        dirs[:] = [d for d in dirs if d not in dir_ignore]

        for filename in files:
            if filename in file_ignore:
                # skip ignored files
                continue

            # try to figure out the language of the specified file
            fullpath = os.path.join(root, filename)
            language = lang_type(fullpath)

            # if the file is one of the languages that we want, return
            # its name and the language
            if language in languages:
                yield fullpath, language
162
def update_file(dst, src, language, mutator):
    '''Run the lines of a file through a mutator and write the result.

    dst      -- destination: a file name or an open, writable file object
    src      -- source: a file name or an open file object that has a
                name attribute
    language -- language of the file; passed through to the mutator
    mutator  -- called as mutator(lines, name, language), where lines is
                the list of source lines without their line endings and
                name is the name of the source file; it must return an
                iterable of the new lines, also without line endings

    If dst and src compare equal the file is updated in place, and it is
    left untouched when the mutator does not change any line.  Every
    line written is terminated with a single newline.

    Files that are opened here are closed before returning; file
    objects passed in by the caller are left open.  Returns None.'''

    # if the source and destination are the same, we're updating in place
    inplace = dst == src

    # handles opened by this function, which it is responsible for closing
    opened = []
    try:
        if isinstance(src, str):
            # if a filename was provided, open the file; updating in
            # place needs a handle that can also be written
            src = open(src, 'r+' if inplace else 'r')
            opened.append(src)

        # grab all of the lines of the file and strip them of their
        # line ending
        old_lines = [line.rstrip('\r\n') for line in src]
        new_lines = list(mutator(old_lines, src.name, language))

        if inplace:
            # if we're updating in place and the file hasn't changed,
            # do nothing
            if old_lines == new_lines:
                return

            # otherwise, truncate the file and seek to the beginning.
            dst = src
            dst.truncate(0)
            dst.seek(0)
        elif isinstance(dst, str):
            # if we're not updating in place and a destination file
            # name was provided, create a file object
            dst = open(dst, 'w')
            opened.append(dst)

        for line in new_lines:
            dst.write(line)
            dst.write('\n')
    finally:
        # closing also flushes, so the output is on disk on return
        for handle in opened:
            handle.close()
206