# file_types.py revision 11541:3d518944f0cc
# Copyright (c) 2010 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
28
29import os
30
# language type for each file extension (keys are case sensitive and
# include the leading dot, as returned by os.path.splitext)
lang_types = {
    '.c'     : "C",
    '.cl'    : "C",
    '.h'     : "C",
    '.cc'    : "C++",
    '.hh'    : "C++",
    '.cxx'   : "C++",
    '.hxx'   : "C++",
    '.cpp'   : "C++",
    '.hpp'   : "C++",
    '.C'     : "C++",
    '.H'     : "C++",
    '.i'     : "swig",
    '.py'    : "python",
    '.pl'    : "perl",
    '.pm'    : "perl",
    '.s'     : "asm",
    '.S'     : "asm",
    '.l'     : "lex",
    '.ll'    : "lex",
    '.y'     : "yacc",
    '.yy'    : "yacc",
    '.isa'   : "isa",
    '.sh'    : "shell",
    '.slicc' : "slicc",
    '.sm'    : "slicc",
    '.awk'   : "awk",
    '.el'    : "lisp",
    '.txt'   : "text",
    '.tex'   : "tex",
    '.mk'    : "make",
    '.dts'   : "dts",
    }

# languages based on file prefix: (prefix of the basename, language)
lang_prefixes = (
    ('SCons',    'scons'),
    ('Make',     'make'),
    ('make',     'make'),
    ('Doxyfile', 'doxygen'),
    )

# languages based on the #! line at the top of a file:
# (substring to look for in that line, language).  Entries are tried
# in order and matched as substrings, so the more specific names must
# come before 'sh'.
hash_bang = (
    ('python', 'python'),
    ('perl',   'perl'),
    ('sh',     'shell'),
    )

# the set of all languages that we detect.  values() is used instead
# of itervalues() because it exists in both Python 2 and Python 3.
all_languages = frozenset(lang_types.values())
all_languages |= frozenset(lang for start,lang in lang_prefixes)
all_languages |= frozenset(lang for start,lang in hash_bang)
85
def lang_type(filename, firstline=None, openok=True):
    '''Identify the language of a file from its name and, failing
    that, from the #! line at the top of the file.

    filename  -- path of the file; only its basename is used for the
                 extension and prefix checks
    firstline -- the first line of the file, if the caller already
                 has it
    openok    -- if True and firstline is None, the file is opened to
                 read its first line, but only when the extension and
                 prefix checks have both failed

    Returns the language name, or None if the language could not be
    detected.  Errors raised while opening or reading the file are
    not caught here.'''

    basename = os.path.basename(filename)
    extension = os.path.splitext(basename)[1]

    # first try to detect language based on file extension
    try:
        return lang_types[extension]
    except KeyError:
        pass

    # now try to detect language based on file prefix
    for start,lang in lang_prefixes:
        if basename.startswith(start):
            return lang

    # if a first line was not provided but the file is ok to open,
    # grab the first line of the file.
    if firstline is None and openok:
        # Read bytes and decode as latin-1 (which never fails) so that
        # a binary file cannot raise a decoding error; the patterns we
        # look for are plain ASCII.  The with block guarantees the
        # handle is closed even if the read fails.
        with open(filename, 'rb') as handle:
            firstline = handle.readline().decode('latin-1')

    # try to detect language based on #! in first line
    if firstline and firstline.startswith('#!'):
        for string,lang in hash_bang:
            if string in firstline:
                return lang

    # sorry, we couldn't detect the language
    return None
121
# directory names and file names that are skipped by default when
# searching a tree; both are matched against the bare name, not the path
default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext'))
default_file_ignore = frozenset(('parsetab.py', ))
125
def find_files(base, languages=all_languages,
               dir_ignore=default_dir_ignore,
               file_ignore=default_file_ignore):
    '''Generate (path, language) pairs for the files under a directory.

    base        -- directory to walk, including all its subdirectories
    languages   -- container of language names; only files whose
                   detected language is in it are reported
    dir_ignore  -- directory names that are not descended into
    file_ignore -- file names that are skipped

    The language of each file is determined with lang_type(), which
    may open the file to look at its first line.  Each path yielded
    starts with base.'''

    # Make sure a non-empty base ends in a slash so that the paths we
    # yield have the same form whichever way the caller spelled it.
    # An empty base is left alone (it simply yields nothing) rather
    # than being indexed, which would raise IndexError.
    if base and not base.endswith('/'):
        base += '/'

    # walk over base
    for root,dirs,files in os.walk(base):
        # strip ignored directories from the list.  This must modify
        # the list in place: os.walk only skips a directory if it is
        # removed from the very list object it handed to us.
        dirs[:] = [d for d in dirs if d not in dir_ignore]

        for filename in files:
            if filename in file_ignore:
                # skip ignored files
                continue

            # try to figure out the language of the specified file.
            # root already begins with base, so it can be joined with
            # the file name directly.
            fullpath = os.path.join(root, filename)
            language = lang_type(fullpath)

            # if the file is one of the languages that we want, return
            # its name and the language
            if language in languages:
                yield fullpath, language
162
def update_file(dst, src, language, mutator):
    '''Run the lines of src through mutator and write the result to dst.

    dst      -- destination: a file name or a writable file object
    src      -- source: a file name or a readable file object that has
                a name attribute
    language -- language of the file; passed through to mutator
    mutator  -- called as mutator(lines, filename, language), where
                lines is the list of source lines with their line
                endings removed; it must return an iterable of the
                new lines, also without line endings

    If dst and src compare equal the file is updated in place, and it
    is left untouched when the mutator did not change anything.  Every
    line is written followed by a single newline.  Files that are
    opened here are closed before returning; file objects supplied by
    the caller are left open.  Returns None.'''

    # if the source and destination are the same, we're updating in place
    inplace = dst == src

    # if a filename was provided, open the file (read/write when we
    # are going to overwrite it) and remember that closing it is our job
    opened_src = isinstance(src, str)
    if opened_src:
        src = open(src, 'r+' if inplace else 'r')

    opened_dst = False
    try:
        # grab all of the lines of the file and strip them of their
        # line ending
        old_lines = [line.rstrip('\r\n') for line in src]
        new_lines = list(mutator(old_lines, src.name, language))

        if inplace:
            # if we're updating in place and the file hasn't changed,
            # do nothing
            if old_lines == new_lines:
                return

            # otherwise, truncate the file and seek to the beginning.
            dst = src
            dst.truncate(0)
            dst.seek(0)
        elif isinstance(dst, str):
            # if we're not updating in place and a destination file
            # name was provided, create a file object
            dst = open(dst, 'w')
            opened_dst = True

        for line in new_lines:
            dst.write(line)
            dst.write('\n')
    finally:
        # close only what we opened; in the in-place case dst is the
        # same object as src, so it is closed exactly once via src
        if opened_dst:
            dst.close()
        if opened_src:
            src.close()
206