file_types.py revision 11403:e8949ea6961f
1import os
2
# language type for each file extension (the extension is matched
# case-sensitively, e.g. '.c' is C but '.C' is C++)
lang_types = {
    '.c'     : "C",
    '.cl'    : "C",
    '.h'     : "C",
    '.cc'    : "C++",
    '.hh'    : "C++",
    '.cxx'   : "C++",
    '.hxx'   : "C++",
    '.cpp'   : "C++",
    '.hpp'   : "C++",
    '.C'     : "C++",
    '.H'     : "C++",
    '.i'     : "swig",
    '.py'    : "python",
    '.pl'    : "perl",
    '.pm'    : "perl",
    '.s'     : "asm",
    '.S'     : "asm",
    '.l'     : "lex",
    '.ll'    : "lex",
    '.y'     : "yacc",
    '.yy'    : "yacc",
    '.isa'   : "isa",
    '.sh'    : "shell",
    '.slicc' : "slicc",
    '.sm'    : "slicc",
    '.awk'   : "awk",
    '.el'    : "lisp",
    '.txt'   : "text",
    '.tex'   : "tex",
    '.mk'    : "make",
    }

# languages based on file prefix, as (prefix of the file's base name,
# language) pairs
lang_prefixes = (
    ('SCons',    'scons'),
    ('Make',     'make'),
    ('make',     'make'),
    ('Doxyfile', 'doxygen'),
    )

# languages based on the #! line (the first line) of the file, as
# (string to look for in that line, language) pairs
hash_bang = (
    ('python', 'python'),
    ('perl',   'perl'),
    ('sh',     'shell'),
    )

# the set of all languages that we detect.  dict.values() is used
# instead of dict.itervalues() because the latter only exists on
# Python 2, while values() behaves the same here on Python 2 and 3.
all_languages = frozenset(lang_types.values())
all_languages |= frozenset(lang for start,lang in lang_prefixes)
all_languages |= frozenset(lang for start,lang in hash_bang)
56
def lang_type(filename, firstline=None, openok=True):
    '''Identify the language of a given file.

    Detection is attempted in this order, returning on the first hit:
      1. the extension of the file's base name, looked up in lang_types
      2. the start of the file's base name, checked against lang_prefixes
      3. a "#!" first line containing one of the strings in hash_bang

    filename  -- path of the file to classify
    firstline -- the first line of the file, if the caller already has
                 it; when None it is read from the file, provided that
                 openok is true and steps 1 and 2 did not match
    openok    -- whether the file may be opened to get its first line

    Returns the language name, or None if the language could not be
    detected.  Errors raised while opening or reading the file are not
    caught here.'''

    basename = os.path.basename(filename)
    extension = os.path.splitext(basename)[1]

    # first try to detect language based on file extension
    try:
        return lang_types[extension]
    except KeyError:
        pass

    # now try to detect language based on file prefix
    for start,lang in lang_prefixes:
        if basename.startswith(start):
            return lang

    # if a first line was not provided but the file is ok to open,
    # grab the first line of the file.  The with block makes sure the
    # handle is closed even if reading fails.
    if firstline is None and openok:
        with open(filename, 'r') as handle:
            firstline = handle.readline()

    # try to detect language based on #! in first line
    if firstline and firstline.startswith('#!'):
        for string,lang in hash_bang:
            # find() returns -1 when the string is absent; requiring
            # > 0 also means the match must come after the start of
            # the line
            if firstline.find(string) > 0:
                return lang

    # sorry, we couldn't detect the language
    return None
92
# names of the directories and files that find_files skips when the
# caller does not supply its own ignore sets
default_dir_ignore = frozenset(['.hg', '.svn', 'build', 'ext'])
default_file_ignore = frozenset(['parsetab.py'])
96
def find_files(base, languages=all_languages,
               dir_ignore=default_dir_ignore,
               file_ignore=default_file_ignore):
    '''Find all files of the given languages below a directory.

    base        -- directory to search, including its subdirectories;
                   an empty string is treated as the current directory
    languages   -- container of language names to report
    dir_ignore  -- names of directories that are not descended into
    file_ignore -- names of files that are skipped

    This is a generator yielding a (path, language) tuple for every
    file whose language, as determined by lang_type, is in languages.
    The path starts with base followed by a '/'.'''

    # indexing an empty string would raise IndexError, so map it to
    # the current directory first
    if not base:
        base = '.'
    if base[-1] != '/':
        base += '/'

    # walk over base
    for root,dirs,files in os.walk(base):
        # strip ignored directories from the list.  The list has to be
        # modified in place (slice assignment) for os.walk to skip the
        # removed directories.
        dirs[:] = [d for d in dirs if d not in dir_ignore]

        for filename in files:
            if filename in file_ignore:
                # skip ignored files
                continue

            # try to figure out the language of the specified file
            fullpath = os.path.join(root, filename)
            language = lang_type(fullpath)

            # if the file is one of the languages that we want return
            # its name and the language
            if language in languages:
                yield fullpath, language
133
def update_file(dst, src, language, mutator):
    '''Rewrite a file by passing its lines through a mutator.

    dst      -- where the result is written: a file name or a file
                object.  If dst compares equal to src, the file is
                updated in place.
    src      -- the file to read: a file name or a file object.  A
                file object must have a name attribute and, for an in
                place update, must be both readable and writable.
    language -- language of the file; passed through to the mutator
    mutator  -- called as mutator(old_lines, src.name, language), where
                old_lines is the list of lines of src with trailing
                carriage return and newline characters removed.  It
                must return an iterable of the new lines, without line
                endings.

    Every new line is written followed by a single newline.  For an in
    place update the file is only rewritten if the mutator changed the
    lines.  Files opened by this function are closed before it
    returns; file objects passed in by the caller are left open.
    Returns None.'''

    # if the source and destination are the same, we're updating in place
    inplace = dst == src

    # handles opened here (as opposed to passed in by the caller), which
    # we are responsible for closing
    opened = []
    try:
        if isinstance(src, str):
            # if a filename was provided, open the file
            if inplace:
                mode = 'r+'
            else:
                mode = 'r'
            src = open(src, mode)
            opened.append(src)

        # grab all of the lines of the file and strip them of their
        # line ending
        old_lines = [line.rstrip('\r\n') for line in src]
        new_lines = list(mutator(old_lines, src.name, language))

        if inplace:
            # if we're updating in place and the file hasn't changed,
            # do nothing
            if old_lines == new_lines:
                return

            # otherwise, truncate the file and seek to the beginning.
            dst = src
            dst.truncate(0)
            dst.seek(0)
        elif isinstance(dst, str):
            # if we're not updating in place and a destination file name
            # was provided, create a file object
            dst = open(dst, 'w')
            opened.append(dst)

        for line in new_lines:
            dst.write(line)
            dst.write('\n')
    finally:
        # closing also flushes the written data to disk
        for handle in opened:
            handle.close()
177