style.py revision 4984:1cbcac3f8144
1#! /usr/bin/env python
2# Copyright (c) 2007 The Regents of The University of Michigan
3# All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met: redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer;
9# redistributions in binary form must reproduce the above copyright
10# notice, this list of conditions and the following disclaimer in the
11# documentation and/or other materials provided with the distribution;
12# neither the name of the copyright holders nor the names of its
13# contributors may be used to endorse or promote products derived from
14# this software without specific prior written permission.
15#
16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27#
28# Authors: Nathan Binkert
29
30import re
31import os
32import sys
33
# A run of spaces and/or tabs at the very start of a line (the indentation).
lead = re.compile(r'^([ \t]+)')
# A run of spaces and/or tabs at the end of a line; '$' also matches just
# before a final newline, so lines may be passed with their '\n' attached.
trail = re.compile(r'([ \t]+)$')
# An if/while/for keyword followed by any number of spaces/tabs and a '('.
any_control = re.compile(r'\b(if|while|for)[ \t]*[(]')
# The same keywords followed by exactly one space and a '('.
good_control = re.compile(r'\b(if|while|for) [(]')
38
# Maps the text after the last '.' of a file name to a language label.
lang_types = {
    'c': "C",
    'h': "C",
    'cc': "C++",
    'hh': "C++",
    'cxx': "C++",
    'hxx': "C++",
    'cpp': "C++",
    'hpp': "C++",
    'C': "C++",
    'H': "C++",
    'i': "swig",
    'py': "python",
    's': "asm",
    'S': "asm",
    'isa': "isa",
}


def file_type(filename):
    """Return the language label for filename, or None when unknown.

    The lookup key is whatever follows the last '.' in filename; a name
    without any '.' has no extension and therefore yields None.
    """
    pieces = filename.split('.')
    if len(pieces) < 2:
        # No dot anywhere in the name, so there is nothing to look up.
        return None

    return lang_types.get(pieces[-1])
58
# Language labels (as returned by file_type) that get whitespace checking.
whitespace_types = ('C', 'C++', 'swig', 'python', 'asm', 'isa')
def whitespace_file(filename):
    """Return True when filename should be checked for bad whitespace.

    A file qualifies when file_type() maps it to one of whitespace_types,
    or when its base name starts with "SCons" (SConstruct, SConscript, ...).
    """
    if file_type(filename) in whitespace_types:
        return True

    # Callers hand in repository-relative paths and absolute paths (the
    # result of repo.wjoin()) as well as bare names, so the prefix test has
    # to look at the last path component rather than the whole string.
    return os.path.basename(filename).startswith("SCons")
68
# Language labels (as returned by file_type) that get formatting checks.
format_types = ('C', 'C++')


def format_file(filename):
    """Return True when file_type(filename) is one of format_types."""
    return file_type(filename) in format_types
75
def checkwhite_line(line):
    """Return True when line has acceptable whitespace.

    A line is rejected when its leading whitespace contains a tab or when
    it ends in spaces/tabs.
    """
    indent = lead.search(line)
    if indent is not None and '\t' in indent.group(1):
        # Tab somewhere in the indentation.
        return False

    # Acceptable only if there is no trailing run of spaces/tabs.
    return trail.search(line) is None
86
def checkwhite(filename):
    """Yield (line, line_number) for each badly whitespaced line.

    Files that whitespace_file() rejects, and files that cannot be opened,
    produce no results.  line is the raw text including its newline and
    line_number is 1-based.
    """
    if not whitespace_file(filename):
        return

    try:
        # Checking only reads, so do not demand write permission.
        f = open(filename, 'r')
    except EnvironmentError:
        # open() reports failures as IOError, which is not an OSError on
        # Python 2; EnvironmentError covers both on every version.
        print('could not open file %s: %s' % (filename, sys.exc_info()[1]))
        return

    # Read everything up front so the handle is closed even if the caller
    # abandons the generator half way through.
    try:
        lines = f.readlines()
    finally:
        f.close()

    for num, line in enumerate(lines):
        if not checkwhite_line(line):
            yield line, num + 1
100
def fixwhite_line(line, tabsize):
    """Return line with tabs in its indentation expanded and its tail trimmed.

    Every tab in the leading whitespace is replaced by enough spaces to
    reach the next multiple of tabsize.  Trailing whitespace is stripped
    and the result always ends in exactly one newline.
    """
    if lead.search(line):
        indent = []
        width = 0
        rest = ''
        for pos, char in enumerate(line):
            if char == ' ':
                pad = ' '
            elif char == '\t':
                # Pad out to the next tab stop.
                pad = ' ' * (tabsize - width % tabsize)
            else:
                # First non-blank character: the rest is kept verbatim.
                rest = line[pos:]
                break

            indent.append(pad)
            width += len(pad)

        line = ''.join(indent) + rest

    return line.rstrip() + '\n'
116
def fixwhite(filename, tabsize, fixonly=None):
    """Rewrite filename in place with its whitespace problems repaired.

    filename -- path of the file; ignored unless whitespace_file() accepts it
    tabsize  -- tab stop width handed to fixwhite_line()
    fixonly  -- optional collection of 0-based line indices; when given,
                only those lines are repaired and all others are written
                back unchanged
    """
    if not whitespace_file(filename):
        return

    try:
        f = open(filename, 'r+')
    except EnvironmentError:
        # open() reports failures as IOError, which is not an OSError on
        # Python 2; EnvironmentError covers both on every version.
        print('could not open file %s: %s' % (filename, sys.exc_info()[1]))
        return

    try:
        # Build the complete new content before touching the file so that
        # an error while fixing cannot leave a truncated file behind.
        fixed = []
        for i, line in enumerate(f.readlines()):
            if fixonly is None or i in fixonly:
                line = fixwhite_line(line, tabsize)
            fixed.append(line)

        f.seek(0)
        f.truncate()
        f.writelines(fixed)
    finally:
        f.close()
137
def linelen(line, tabsize=8):
    """Return the display width of line with tabs expanded.

    Each tab advances the column to the next multiple of tabsize.  The
    width used to come from a module-level name that only exists when the
    file is run as a script, so calling this from the commit hooks failed
    on any line containing a tab; it is now a parameter defaulting to 8.
    """
    if '\t' not in line:
        return len(line)

    count = 0
    for c in line:
        if c == '\t':
            count += tabsize - count % tabsize
        else:
            count += 1

    return count
151
class ValidationStats(object):
    """Running totals of the formatting violations found by validate().

    An instance is true when at least one violation has been recorded.
    """
    def __init__(self):
        self.toolong = 0     # lines longer than 79 characters
        self.toolong80 = 0   # those of the above that are exactly 80
        self.leadtabs = 0    # lines indented with tabs
        self.trailwhite = 0  # lines ending in whitespace
        self.badcontrol = 0  # badly spaced if/while/for
        self.cret = 0        # lines containing a carriage return

    def dump(self):
        """Print a summary of all counters."""
        print('''\
%d violations of lines over 79 chars. %d of which are 80 chars exactly.
%d cases of whitespace at the end of a line.
%d cases of tabs to indent.
%d bad parens after if/while/for.
%d carriage returns found.
''' % (self.toolong, self.toolong80, self.trailwhite, self.leadtabs,
       self.badcontrol, self.cret))

    def __nonzero__(self):
        # Return a real bool rather than whichever counter is non-zero.
        return bool(self.toolong or self.toolong80 or self.leadtabs or
                    self.trailwhite or self.badcontrol or self.cret)

    # Python 3 looks up __bool__ instead of __nonzero__ for truth tests.
    __bool__ = __nonzero__
174
def validate(filename, stats, verbose, exit_code):
    """Check one file against the formatting rules and record violations.

    filename  -- path of the file; skipped unless format_file() accepts it
    stats     -- ValidationStats-like object whose counters are incremented
    verbose   -- > 0 reports open failures, > 1 reports every violation,
                 > 2 also echoes the offending line
    exit_code -- when not None, the first problem found ends the program
                 through sys.exit(exit_code)
    """
    if not format_file(filename):
        return

    def msg(lineno, line, message):
        print('%s:%d> %s' % (filename, lineno + 1, message))
        if verbose > 2:
            print(line)

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    # Only C++ sources get the control-statement spacing check.  Other
    # files accepted by format_file() (plain C) still get the generic
    # checks instead of aborting with an exception.
    cpp = file_type(filename) == 'C++'

    try:
        f = open(filename, 'r')
    except EnvironmentError:
        # open() reports failures as IOError, which is not an OSError on
        # Python 2; EnvironmentError covers both on every version.
        if verbose > 0:
            print('could not open file %s' % filename)
        bad()
        return

    try:
        for i, line in enumerate(f):
            line = line.rstrip('\n')

            # no carriage returns
            if line.find('\r') != -1:
                stats.cret += 1
                if verbose > 1:
                    msg(i, line, 'carriage return found')
                bad()

            # lines max out at 79 chars
            llen = linelen(line)
            if llen > 79:
                stats.toolong += 1
                if llen == 80:
                    stats.toolong80 += 1
                if verbose > 1:
                    msg(i, line, 'line too long (%d chars)' % llen)
                bad()

            # no tabs used to indent
            match = lead.search(line)
            if match and match.group(1).find('\t') != -1:
                stats.leadtabs += 1
                if verbose > 1:
                    msg(i, line, 'using tabs to indent')
                bad()

            # no trailing whitespace
            if trail.search(line):
                stats.trailwhite += 1
                if verbose > 1:
                    msg(i, line, 'trailing whitespace')
                bad()

            # for c++, exactly one space between if/while/for and (
            if cpp:
                # Inspect every control statement on the line: one that is
                # spaced correctly must not hide another that is not.
                for match in any_control.finditer(line):
                    if match.group(0) == match.group(1) + ' (':
                        continue

                    stats.badcontrol += 1
                    if verbose > 1:
                        msg(i, line,
                            'improper spacing after %s' % match.group(1))
                    bad()
                    # A line counts at most once.
                    break
    finally:
        # Also runs when bad() leaves through sys.exit().
        f.close()
246
def modified_lines(old_data, new_data, max_lines):
    """Return the set of 0-based line indices of new_data that changed.

    bdiff.blocks() yields 4-tuples; only the last two items (start and end
    index of a block in new_data) are used here.  Indices that fall before
    the start of the next block are collected as modified.
    """
    from mercurial import bdiff, mdiff

    changed = set()
    # Next index to examine; it keeps advancing across all blocks.
    upcoming = 0
    for _, _, block_start, block_end in bdiff.blocks(old_data, new_data):
        while True:
            current = upcoming
            upcoming += 1

            if current < block_start:
                # In the gap in front of this block.
                changed.add(current)
                continue

            # Stop on the last line of the block, or once past max_lines.
            if current + 1 >= block_end or current > max_lines:
                break

    return changed
262
def check_whitespace(ui, repo, hooktype, node, parent1, parent2):
    """Mercurial pretxncommit hook that looks for bad whitespace.

    Added files are checked completely; modified files only on the lines
    that modified_lines() reports as changed against every parent.  For
    each offending file the user is asked to abort, ignore or fix.

    Returns True when the user chose to abort, False otherwise.
    Raises AttributeError when invoked for any other hook type.
    """
    from mercurial import mdiff

    if hooktype != 'pretxncommit':
        raise AttributeError(
            "This hook is only meant for pretxncommit, not %s" % hooktype)

    tabsize = 8
    verbose = ui.configbool('style', 'verbose', False)
    def prompt(name, fixonly=None):
        result = ui.prompt("(a)bort, (i)gnore, or (f)ix?", "^[aif]$", "a")
        if result == 'a':
            return True
        elif result == 'i':
            pass
        elif result == 'f':
            fixwhite(repo.wjoin(name), tabsize, fixonly)
        else:
            # RepoError is not imported anywhere in this file, so fetch it
            # here; its home module differs between Mercurial releases.
            try:
                from mercurial.error import RepoError
            except ImportError:
                from mercurial.repo import RepoError
            raise RepoError("Invalid response: '%s'" % result)

        return False

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    for fname in added:
        ok = True
        for line, num in checkwhite(repo.wjoin(fname)):
            ui.write("invalid whitespace in %s:%d\n" % (fname, num))
            if verbose:
                # Show the line without its newline, as done for modified
                # files below (line[-1] would print only the newline).
                ui.write(">>%s<<\n" % line[:-1])
            ok = False

        if not ok:
            if prompt(fname):
                return True

    wctx = repo.workingctx()
    for fname in modified:
        if not whitespace_file(fname):
            continue

        fctx = wctx.filectx(fname)
        pctx = fctx.parents()
        assert len(pctx) in (1, 2)

        file_data = fctx.data()
        lines = mdiff.splitnewlines(file_data)
        mod_lines = modified_lines(pctx[0].data(), file_data, len(lines))
        if len(pctx) == 2:
            m2 = modified_lines(pctx[1].data(), file_data, len(lines))
            mod_lines = mod_lines & m2 # only the lines that are new in both

        fixonly = set()
        for i, line in enumerate(lines):
            if i not in mod_lines:
                continue

            if checkwhite_line(line):
                continue

            ui.write("invalid whitespace: %s:%d\n" % (fname, i+1))
            if verbose:
                ui.write(">>%s<<\n" % line[:-1])
            fixonly.add(i)

        if fixonly:
            if prompt(fname, fixonly):
                return True

    return False
331
def check_format(ui, repo, hooktype, node, parent1, parent2):
    """Mercurial pretxncommit hook that runs validate() on changed files.

    Every modified or added file is validated; if any violation was
    counted, the totals are printed and the user is asked whether to
    ignore them or abort.

    Returns True when the user chose to abort, False otherwise.
    Raises AttributeError when invoked for any other hook type.
    """
    if hooktype != 'pretxncommit':
        raise AttributeError(
            "This hook is only meant for pretxncommit, not %s" % hooktype)

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    verbose = 0
    stats = ValidationStats()
    for f in modified + added:
        # Resolve against the working directory, as check_whitespace does,
        # so the file is found whatever the current directory is.
        validate(repo.wjoin(f), stats, verbose, None)

    if stats:
        stats.dump()
        result = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                           "^[ia]$", "a")
        if result.startswith('i'):
            pass
        elif result.startswith('a'):
            return True
        else:
            # RepoError is not imported anywhere in this file, so fetch it
            # here; its home module differs between Mercurial releases.
            try:
                from mercurial.error import RepoError
            except ImportError:
                from mercurial.repo import RepoError
            raise RepoError("Invalid response: '%s'" % result)

    return False
356
# Command line entry point:
#   fixwhite [-t <tabsize>] <path> ...   repair whitespace in place
#   chkwhite <path> ...                  report lines with bad whitespace
#   chkformat [-n] [-v] <path> ...       report formatting violations
if __name__ == '__main__':
    import getopt

    progname = sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit('usage: %s <command> [<command args>]' % progname)

    fixwhite_usage = '%s fixwhite [-t <tabsize> ] <path> [...] \n' % progname
    chkformat_usage = '%s chkformat <path> [...] \n' % progname
    chkwhite_usage = '%s chkwhite <path> [...] \n' % progname

    command = sys.argv[1]
    if command == 'fixwhite':
        flags = 't:'
        usage = fixwhite_usage
    elif command == 'chkwhite':
        flags = 'nv'
        usage = chkwhite_usage
    elif command == 'chkformat':
        flags = 'nv'
        usage = chkformat_usage
    else:
        sys.exit(fixwhite_usage + chkwhite_usage + chkformat_usage)

    try:
        opts, args = getopt.getopt(sys.argv[2:], flags)
    except getopt.GetoptError:
        # Explain the bad option and show how the command is meant to be
        # used instead of dying with a traceback.
        sys.exit('%s\n%s' % (sys.exc_info()[1], usage))

    # -n clears the exit code so chkformat keeps going after a violation.
    code = 1
    verbose = 1
    tabsize = 8
    for opt, arg in opts:
        if opt == '-n':
            code = None
        if opt == '-t':
            tabsize = int(arg)
        if opt == '-v':
            verbose += 1

    if command == 'fixwhite':
        for filename in args:
            fixwhite(filename, tabsize)
    elif command == 'chkwhite':
        for filename in args:
            for line, num in checkwhite(filename):
                print('invalid whitespace: %s:%d' % (filename, num))
                if verbose:
                    print('>>%s<<' % line[:-1])
    elif command == 'chkformat':
        stats = ValidationStats()
        # The positional arguments are in args; no name "files" exists.
        for filename in args:
            validate(filename, stats=stats, verbose=verbose, exit_code=code)

        if verbose > 0:
            stats.dump()
    else:
        sys.exit("command '%s' not found" % command)
412