style.py revision 6868:c7eb55c68529
1#! /usr/bin/env python
2# Copyright (c) 2006 The Regents of The University of Michigan
3# Copyright (c) 2007 The Hewlett-Packard Development Company
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met: redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer;
10# redistributions in binary form must reproduce the above copyright
11# notice, this list of conditions and the following disclaimer in the
12# documentation and/or other materials provided with the distribution;
13# neither the name of the copyright holders nor the names of its
14# contributors may be used to endorse or promote products derived from
15# this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28#
29# Authors: Nathan Binkert
30
31import re
32import os
33import sys
34
# Run of spaces/tabs at the very start of a line, i.e. its indentation.
lead = re.compile(r'^([ \t]+)')
# Run of spaces/tabs at the end of a line (or just before a final newline).
trail = re.compile(r'([ \t]+)$')
# An if/while/for keyword followed by any amount of blanks and then '('.
any_control = re.compile(r'\b(if|while|for)[ \t]*[(]')
# The same keywords followed by exactly one space and then '('.
good_control = re.compile(r'\b(if|while|for) [(]')
39
# Maps a file name extension (text after the last '.') to a language name.
lang_types = {
    'c': "C",
    'h': "C",
    'cc': "C++",
    'hh': "C++",
    'cxx': "C++",
    'hxx': "C++",
    'cpp': "C++",
    'hpp': "C++",
    'C': "C++",
    'H': "C++",
    'i': "swig",
    'py': "python",
    's': "asm",
    'S': "asm",
    'isa': "isa",
}

def file_type(filename):
    """Return the language name for filename, or None if it is unknown.

    The language is looked up in lang_types using whatever follows the
    last '.' in filename; names without any '.' have no language.
    """
    pieces = filename.split('.')
    if len(pieces) < 2:
        # No dot at all, hence no extension to look up.
        return None

    return lang_types.get(pieces[-1])
59
# Languages whose files are subject to the whitespace rules.
whitespace_types = ('C', 'C++', 'swig', 'python', 'asm', 'isa')
def whitespace_file(filename):
    """Return True if the whitespace rules apply to filename.

    A file qualifies when file_type() reports one of whitespace_types, or
    when its base name starts with "SCons".
    """
    if file_type(filename) in whitespace_types:
        return True

    # Callers hand in paths (for example the result of repo.wjoin()), so
    # only the last path component may be compared against "SCons";
    # testing the whole path would miss every SCons file in a directory.
    return os.path.basename(filename).startswith("SCons")
69
# Languages whose files are subject to the formatting rules.
format_types = ( 'C', 'C++' )
def format_file(filename):
    """Return True if the formatting rules apply to filename.

    Only files whose file_type() is listed in format_types qualify.  The
    previous version returned True on every path, which made the type
    check meaningless and let validate() see files of any kind.
    """
    return file_type(filename) in format_types
76
def checkwhite_line(line):
    """Return True if line obeys the whitespace rules, False otherwise.

    A line is rejected when its leading whitespace contains a tab or when
    it has whitespace at its end.
    """
    indent = lead.search(line)
    if indent is not None and '\t' in indent.group(1):
        return False

    return trail.search(line) is None
87
def checkwhite(filename):
    """Yield (line, line_number) for every badly spaced line of filename.

    Line numbers start at 1.  Nothing is yielded for files that
    whitespace_file() rejects or that cannot be opened; in the latter
    case a message is printed.
    """
    if not whitespace_file(filename):
        return

    try:
        # Checking only reads, so do not demand write permission.
        f = open(filename, 'r')
    except (IOError, OSError) as msg:
        # Opening a file reports failure as IOError on Python 2.
        print('could not open file %s: %s' % (filename, msg))
        return

    try:
        for num, line in enumerate(f):
            if not checkwhite_line(line):
                yield line, num + 1
    finally:
        f.close()
101
def fixwhite_line(line, tabsize):
    """Return line with its whitespace repaired.

    Tabs inside the leading whitespace are expanded to spaces using tab
    stops every tabsize columns, all trailing whitespace is removed and
    exactly one newline is appended.  Tabs after the first non-blank
    character are left alone.  A tabsize of zero or less simply drops the
    leading tabs instead of failing.
    """
    body = line.lstrip(' \t')
    indent = line[:len(line) - len(body)]

    # indent holds only spaces and tabs, so expandtabs() yields exactly the
    # columns a manual tab-stop walk would produce.
    return (indent.expandtabs(tabsize) + body).rstrip() + '\n'
117
def fixwhite(filename, tabsize, fixonly=None):
    """Rewrite filename in place with its whitespace repaired.

    filename -- path of the file to fix; ignored unless whitespace_file()
                accepts it
    tabsize  -- tab stop width handed to fixwhite_line()
    fixonly  -- optional container of 0-based line indexes; when given,
                only those lines are repaired and all others are written
                back untouched
    """
    if not whitespace_file(filename):
        return

    try:
        f = open(filename, 'r+')
    except (IOError, OSError) as msg:
        # Opening a file reports failure as IOError on Python 2.
        print('could not open file %s: %s' % (filename, msg))
        return

    try:
        # Build the complete new content before touching the file, so an
        # error while fixing a line cannot leave the file truncated.
        fixed = []
        for i, line in enumerate(f):
            if fixonly is None or i in fixonly:
                line = fixwhite_line(line, tabsize)
            fixed.append(line)

        f.seek(0)
        f.truncate()
        f.writelines(fixed)
    finally:
        f.close()
138
def linelen(line, tabsize=8):
    """Return the displayed width of line, in columns.

    Every character counts as one column except a tab, which advances to
    the next multiple of tabsize.  tabsize is a parameter (default 8)
    rather than a module global so the function also works when this file
    is loaded as a Mercurial hook, where no global tabsize is defined.
    """
    if '\t' not in line:
        return len(line)

    count = 0
    for c in line:
        if c == '\t':
            count += tabsize - count % tabsize
        else:
            count += 1

    return count
152
class ValidationStats(object):
    """Tally of the formatting violations counted during validation."""

    def __init__(self):
        # Every counter starts at zero.
        self.toolong = 0        # lines over 79 characters
        self.toolong80 = 0      # of those, lines of exactly 80 characters
        self.leadtabs = 0       # tabs used to indent
        self.trailwhite = 0     # whitespace at the end of a line
        self.badcontrol = 0     # bad parens after if/while/for
        self.cret = 0           # carriage returns

    def dump(self):
        """Print a summary of all counters."""
        counts = (self.toolong, self.toolong80, self.trailwhite,
                  self.leadtabs, self.badcontrol, self.cret)
        print('''\
%d violations of lines over 79 chars. %d of which are 80 chars exactly.
%d cases of whitespace at the end of a line.
%d cases of tabs to indent.
%d bad parens after if/while/for.
%d carriage returns found.
''' % counts)

    def __nonzero__(self):
        """Truth value: the first non-zero counter, or zero if all are."""
        return (self.toolong or self.toolong80 or self.leadtabs or
                self.trailwhite or self.badcontrol or self.cret)
175
def validate(filename, stats, verbose, exit_code):
    """Check one file against the formatting rules and tally violations.

    filename  -- path of the file to check; skipped silently when
                 format_file() rejects it
    stats     -- object whose toolong, toolong80, leadtabs, trailwhite,
                 badcontrol and cret counters are incremented
    verbose   -- above 0 reports an unreadable file, above 1 reports each
                 violation, above 2 also echoes the offending line
    exit_code -- when not None, sys.exit(exit_code) is called as soon as
                 the first problem is found

    Raises AttributeError for a file that format_file() accepts but whose
    name ends in none of .cc, .hh or .py.
    """
    if not format_file(filename):
        return

    def msg(lineno, line, message):
        # lineno is 0-based; report it 1-based.
        print('%s:%d> %s' % (filename, lineno + 1, message))
        if verbose > 2:
            print(line)

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    cpp = filename.endswith('.cc') or filename.endswith('.hh')
    py = filename.endswith('.py')

    if not (cpp or py):
        raise AttributeError(
            "I don't know how to deal with the file %s" % filename)

    try:
        f = open(filename, 'r')
    except (IOError, OSError):
        # Opening a file reports failure as IOError on Python 2.
        if verbose > 0:
            print('could not open file %s' % filename)
        bad()
        return

    try:
        for i, line in enumerate(f):
            line = line.rstrip('\n')

            # no carriage returns
            if '\r' in line:
                # The counter lives on stats; this is a plain function and
                # has no self.
                stats.cret += 1
                if verbose > 1:
                    msg(i, line, 'carriage return found')
                bad()

            # lines max out at 79 chars
            llen = linelen(line)
            if llen > 79:
                stats.toolong += 1
                if llen == 80:
                    stats.toolong80 += 1
                if verbose > 1:
                    msg(i, line, 'line too long (%d chars)' % llen)
                bad()

            # no tabs used to indent
            match = lead.search(line)
            if match and '\t' in match.group(1):
                stats.leadtabs += 1
                if verbose > 1:
                    msg(i, line, 'using tabs to indent')
                bad()

            # no trailing whitespace
            if trail.search(line):
                stats.trailwhite += 1
                if verbose > 1:
                    msg(i, line, 'trailing whitespace')
                bad()

            # for c++, exactly one space between if/while/for and (
            # NOTE(review): a line passes as soon as any keyword on it is
            # spaced correctly, even if another one on the same line is not.
            if cpp:
                match = any_control.search(line)
                if match and not good_control.search(line):
                    stats.badcontrol += 1
                    if verbose > 1:
                        msg(i, line,
                            'improper spacing after %s' % match.group(1))
                    bad()
    finally:
        # Also runs when bad() leaves through sys.exit().
        f.close()
247
def modified_lines(old_data, new_data, max_lines):
    """Return a set of 0-based line indexes of new_data considered modified.

    The tuples produced by bdiff.blocks(old_data, new_data) are walked in
    order with one running line index shared across all of them: every
    index below a tuple's third field (fbeg) is recorded, then indexes are
    consumed without being recorded until the last line below its fourth
    field (fend) or an index beyond max_lines has been used up.
    Presumably each tuple describes a range that is identical in both
    inputs -- confirm against the Mercurial bdiff module.
    """
    from mercurial import bdiff, mdiff

    changed = set()
    next_index = 0
    for pbeg, pend, fbeg, fend in bdiff.blocks(old_data, new_data):
        while True:
            # Take the next index; it is used up even when we stop on it.
            current = next_index
            next_index += 1

            if current < fbeg:
                changed.add(current)
            elif current + 1 >= fend or current > max_lines:
                break

    return changed
263
def do_check_whitespace(ui, repo, *files, **args):
    """check files for proper m5 style guidelines"""
    # The docstring is left exactly as it was: this function is registered
    # in cmdtable, and hg presumably shows the docstring as the command's
    # help text.
    #
    # ui    -- Mercurial ui object, used for output and for prompting
    # repo  -- Mercurial repository whose working directory is checked
    # files -- optional file names, see skip() below
    # args  -- options: 'tabsize' (required once something is fixed) and
    #          'auto' (fix without asking)
    #
    # Returns True when the user chose to abort, otherwise None.
    from mercurial import mdiff

    if files:
        files = frozenset(files)

    def skip(name):
        # NOTE(review): this skips exactly the files named on the command
        # line, which looks inverted for "hg m5style [FILE]..."; confirm
        # the intended meaning before changing it.
        return files and name in files

    def prompt(name, fixonly=None):
        # Ask what to do about name; returns True if the user aborts.
        if args.get('auto', False):
            result = 'f'
        else:
            while True:
                result = ui.prompt("(a)bort, (i)gnore, or (f)ix?", default='a')
                # Compare whole answers; a substring test against 'aif'
                # would also accept '' and 'ai'.
                if result in ('a', 'i', 'f'):
                    break

        if result == 'a':
            return True
        elif result == 'f':
            fixwhite(repo.wjoin(name), args['tabsize'], fixonly)

        return False

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    # Added files are checked in full.
    for fname in added:
        if skip(fname):
            continue

        ok = True
        for line, num in checkwhite(repo.wjoin(fname)):
            ui.write("invalid whitespace in %s:%d\n" % (fname, num))
            if ui.verbose:
                # Show the line without its final newline, as is done for
                # modified files below.
                ui.write(">>%s<<\n" % line[:-1])
            ok = False

        if not ok:
            if prompt(fname):
                return True

    try:
        wctx = repo.workingctx()
    except AttributeError:
        # This repo object has no workingctx(); build the context directly.
        from mercurial import context
        wctx = context.workingctx(repo)

    # Modified files are only checked on the lines that changed.
    for fname in modified:
        if skip(fname):
            continue

        if not whitespace_file(fname):
            continue

        fctx = wctx.filectx(fname)
        pctx = fctx.parents()

        file_data = fctx.data()
        lines = mdiff.splitnewlines(file_data)
        if len(pctx) in (1, 2):
            mod_lines = modified_lines(pctx[0].data(), file_data, len(lines))
            if len(pctx) == 2:
                m2 = modified_lines(pctx[1].data(), file_data, len(lines))
                # only the lines that are new in both
                mod_lines = mod_lines & m2
        else:
            # No usable parent: treat every line as modified.
            mod_lines = set(range(len(lines)))

        fixonly = set()
        for i, line in enumerate(lines):
            if i not in mod_lines:
                continue

            if checkwhite_line(line):
                continue

            ui.write("invalid whitespace: %s:%d\n" % (fname, i+1))
            if ui.verbose:
                ui.write(">>%s<<\n" % line[:-1])
            fixonly.add(i)

        if fixonly:
            if prompt(fname, fixonly):
                return True
350
def check_whitespace(ui, repo, hooktype, node, parent1, parent2, **kwargs):
    """Hook entry point: run do_check_whitespace() with a tab size of 8.

    Raises AttributeError when hooktype is anything but 'pretxncommit';
    otherwise returns whatever do_check_whitespace() returns.
    """
    if hooktype != 'pretxncommit':
        raise AttributeError(
            "This hook is only meant for pretxncommit, not %s" % hooktype)

    return do_check_whitespace(ui, repo, tabsize=8)
358
def check_format(ui, repo, hooktype, node, parent1, parent2, **kwargs):
    """Hook entry point: validate the format of modified and added files.

    Every file that repo.status() reports as modified or added is run
    through validate().  When violations were counted, a summary is
    printed and the user is asked whether to ignore them or abort.

    Returns True when the user aborts and False otherwise.  Raises
    AttributeError when hooktype is anything but 'pretxncommit', and
    util.Abort when the answer starts with neither 'i' nor 'a'.
    """
    if hooktype != 'pretxncommit':
        raise AttributeError(
            "This hook is only meant for pretxncommit, not %s" % hooktype)

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    verbose = 0
    stats = ValidationStats()
    for f in modified + added:
        # Resolve the name through repo.wjoin(), as do_check_whitespace()
        # does, so the file is found whatever the current directory is.
        validate(repo.wjoin(f), stats, verbose, None)

    if stats:
        stats.dump()
        # Same calling convention as the prompt in do_check_whitespace().
        result = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                           default='a')
        if result.startswith('i'):
            pass
        elif result.startswith('a'):
            return True
        else:
            # util is not imported at module level, so fetch it here.
            from mercurial import util
            raise util.Abort(_("Invalid response: '%s'") % result)

    return False
383
# Use Mercurial's translation function when it can be imported; otherwise
# fall back to returning the text unchanged.
try:
    from mercurial.i18n import _
except ImportError:
    def _(arg):
        return arg

# Command table entry for the m5style command: the function to run, its
# options as (short, long, default, help) tuples, and a usage synopsis.
cmdtable = {
    '^m5style' :
    ( do_check_whitespace,
      [ ('a', 'auto', False, _("automatically fix whitespace")),
        ('t', 'tabsize', 8, _("Number of spaces TAB indents")) ],
      # The synopsis names the command as registered above and lists
      # both of its options.
      _('hg m5style [-a] [-t <tabsize>] [FILE]...')),
}
# Command line use:
#   <prog> fixwhite [-t <tabsize>] <path> ...   repair whitespace in place
#   <prog> chkwhite <path> ...                  report bad whitespace
#   <prog> chkformat <path> ...                 report formatting violations
if __name__ == '__main__':
    import getopt

    progname = sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit('usage: %s <command> [<command args>]' % progname)

    fixwhite_usage = '%s fixwhite [-t <tabsize> ] <path> [...] \n' % progname
    chkformat_usage = '%s chkformat <path> [...] \n' % progname
    chkwhite_usage = '%s chkwhite <path> [...] \n' % progname

    command = sys.argv[1]
    if command == 'fixwhite':
        flags = 't:'
        usage = fixwhite_usage
    elif command == 'chkwhite':
        flags = 'nv'
        usage = chkwhite_usage
    elif command == 'chkformat':
        flags = 'nv'
        usage = chkformat_usage
    else:
        sys.exit(fixwhite_usage + chkwhite_usage + chkformat_usage)

    # Report a bad option together with the usage of the chosen command
    # instead of dying with a traceback.
    try:
        opts, args = getopt.getopt(sys.argv[2:], flags)
    except getopt.GetoptError as err:
        sys.exit('%s\n%s' % (err, usage))

    code = 1        # exit status used by chkformat; -n turns exiting off
    verbose = 1     # raised by one for each -v
    tabsize = 8     # changed by -t
    for opt, arg in opts:
        if opt == '-n':
            code = None
        elif opt == '-t':
            try:
                tabsize = int(arg)
            except ValueError:
                sys.exit("invalid tabsize '%s'\n%s" % (arg, usage))
            if tabsize < 1:
                sys.exit("invalid tabsize '%s'\n%s" % (arg, usage))
        elif opt == '-v':
            verbose += 1

    # command was already checked against the three known names above.
    if command == 'fixwhite':
        for filename in args:
            fixwhite(filename, tabsize)
    elif command == 'chkwhite':
        for filename in args:
            for line, num in checkwhite(filename):
                print('invalid whitespace: %s:%d' % (filename, num))
                if verbose:
                    print('>>%s<<' % line[:-1])
    else:
        stats = ValidationStats()
        for filename in args:
            validate(filename, stats=stats, verbose=verbose, exit_code=code)

        if verbose > 0:
            stats.dump()
452