style.py revision 5465:4cff095bbf2b
1#! /usr/bin/env python
2# Copyright (c) 2006 The Regents of The University of Michigan
3# Copyright (c) 2007 The Hewlett-Packard Development Company
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met: redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer;
10# redistributions in binary form must reproduce the above copyright
11# notice, this list of conditions and the following disclaimer in the
12# documentation and/or other materials provided with the distribution;
13# neither the name of the copyright holders nor the names of its
14# contributors may be used to endorse or promote products derived from
15# this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28#
29# Authors: Nathan Binkert
30
31import re
32import os
33import sys
34
# Patterns shared by the whitespace and formatting checks below.

# A run of spaces and/or tabs at the very start of a line.
lead = re.compile(r'^([ \t]+)')

# A run of spaces and/or tabs at the end of a line ('$' also matches just
# before a final newline).
trail = re.compile(r'([ \t]+)$')

# if/while/for followed by any amount of spaces/tabs and an open paren.
any_control = re.compile(r'\b(if|while|for)[ \t]*[(]')

# if/while/for followed by exactly one space and an open paren.
good_control = re.compile(r'\b(if|while|for) [(]')
39
# Maps the text after the last '.' in a file name to the language name
# used by the checks in this file.  Keys are case sensitive.
lang_types = {
    'c': "C",
    'h': "C",
    'cc': "C++",
    'hh': "C++",
    'cxx': "C++",
    'hxx': "C++",
    'cpp': "C++",
    'hpp': "C++",
    'C': "C++",
    'H': "C++",
    'i': "swig",
    'py': "python",
    's': "asm",
    'S': "asm",
    'isa': "isa",
}


def file_type(filename):
    """Return the language name for filename, or None if it is unknown.

    The extension is whatever follows the last '.' anywhere in filename;
    a name without any '.' has no extension and yields None.
    """
    dot = filename.rfind('.')
    if dot == -1:
        return None

    return lang_types.get(filename[dot + 1:], None)
59
# Languages whose files are subject to the whitespace checks.
whitespace_types = ('C', 'C++', 'swig', 'python', 'asm', 'isa')
def whitespace_file(filename):
    """Return True if filename should have its whitespace checked.

    A file qualifies when file_type() maps it to one of whitespace_types,
    or when its base name starts with "SCons".
    """
    if file_type(filename) in whitespace_types:
        return True

    # Callers pass paths that include directories (e.g. the result of
    # repo.wjoin()), so the prefix test must look at the last path
    # component only; testing the whole path would miss every SCons file
    # that is not in the current directory.
    if os.path.basename(filename).startswith("SCons"):
        return True

    return False
69
# Languages whose files are subject to the formatting checks in validate().
format_types = ( 'C', 'C++' )
def format_file(filename):
    """Return True if filename's language is one of format_types."""
    return file_type(filename) in format_types
76
def checkwhite_line(line):
    """Return True if line has acceptable whitespace, False otherwise.

    A line is rejected when its leading whitespace contains a tab or when
    it ends in spaces/tabs.
    """
    indent = lead.search(line)
    tab_indented = indent is not None and '\t' in indent.group(1)
    trailing = trail.search(line) is not None

    return not (tab_indented or trailing)
87
def checkwhite(filename):
    """Generate (line, line_number) for every badly spaced line of a file.

    Nothing is generated for files that whitespace_file() rejects or that
    cannot be opened (a message is printed in the latter case).  Line
    numbers are one-based and each line keeps its trailing newline.
    """
    if not whitespace_file(filename):
        return

    try:
        # The file is only read here, so do not ask for write access.
        f = open(filename, 'r')
    except (IOError, OSError):
        # A failed open raises IOError on Python 2; sys.exc_info() is used
        # so no version-specific "except ... as" syntax is required.
        print('could not open file %s: %s' % (filename, sys.exc_info()[1]))
        return

    # Read everything first so the file is closed before any line is
    # handed to the caller.
    try:
        lines = f.readlines()
    finally:
        f.close()

    for num, line in enumerate(lines):
        if not checkwhite_line(line):
            yield line, num + 1
101
def fixwhite_line(line, tabsize):
    """Return line with its whitespace normalised.

    Tabs in the leading whitespace are expanded to spaces up to the next
    multiple of tabsize, trailing whitespace is stripped, and the result
    always ends in exactly one newline.
    """
    if lead.search(line):
        # Split the line into its leading blanks and everything after.
        body = line.lstrip(' \t')
        blanks = line[:len(line) - len(body)]

        indent = ''
        for char in blanks:
            if char == '\t':
                # Pad out to the next tab stop.
                indent += ' ' * (tabsize - len(indent) % tabsize)
            else:
                indent += ' '

        line = indent + body

    return line.rstrip() + '\n'
117
def fixwhite(filename, tabsize, fixonly=None):
    """Rewrite filename in place with its whitespace fixed.

    filename -- path of the file; ignored unless whitespace_file() accepts it
    tabsize  -- tab stop width passed to fixwhite_line()
    fixonly  -- optional container of zero-based line indexes; when given,
                only those lines are fixed and all others are written back
                unchanged
    """
    if not whitespace_file(filename):
        return

    try:
        f = open(filename, 'r+')
    except (IOError, OSError):
        # A failed open raises IOError on Python 2; sys.exc_info() is used
        # so no version-specific "except ... as" syntax is required.
        print('could not open file %s: %s' % (filename, sys.exc_info()[1]))
        return

    try:
        lines = list(f)

        # Rewind and empty the file, then write the new contents back.
        f.seek(0)
        f.truncate()

        for i, line in enumerate(lines):
            if fixonly is None or i in fixonly:
                line = fixwhite_line(line, tabsize)

            f.write(line)
    finally:
        f.close()
138
def linelen(line, tabsize=8):
    """Return the display width of line with tabs expanded.

    line    -- the text to measure
    tabsize -- distance between tab stops (default 8).  This is a
               parameter rather than a module global so the function also
               works when this file is loaded as a Mercurial hook, where
               no global tabsize is defined.
    """
    if '\t' not in line:
        return len(line)

    count = 0
    for c in line:
        if c == '\t':
            # Advance to the next tab stop.
            count += tabsize - count % tabsize
        else:
            count += 1

    return count
152
class ValidationStats(object):
    """Counters for the kinds of formatting violations validate() finds."""

    def __init__(self):
        # Every counter starts at zero.
        for counter in ('toolong', 'toolong80', 'leadtabs', 'trailwhite',
                        'badcontrol', 'cret'):
            setattr(self, counter, 0)

    def dump(self):
        """Print a summary of all counters."""
        counts = (self.toolong, self.toolong80, self.trailwhite,
                  self.leadtabs, self.badcontrol, self.cret)
        print('''\
%d violations of lines over 79 chars. %d of which are 80 chars exactly.
%d cases of whitespace at the end of a line.
%d cases of tabs to indent.
%d bad parens after if/while/for.
%d carriage returns found.
''' % counts)

    def __nonzero__(self):
        """Truth value: the first non-zero counter, or the last counter."""
        for value in (self.toolong, self.toolong80, self.leadtabs,
                      self.trailwhite, self.badcontrol, self.cret):
            if value:
                return value
        return value
175
def validate(filename, stats, verbose, exit_code):
    """Check one file against the formatting rules and update stats.

    filename  -- path of the file; ignored unless format_file() accepts it
    stats     -- ValidationStats-like object whose counters are incremented
    verbose   -- > 0 reports open failures, > 1 reports each violation,
                 > 2 also echoes the offending line
    exit_code -- if not None, sys.exit(exit_code) is called on the first
                 problem found

    Raises AttributeError for files that are neither .cc/.hh nor .py.
    """
    if not format_file(filename):
        return

    def msg(lineno, line, message):
        # lineno is the zero-based index from enumerate(); report one-based.
        print('%s:%d> %s' % (filename, lineno + 1, message))
        if verbose > 2:
            print(line)

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    cpp = filename.endswith('.cc') or filename.endswith('.hh')
    py = filename.endswith('.py')

    # NOTE(review): format_file() also accepts other C/C++ extensions
    # (e.g. .c, .h, .cpp); those end up here and raise.
    if py + cpp != 1:
        raise AttributeError(
            "I don't know how to deal with the file %s" % filename)

    try:
        f = open(filename, 'r')
    except (IOError, OSError):
        # A failed open raises IOError on Python 2, so catch both.
        if verbose > 0:
            print('could not open file %s' % filename)
        bad()
        return

    try:
        for i, line in enumerate(f):
            line = line.rstrip('\n')

            # no carriage returns
            if line.find('\r') != -1:
                stats.cret += 1
                if verbose > 1:
                    msg(i, line, 'carriage return found')
                bad()

            # lines max out at 79 chars
            llen = linelen(line)
            if llen > 79:
                stats.toolong += 1
                if llen == 80:
                    stats.toolong80 += 1
                if verbose > 1:
                    msg(i, line, 'line too long (%d chars)' % llen)
                bad()

            # no tabs used to indent
            match = lead.search(line)
            if match and match.group(1).find('\t') != -1:
                stats.leadtabs += 1
                if verbose > 1:
                    msg(i, line, 'using tabs to indent')
                bad()

            # no trailing whitespace
            if trail.search(line):
                stats.trailwhite += 1
                if verbose > 1:
                    msg(i, line, 'trailing whitespace')
                bad()

            # for c++, exactly one space between if/while/for and (
            if cpp:
                match = any_control.search(line)
                if match and not good_control.search(line):
                    stats.badcontrol += 1
                    if verbose > 1:
                        msg(i, line,
                            'improper spacing after %s' % match.group(1))
                    bad()
    finally:
        # Runs even when bad() exits, so the file is always closed.
        f.close()
247
def modified_lines(old_data, new_data, max_lines):
    """Return the set of zero-based line indexes of new_data that changed.

    bdiff.blocks() yields the regions common to old_data and new_data;
    every line index that falls in a gap before such a region is
    collected.
    """
    from mercurial import bdiff, mdiff

    changed = set()
    # Next line index to examine; it keeps advancing across blocks.
    upcoming = 0
    for pbeg, pend, fbeg, fend in bdiff.blocks(old_data, new_data):
        while True:
            current = upcoming
            upcoming += 1

            if current < fbeg:
                # Before the start of this common block: a changed line.
                changed.add(current)
            elif current + 1 >= fend or current > max_lines:
                # Reached the end of the block (or ran past the file).
                break

    return changed
263
def do_check_whitespace(ui, repo, *files, **args):
    """check files for proper m5 style guidelines"""
    # Checks every added file completely, and every modified file only on
    # the lines that differ from its parent(s).  For each file with bad
    # whitespace the user is asked whether to abort, ignore or fix (the
    # 'auto' entry of args selects "fix" without asking; 'tabsize' is used
    # when fixing).  Returns True as soon as the user chooses to abort.
    from mercurial import mdiff, util

    if files:
        files = frozenset(files)

    def skip(name):
        # NOTE(review): this skips exactly the files named by the caller;
        # confirm that excluding (rather than selecting) them is intended.
        return files and name in files

    def prompt(name, fixonly=None):
        # Returns True only when the user asks to abort.
        if args.get('auto', False):
            result = 'f'
        else:
            result = ui.prompt("(a)bort, (i)gnore, or (f)ix?", "^[aif]$", "a")
        if result == 'a':
            return True
        elif result == 'i':
            pass
        elif result == 'f':
            fixwhite(repo.wjoin(name), args['tabsize'], fixonly)
        else:
            raise util.Abort(_("Invalid response: '%s'") % result)

        return False

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    # Added files have no previous version, so every line is checked.
    for fname in added:
        if skip(fname):
            continue

        ok = True
        for line,num in checkwhite(repo.wjoin(fname)):
            ui.write("invalid whitespace in %s:%d\n" % (fname, num))
            if ui.verbose:
                # Show the line without its trailing newline.
                ui.write(">>%s<<\n" % line[:-1])
            ok = False

        if not ok:
            if prompt(fname):
                return True

    # Modified files are only checked on the lines that changed.
    wctx = repo.workingctx()
    for fname in modified:
        if skip(fname):
            continue

        if not whitespace_file(fname):
            continue

        fctx = wctx.filectx(fname)
        pctx = fctx.parents()
        assert len(pctx) in (1, 2)

        file_data = fctx.data()
        lines = mdiff.splitnewlines(file_data)
        mod_lines = modified_lines(pctx[0].data(), file_data, len(lines))
        if len(pctx) == 2:
            m2 = modified_lines(pctx[1].data(), file_data, len(lines))
            mod_lines = mod_lines & m2 # only the lines that are new in both

        # Zero-based indexes of the changed lines that need fixing.
        fixonly = set()
        for i,line in enumerate(lines):
            if i not in mod_lines:
                continue

            if checkwhite_line(line):
                continue

            ui.write("invalid whitespace: %s:%d\n" % (fname, i+1))
            if ui.verbose:
                ui.write(">>%s<<\n" % line[:-1])
            fixonly.add(i)

        if fixonly:
            if prompt(fname, fixonly):
                return True
342
def check_whitespace(ui, repo, hooktype, node, parent1, parent2):
    """pretxncommit hook that runs the whitespace check with 8-wide tabs.

    Returns whatever do_check_whitespace() returns, so that its True
    ("abort") result reaches Mercurial instead of being dropped.
    Raises AttributeError when installed for any other hook type.
    """
    if hooktype != 'pretxncommit':
        raise AttributeError(
            "This hook is only meant for pretxncommit, not %s" % hooktype)

    args = { 'tabsize' : 8 }
    return do_check_whitespace(ui, repo, **args)
350
def check_format(ui, repo, hooktype, node, parent1, parent2):
    """pretxncommit hook that validates the format of changed files.

    Runs validate() over every modified and added file.  If any violation
    was counted, the totals are printed and the user is asked whether to
    ignore them or abort.  Returns True to abort, False otherwise.
    Raises AttributeError when installed for any other hook type.
    """
    if hooktype != 'pretxncommit':
        raise AttributeError(
            "This hook is only meant for pretxncommit, not %s" % hooktype)

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    verbose = 0
    stats = ValidationStats()
    for f in modified + added:
        validate(f, stats, verbose, None)

    if stats:
        stats.dump()
        result = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                           "^[ia]$", "a")
        if result.startswith('i'):
            pass
        elif result.startswith('a'):
            return True
        else:
            # util is not imported at module level, so bring it into scope
            # here, where it is needed.
            from mercurial import util
            raise util.Abort(_("Invalid response: '%s'") % result)

    return False
375
# Use Mercurial's translation function when it can be imported; otherwise
# fall back to returning messages unchanged.
try:
    from mercurial.i18n import _
except ImportError:
    def _(arg):
        return arg

# Mercurial extension command table: registers do_check_whitespace as the
# "m5style" command together with its options and synopsis.
cmdtable = {
    '^m5style' :
    ( do_check_whitespace,
      [ ('a', 'auto', False, _("automatically fix whitespace")),
        ('t', 'tabsize', 8, _("Number of spaces TAB indents")) ],
      # The synopsis names the command as it is registered above and lists
      # both options.
      _('hg m5style [-a] [-t <tabsize>] [FILE]...')),
}
# Command line entry point:
#   fixwhite  -- fix whitespace in the given files (-t sets the tab size)
#   chkwhite  -- report whitespace problems in the given files
#   chkformat -- report formatting problems (-n: do not exit on the first
#                problem, -v: be more verbose)
if __name__ == '__main__':
    import getopt

    progname = sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit('usage: %s <command> [<command args>]' % progname)

    fixwhite_usage = '%s fixwhite [-t <tabsize> ] <path> [...] \n' % progname
    chkformat_usage = '%s chkformat <path> [...] \n' % progname
    chkwhite_usage = '%s chkwhite <path> [...] \n' % progname

    command = sys.argv[1]
    if command == 'fixwhite':
        flags = 't:'
        usage = fixwhite_usage
    elif command == 'chkwhite':
        flags = 'nv'
        usage = chkwhite_usage
    elif command == 'chkformat':
        flags = 'nv'
        usage = chkformat_usage
    else:
        sys.exit(fixwhite_usage + chkwhite_usage + chkformat_usage)

    try:
        opts, args = getopt.getopt(sys.argv[2:], flags)
    except getopt.GetoptError:
        # Report the bad option together with this command's usage line
        # instead of dying with a traceback.
        sys.exit('%s\nusage: %s' % (sys.exc_info()[1], usage))

    # These stay module-level names: other code in this file may read them
    # as globals.
    code = 1
    verbose = 1
    tabsize = 8
    for opt,arg in opts:
        if opt == '-n':
            code = None
        if opt == '-t':
            tabsize = int(arg)
        if opt == '-v':
            verbose += 1

    if command == 'fixwhite':
        for filename in args:
            fixwhite(filename, tabsize)
    elif command == 'chkwhite':
        for filename in args:
            for line,num in checkwhite(filename):
                print('invalid whitespace: %s:%d' % (filename, num))
                if verbose:
                    print('>>%s<<' % line[:-1])
    elif command == 'chkformat':
        stats = ValidationStats()
        for filename in args:
            validate(filename, stats=stats, verbose=verbose, exit_code=code)

        if verbose > 0:
            stats.dump()
    else:
        sys.exit("command '%s' not found" % command)
444