# style.py revision 6846:60e0df8086f0
1#! /usr/bin/env python
2# Copyright (c) 2006 The Regents of The University of Michigan
3# Copyright (c) 2007 The Hewlett-Packard Development Company
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met: redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer;
10# redistributions in binary form must reproduce the above copyright
11# notice, this list of conditions and the following disclaimer in the
12# documentation and/or other materials provided with the distribution;
13# neither the name of the copyright holders nor the names of its
14# contributors may be used to endorse or promote products derived from
15# this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28#
29# Authors: Nathan Binkert
30
import os
import re
import sys
34
# A run of spaces and/or tabs at the very start of the line (the indent).
lead = re.compile(r'^([ \t]+)')

# A run of spaces and/or tabs at the end of the line; '$' also matches just
# before a single trailing newline.
trail = re.compile(r'([ \t]+)$')

# if/while/for followed by any amount (including none) of blank space and
# an opening parenthesis.
any_control = re.compile(r'\b(if|while|for)[ \t]*[(]')

# if/while/for followed by exactly one space and an opening parenthesis.
good_control = re.compile(r'\b(if|while|for) [(]')
39
# Map from filename extension (the text after the last '.') to the name of
# the language the file is written in.
lang_types = { 'c'   : "C",
               'h'   : "C",
               'cc'  : "C++",
               'hh'  : "C++",
               'cxx' : "C++",
               'hxx' : "C++",
               'cpp' : "C++",
               'hpp' : "C++",
               'C'   : "C++",
               'H'   : "C++",
               'i'   : "swig",
               'py'  : "python",
               's'   : "asm",
               'S'   : "asm",
               'isa' : "isa" }

def file_type(filename):
    """Return the language name for filename, or None if it is unknown.

    The language is looked up in lang_types using the text after the last
    '.' in filename; a name without any '.' has no extension and yields
    None.
    """
    parts = filename.split('.')
    if len(parts) < 2:
        return None

    return lang_types.get(parts[-1])

# Languages whose files are subject to the whitespace rules.
whitespace_types = ('C', 'C++', 'swig', 'python', 'asm', 'isa')

def whitespace_file(filename):
    """Return True if filename should be checked for whitespace problems.

    A file qualifies when its language is listed in whitespace_types or
    when its base name starts with "SCons".  The base name is used so that
    the test also works for paths that include a directory.
    """
    if file_type(filename) in whitespace_types:
        return True

    if os.path.basename(filename).startswith("SCons"):
        return True

    return False

# Languages whose files are subject to the formatting rules.  'python' is
# listed because validate() explicitly handles '.py' files.
format_types = ( 'C', 'C++', 'python' )

def format_file(filename):
    """Return True if filename should be checked by validate()."""
    return file_type(filename) in format_types
76
def checkwhite_line(line):
    """Return True if line obeys the whitespace rules, False otherwise.

    A line is rejected when its leading whitespace contains a tab or when
    it ends in spaces/tabs (a final newline is not counted as trailing
    whitespace).
    """
    match = lead.search(line)
    if match and match.group(1).find('\t') != -1:
        # tab used for indentation
        return False

    if trail.search(line):
        # trailing whitespace
        return False

    return True
87
def checkwhite(filename):
    """Yield (line, line_number) for every badly whitespaced line in a file.

    Nothing is yielded for files that whitespace_file() rejects.  Line
    numbers are 1-based.  If the file cannot be opened a message is
    printed and the generator ends without yielding anything.
    """
    if not whitespace_file(filename):
        return

    try:
        # Read-only access is enough here; nothing is written back.
        f = open(filename, 'r')
    except EnvironmentError as msg:
        # EnvironmentError covers both IOError and OSError.
        print('could not open file %s: %s' % (filename, msg))
        return

    try:
        for num, line in enumerate(f):
            if not checkwhite_line(line):
                yield line, num + 1
    finally:
        f.close()
101
def fixwhite_line(line, tabsize):
    """Return line with its indent de-tabbed and trailing whitespace removed.

    Tabs in the leading whitespace are expanded to the next multiple of
    tabsize columns; tabs after the first non-blank character are left
    alone.  All trailing whitespace is stripped and the result always ends
    in exactly one newline.
    """
    width = 0       # column reached by the indent seen so far
    remainder = ''  # everything from the first non-blank character on
    for pos, char in enumerate(line):
        if char == ' ':
            width += 1
        elif char == '\t':
            width += tabsize - width % tabsize
        else:
            remainder = line[pos:]
            break

    return (' ' * width + remainder).rstrip() + '\n'
117
def fixwhite(filename, tabsize, fixonly=None):
    """Rewrite filename in place with its whitespace problems fixed.

    filename -- path of the file to fix; files rejected by
                whitespace_file() are left untouched.
    tabsize  -- tab width handed to fixwhite_line().
    fixonly  -- optional container of 0-based line indices; when given,
                only those lines are fixed and all others are written
                back unchanged.

    If the file cannot be opened a message is printed and nothing is
    changed.
    """
    if not whitespace_file(filename):
        return

    try:
        f = open(filename, 'r+')
    except EnvironmentError as msg:
        # EnvironmentError covers both IOError and OSError.
        print('could not open file %s: %s' % (filename, msg))
        return

    try:
        # Build the complete new content before truncating, so a failure
        # while fixing a line cannot leave the file empty.
        fixed = []
        for i, line in enumerate(f):
            if fixonly is None or i in fixonly:
                line = fixwhite_line(line, tabsize)
            fixed.append(line)

        f.seek(0)
        f.truncate()
        f.writelines(fixed)
    finally:
        f.close()
138
def linelen(line, tabsize=8):
    """Return the displayed width of line, in columns.

    Every character counts as one column except a tab, which advances to
    the next multiple of tabsize.  tabsize is a parameter (default 8) so
    the function does not depend on a module-level variable being set.
    """
    if '\t' not in line:
        return len(line)

    count = 0
    for c in line:
        if c == '\t':
            count += tabsize - count % tabsize
        else:
            count += 1

    return count
152
class ValidationStats(object):
    """Counters for the style violations found by validate().

    An instance is true when at least one violation has been counted.
    """

    def __init__(self):
        self.toolong = 0     # lines longer than 79 columns
        self.toolong80 = 0   # subset of toolong that is exactly 80 columns
        self.leadtabs = 0    # lines indented with tabs
        self.trailwhite = 0  # lines with trailing whitespace
        self.badcontrol = 0  # bad spacing after if/while/for
        self.cret = 0        # lines containing a carriage return

    def dump(self):
        """Print a summary of all counters to standard output."""
        print('''\
%d violations of lines over 79 chars. %d of which are 80 chars exactly.
%d cases of whitespace at the end of a line.
%d cases of tabs to indent.
%d bad parens after if/while/for.
%d carriage returns found.
''' % (self.toolong, self.toolong80, self.trailwhite, self.leadtabs,
       self.badcontrol, self.cret))

    def __nonzero__(self):
        """Return True if any violation has been recorded."""
        return bool(self.toolong or self.toolong80 or self.leadtabs or
                    self.trailwhite or self.badcontrol or self.cret)

    # Python 3 looks up __bool__ rather than __nonzero__.
    __bool__ = __nonzero__
175
def validate(filename, stats, verbose, exit_code):
    """Check one file against the formatting rules, updating stats.

    filename  -- path of the file to check; files rejected by
                 format_file() are skipped silently.
    stats     -- ValidationStats-like object whose counters are
                 incremented for each violation found.
    verbose   -- verbosity level: above 0 reports files that cannot be
                 opened, above 1 reports each violation, above 2 also
                 echoes the offending line.
    exit_code -- if not None, the process exits with this code at the
                 first violation (or if the file cannot be opened).

    Raises AttributeError for files that are not '.cc', '.hh' or '.py'.
    """
    if not format_file(filename):
        return

    def msg(lineno, line, message):
        # lineno is 0-based; report it 1-based.
        print('%s:%d> %s' % (filename, lineno + 1, message))
        if verbose > 2:
            print(line)

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    cpp = filename.endswith('.cc') or filename.endswith('.hh')
    py = filename.endswith('.py')

    if not (cpp or py):
        raise AttributeError(
            "I don't know how to deal with the file %s" % filename)

    try:
        f = open(filename, 'r')
    except EnvironmentError:
        # EnvironmentError covers both IOError and OSError.
        if verbose > 0:
            print('could not open file %s' % filename)
        bad()
        return

    try:
        for i, line in enumerate(f):
            line = line.rstrip('\n')

            # no carriage returns
            if line.find('\r') != -1:
                stats.cret += 1
                if verbose > 1:
                    msg(i, line, 'carriage return found')
                bad()

            # lines max out at 79 chars
            llen = linelen(line)
            if llen > 79:
                stats.toolong += 1
                if llen == 80:
                    stats.toolong80 += 1
                if verbose > 1:
                    msg(i, line, 'line too long (%d chars)' % llen)
                bad()

            # no tabs used to indent
            match = lead.search(line)
            if match and match.group(1).find('\t') != -1:
                stats.leadtabs += 1
                if verbose > 1:
                    msg(i, line, 'using tabs to indent')
                bad()

            # no trailing whitespace
            if trail.search(line):
                stats.trailwhite += 1
                if verbose > 1:
                    msg(i, line, 'trailing whitespace')
                bad()

            # for c++, exactly one space between if/while/for and (
            if cpp:
                match = any_control.search(line)
                if match and not good_control.search(line):
                    stats.badcontrol += 1
                    if verbose > 1:
                        msg(i, line,
                            'improper spacing after %s' % match.group(1))
                    bad()
    finally:
        # Also runs when bad() exits the process via SystemExit.
        f.close()
247
def modified_lines(old_data, new_data, max_lines):
    """Return the set of 0-based line indices of new_data that changed.

    old_data and new_data are handed to mercurial's bdiff.blocks();
    presumably each block it yields describes a region common to both
    versions (verify against the mercurial version in use).  Indices of
    new_data that fall before the start of a block are collected as
    modified.  max_lines stops the scan of a block once the index has
    passed it.
    """
    from itertools import count
    from mercurial import bdiff, mdiff

    changed = set()
    # One counter is shared by all blocks, so each index is visited once
    # and scanning resumes where the previous block stopped.
    line_numbers = count()
    for _old_beg, _old_end, new_beg, new_end in \
            bdiff.blocks(old_data, new_data):
        for lineno in line_numbers:
            if lineno < new_beg:
                changed.add(lineno)
                continue

            if lineno + 1 >= new_end or lineno > max_lines:
                break

    return changed
263
def do_check_whitespace(ui, repo, *files, **args):
    """check files for proper m5 style guidelines"""
    # The docstring above is left untouched on purpose: command docstrings
    # may be shown to the user as help text.
    #
    # Checks every added file completely, and the changed lines of every
    # modified file, for whitespace problems.  For each offending file the
    # user is asked whether to abort, ignore or fix (always "fix" when
    # args['auto'] is set).  Returns True if the user chose to abort;
    # otherwise falls off the end and returns None.
    from mercurial import mdiff, util

    if files:
        files = frozenset(files)

    def skip(name):
        # NOTE(review): as written, files named on the command line are
        # the ones skipped; the synopsis suggests FILE arguments were
        # meant to select what to check -- confirm the intended polarity.
        return files and name in files

    def prompt(name, fixonly=None):
        # Returns True when the operation should be aborted.
        if args.get('auto', False):
            result = 'f'
        else:
            result = ui.prompt("(a)bort, (i)gnore, or (f)ix?", "^[aif]$", "a")
        if result == 'a':
            return True
        elif result == 'i':
            pass
        elif result == 'f':
            fixwhite(repo.wjoin(name), args['tabsize'], fixonly)
        else:
            raise util.Abort(_("Invalid response: '%s'") % result)

        return False

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    # Added files: every line is checked.
    for fname in added:
        if skip(fname):
            continue

        ok = True
        for line, num in checkwhite(repo.wjoin(fname)):
            ui.write("invalid whitespace in %s:%d\n" % (fname, num))
            if ui.verbose:
                # Echo the line without its final character (the newline).
                ui.write(">>%s<<\n" % line[:-1])
            ok = False

        if not ok:
            if prompt(fname):
                return True

    try:
        wctx = repo.workingctx()
    except Exception:
        # Fall back for repository objects without a usable workingctx().
        from mercurial import context
        wctx = context.workingctx(repo)

    # Modified files: only lines that differ from the parent(s) are checked.
    for fname in modified:
        if skip(fname):
            continue

        if not whitespace_file(fname):
            continue

        fctx = wctx.filectx(fname)
        pctx = fctx.parents()

        file_data = fctx.data()
        lines = mdiff.splitnewlines(file_data)
        if len(pctx) in (1, 2):
            mod_lines = modified_lines(pctx[0].data(), file_data, len(lines))
            if len(pctx) == 2:
                m2 = modified_lines(pctx[1].data(), file_data, len(lines))
                mod_lines = mod_lines & m2 # only the lines that are new in both
        else:
            # No usable parent: treat every line as modified.
            mod_lines = set(range(len(lines)))

        fixonly = set()
        for i, line in enumerate(lines):
            if i not in mod_lines:
                continue

            if checkwhite_line(line):
                continue

            ui.write("invalid whitespace: %s:%d\n" % (fname, i+1))
            if ui.verbose:
                ui.write(">>%s<<\n" % line[:-1])
            fixonly.add(i)

        if fixonly:
            if prompt(fname, fixonly):
                return True
349
def check_whitespace(ui, repo, hooktype, node, parent1, parent2, **kwargs):
    """Mercurial pretxncommit hook that runs the whitespace check.

    Returns the result of do_check_whitespace(), which is True when the
    user chose to abort, so that the answer actually reaches the caller.
    Raises AttributeError when invoked for any other hook type.
    """
    if hooktype != 'pretxncommit':
        raise AttributeError(
            "This hook is only meant for pretxncommit, not %s" % hooktype)

    args = { 'tabsize' : 8 }
    return do_check_whitespace(ui, repo, **args)
357
def check_format(ui, repo, hooktype, node, parent1, parent2, **kwargs):
    """Mercurial pretxncommit hook that runs validate() on changed files.

    Every modified or added file reported by repo.status() is validated.
    If any violation is counted, the statistics are printed and the user
    is asked whether to ignore them or abort.  Returns True to abort and
    False otherwise.  Raises AttributeError when invoked for any other
    hook type.
    """
    if hooktype != 'pretxncommit':
        raise AttributeError(
            "This hook is only meant for pretxncommit, not %s" % hooktype)

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    verbose = 0
    stats = ValidationStats()
    for f in modified + added:
        # exit_code=None: collect statistics instead of exiting.
        validate(f, stats, verbose, None)

    if stats:
        stats.dump()
        result = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                           "^[ia]$", "a")
        if result.startswith('i'):
            pass
        elif result.startswith('a'):
            return True
        else:
            # util is not imported at module level, so bring it into scope
            # here; without this the raise itself fails with a NameError.
            from mercurial import util
            raise util.Abort(_("Invalid response: '%s'") % result)

    return False
382
# Use mercurial's translation function when it is available; otherwise
# fall back to an identity function so the messages are used as written.
try:
    from mercurial.i18n import _
except ImportError:
    def _(arg):
        return arg

# Mercurial extension command table: registers the 'm5style' command, its
# options and its synopsis.  The synopsis names the command as registered
# and lists both options.
cmdtable = {
    '^m5style' :
    ( do_check_whitespace,
      [ ('a', 'auto', False, _("automatically fix whitespace")),
        ('t', 'tabsize', 8, _("Number of spaces TAB indents")) ],
      _('hg m5style [-a] [-t <tabsize>] [FILE]...')),
}
# Command-line entry point:
#   fixwhite  [-t <tabsize>] <path>...   fix whitespace in place
#   chkwhite  <path>...                  report whitespace problems
#   chkformat <path>...                  report formatting problems
# For chkwhite/chkformat, -v raises verbosity and -n stops chkformat from
# exiting at the first violation.
if __name__ == '__main__':
    import getopt

    progname = sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit('usage: %s <command> [<command args>]' % progname)

    fixwhite_usage = '%s fixwhite [-t <tabsize> ] <path> [...] \n' % progname
    chkformat_usage = '%s chkformat <path> [...] \n' % progname
    chkwhite_usage = '%s chkwhite <path> [...] \n' % progname

    command = sys.argv[1]
    if command == 'fixwhite':
        flags = 't:'
        usage = fixwhite_usage
    elif command == 'chkwhite':
        flags = 'nv'
        usage = chkwhite_usage
    elif command == 'chkformat':
        flags = 'nv'
        usage = chkformat_usage
    else:
        sys.exit(fixwhite_usage + chkwhite_usage + chkformat_usage)

    try:
        opts, args = getopt.getopt(sys.argv[2:], flags)
    except getopt.GetoptError as err:
        # Report bad options with the command's usage line instead of a
        # traceback.
        sys.exit('%s\n%s' % (err, usage))

    code = 1
    verbose = 1
    # Kept at module level: other code in this file may read it as a global.
    tabsize = 8
    for opt, arg in opts:
        if opt == '-n':
            code = None
        if opt == '-t':
            tabsize = int(arg)
        if opt == '-v':
            verbose += 1

    if command == 'fixwhite':
        for filename in args:
            fixwhite(filename, tabsize)
    elif command == 'chkwhite':
        for filename in args:
            for line, num in checkwhite(filename):
                print('invalid whitespace: %s:%d' % (filename, num))
                if verbose:
                    print('>>%s<<' % line[:-1])
    elif command == 'chkformat':
        stats = ValidationStats()
        for filename in args:
            validate(filename, stats=stats, verbose=verbose, exit_code=code)

        if verbose > 0:
            stats.dump()
    else:
        sys.exit("command '%s' not found" % command)
451