# style.py revision 10237:b2850bdcec07
1#! /usr/bin/env python
2# Copyright (c) 2006 The Regents of The University of Michigan
3# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met: redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer;
10# redistributions in binary form must reproduce the above copyright
11# notice, this list of conditions and the following disclaimer in the
12# documentation and/or other materials provided with the distribution;
13# neither the name of the copyright holders nor the names of its
14# contributors may be used to endorse or promote products derived from
15# this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28#
29# Authors: Nathan Binkert
30
31import heapq
32import os
33import re
34import sys
35
36from os.path import dirname, join as joinpath
37from itertools import count
38from mercurial import bdiff, mdiff
39
40current_dir = dirname(__file__)
41sys.path.insert(0, current_dir)
42sys.path.insert(1, joinpath(dirname(current_dir), 'src', 'python'))
43
44from m5.util import neg_inf, pos_inf, Region, Regions
45import sort_includes
46from file_types import lang_type
47
# Default "regions" argument for the verifiers below: a single region
# running from neg_inf to pos_inf, i.e. no line is excluded.
all_regions = Regions(Region(neg_inf, pos_inf))

# Width of a tab stop, used when expanding leading tabs and when measuring
# line lengths.  The command-line driver at the bottom of this file
# rebinds it when given -t.
tabsize = 8
# A run of spaces/tabs at the very start of a line.
lead = re.compile(r'^([ \t]+)')
# A run of spaces/tabs at the very end of a line.
trail = re.compile(r'([ \t]+)$')
# if/while/for followed by an opening paren, with any amount of blanks
# (including none) in between.
any_control = re.compile(r'\b(if|while|for)[ \t]*[(]')
# The accepted spelling: the keyword, exactly one space, then the paren.
good_control = re.compile(r'\b(if|while|for) [(]')

# lang_type() results for which validate() performs its format checks.
format_types = set(('C', 'C++'))
57
def modified_regions(old_data, new_data):
    """Return a Regions object covering the parts of new_data that changed.

    Walks the 4-tuples produced by bdiff.blocks(old_data, new_data) and
    records every gap between the end of one tuple's new-data range and
    the start of the next tuple's new-data range.

    NOTE(review): presumably bdiff.blocks() reports the *matching* blocks
    as (old_begin, old_end, new_begin, new_end) line indices -- confirm
    against the Mercurial bdiff module.
    """
    changed = Regions()
    previous_end = None
    for _old_begin, _old_end, new_begin, new_end in \
            bdiff.blocks(old_data, new_data):
        # Nothing is recorded before the first block, and adjacent blocks
        # leave no gap to record.
        have_gap = previous_end is not None and previous_end != new_begin
        if have_gap:
            changed.append(previous_end, new_begin)
        previous_end = new_end
    return changed
66
def modregions(wctx, fname):
    """Return the regions of fname that differ from its parent revision(s).

    wctx  -- context object providing filectx(fname)
    fname -- name of the file to inspect

    With one parent the result is the set of regions modified relative to
    it; with two parents only regions modified relative to both are kept.
    With any other number of parents the whole file (0 .. number of lines)
    is reported.
    """
    file_ctx = wctx.filectx(fname)
    parents = file_ctx.parents()

    contents = file_ctx.data()
    lines = mdiff.splitnewlines(contents)

    parent_count = len(parents)
    if parent_count not in (1, 2):
        # No usable parent to diff against: treat every line as modified.
        whole_file = Regions()
        whole_file.append(0, len(lines))
        return whole_file

    result = modified_regions(parents[0].data(), contents)
    if parent_count == 2:
        # only the lines that are new in both
        result &= modified_regions(parents[1].data(), contents)
    return result
84
class UserInterface(object):
    """Base class for the objects used to talk to the user.

    Subclasses are expected to provide:
      do_prompt(prompt, results, default) -- ask once, return the answer
      write(string)                       -- emit a message

    Attributes:
      verbose -- whether callers should print extra detail
      auto    -- if true, the answer returned by prompt() without asking
    """

    def __init__(self, verbose=False, auto=False):
        self.auto = auto
        self.verbose = verbose

    def prompt(self, prompt, results, default):
        """Ask until the user gives one of the answers in results.

        prompt  -- text shown to the user
        results -- the acceptable answers: a string of single-character
                   choices such as 'aif', or any other iterable of answers
        default -- passed through to do_prompt()

        Returns self.auto unchanged when it is set, otherwise the first
        answer from do_prompt() that is one of the acceptable answers.
        """
        if self.auto:
            return self.auto

        # Compare against the individual choices.  A plain "in" test on a
        # string such as 'aif' is a substring test and would also accept
        # '' or 'ai'.
        valid = tuple(results)
        while True:
            result = self.do_prompt(prompt, results, default)
            if result in valid:
                return result
98
class MercurialUI(UserInterface):
    """UserInterface that forwards prompting and output to a wrapped ui.

    The wrapped object must provide prompt(text, default=...) and
    write(string).
    """

    def __init__(self, ui, *args, **kwargs):
        # Everything after ui is handed to UserInterface.__init__.
        self.ui = ui
        super(MercurialUI, self).__init__(*args, **kwargs)

    def write(self, string):
        """Send string to the wrapped ui."""
        self.ui.write(string)

    def do_prompt(self, prompt, results, default):
        """Ask once through the wrapped ui; results is not forwarded."""
        answer = self.ui.prompt(prompt, default=default)
        return answer
109
class StdioUI(UserInterface):
    """UserInterface that prompts with raw_input and writes to stdout."""

    def write(self, string):
        """Write string to standard output without adding a newline."""
        sys.stdout.write(string)

    def do_prompt(self, prompt, results, default):
        """Ask once; an empty answer is replaced by default."""
        answer = raw_input(prompt)
        if answer:
            return answer
        return default
116
class Verifier(object):
    """Base class for line-oriented style checks on a single file.

    Subclasses are expected to provide:
      languages        -- set of lang_type() results the check applies to
      test_name        -- short name used in diagnostics
      check_line(line) -- return True when the line is acceptable
      fix_line(line)   -- return the corrected line

    ui   -- UserInterface used for prompt()/write() and its verbose flag
    repo -- optional repository; when given, file names are resolved with
            repo.wjoin() and a working context can be looked up lazily
    """

    def __init__(self, ui, repo=None):
        self.ui = ui
        self.repo = repo
        if repo is None:
            # Without a repository there is nothing for __getattr__ to
            # look up lazily, so pin the attribute now.
            self.wctx = None

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails.
        if attr in ('prompt', 'write'):
            return getattr(self.ui, attr)

        if attr == 'wctx':
            # Two ways of obtaining the working context are tried; the
            # second is used when the repo object has no workingctx().
            try:
                wctx = self.repo.workingctx()
            except AttributeError:
                from mercurial import context
                wctx = context.workingctx(self.repo)
            # Cache it so later accesses bypass __getattr__.
            self.wctx = wctx
            return wctx

        raise AttributeError(attr)

    def open(self, filename, mode):
        """Open filename (resolved through the repo, if any) in mode.

        Returns the file object, or None after printing a message when the
        file cannot be opened.
        """
        if self.repo:
            filename = self.repo.wjoin(filename)

        try:
            # This is the builtin open(); method bodies do not see the
            # class scope, so it is not this method.
            f = open(filename, mode)
        except (IOError, OSError):
            msg = sys.exc_info()[1]
            print('could not open file %s: %s' % (filename, msg))
            return None

        return f

    def skip(self, filename):
        """Return True when filename should not be processed."""
        # We never want to handle symlinks, so always skip them: If the location
        # pointed to is a directory, skip it. If the location is a file inside
        # the gem5 directory, it will be checked as a file, so symlink can be
        # skipped. If the location is a file outside gem5, we don't want to
        # check it anyway.
        if os.path.islink(filename):
            return True
        return lang_type(filename) not in self.languages

    def check(self, filename, regions=all_regions):
        """Report every offending line of filename that lies in regions.

        Returns the number of offending lines.  A file that cannot be
        opened is reported by open() and counted as having no errors.
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        errors = 0
        try:
            for num, line in enumerate(f):
                if num not in regions:
                    continue
                if not self.check_line(line):
                    self.write("invalid %s in %s:%d\n" %
                               (self.test_name, filename, num + 1))
                    if self.ui.verbose:
                        # Show the offending line without its newline.
                        self.write(">>%s<<\n" % line.rstrip('\n'))
                    errors += 1
        finally:
            f.close()
        return errors

    def fix(self, filename, regions=all_regions):
        """Rewrite filename in place, applying fix_line() inside regions.

        Lines outside regions are written back unchanged.  Does nothing
        when the file cannot be opened.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            # Build the complete new content before touching the file so a
            # failure in fix_line() cannot leave it truncated.
            fixed = []
            for i, line in enumerate(f):
                if i in regions:
                    line = self.fix_line(line)
                fixed.append(line)

            f.seek(0)
            f.truncate()
            f.writelines(fixed)
        finally:
            f.close()

    def apply(self, filename, prompt, regions=all_regions):
        """Check filename and, on errors, let prompt decide what to do.

        prompt is called as prompt(filename, self.fix, regions); a true
        result from it is passed on as True.  Returns False when the file
        is skipped, is clean, or prompt returns a false value.
        """
        if not self.skip(filename):
            errors = self.check(filename, regions)
            if errors:
                if prompt(filename, self.fix, regions):
                    return True
        return False
198
199
class Whitespace(Verifier):
    """Verifier for tabs in the indentation and for trailing whitespace."""

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'whitespace'

    def check_line(self, line):
        """Return True when line has no tab indent and no trailing blanks."""
        indent = lead.search(line)
        if indent is not None and '\t' in indent.group(1):
            return False

        return trail.search(line) is None

    def fix_line(self, line):
        """Return line with indent tabs expanded and trailing space removed.

        The result always ends in exactly one newline.
        """
        indent = lead.search(line)
        if indent is not None:
            # Measure the indentation, advancing each tab to the next
            # multiple of tabsize, then rebuild it out of spaces only.
            width = 0
            for ch in indent.group(1):
                if ch == '\t':
                    width += tabsize - width % tabsize
                else:
                    width += 1
            line = ' ' * width + line[indent.end():]

        return line.rstrip() + '\n'
229
class SortedIncludes(Verifier):
    """Verifier that checks and restores the ordering of include lines.

    The actual sorting is delegated to sort_includes.SortIncludes.
    """

    languages = sort_includes.default_languages

    def __init__(self, *args, **kwargs):
        super(SortedIncludes, self).__init__(*args, **kwargs)
        self.sort_includes = sort_includes.SortIncludes()

    def _sorted_lines(self, filename, lines):
        """Return lines (a non-empty list, no newlines) in sorted form."""
        language = lang_type(filename, lines[0])
        return list(self.sort_includes(lines, filename, language))

    def check(self, filename, regions=all_regions):
        """Return 1 if sorting would change filename inside regions, else 0.

        Empty files and files that cannot be opened yield 0.
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        try:
            lines = [l.rstrip('\n') for l in f]
        finally:
            f.close()

        if not lines:
            return 0

        old = ''.join(line + '\n' for line in lines)
        sort_lines = self._sorted_lines(filename, lines)
        new = ''.join(line + '\n' for line in sort_lines)

        # Only complain about differences that touch the requested regions.
        modified = modified_regions(old, new) & regions

        if modified:
            self.write("invalid sorting of includes in %s\n" % (filename))
            if self.ui.verbose:
                for start, end in modified.regions:
                    self.write("bad region [%d, %d)\n" % (start, end))
            return 1

        return 0

    def fix(self, filename, regions=all_regions):
        """Rewrite filename with its includes sorted.

        regions is accepted for interface compatibility with
        Verifier.fix() but is not consulted: the whole file is rewritten.
        Empty files and files that cannot be opened are left alone.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = [l.rstrip('\n') for l in f.readlines()]
            if not lines:
                return

            # Compute the result before truncating the file.
            sort_lines = self._sorted_lines(filename, lines)

            f.seek(0)
            f.truncate()
            f.writelines(line + '\n' for line in sort_lines)
        finally:
            f.close()
278
def linelen(line):
    """Return the width of line with each tab advancing to a tab stop.

    Tab stops are every tabsize columns; all other characters count as
    one column.
    """
    pieces = line.split('\t')
    if len(pieces) == 1:
        # No tabs at all: the width is just the number of characters.
        return len(line)

    width = 0
    # Every piece except the last one is followed by a tab.
    for piece in pieces[:-1]:
        width += len(piece)
        width += tabsize - width % tabsize

    return width + len(pieces[-1])
292
class ValidationStats(object):
    """Counters for the kinds of format violations found by validate().

    An instance is true when at least one counter is non-zero.
    """

    def __init__(self):
        self.toolong = 0      # lines longer than 79 columns
        self.toolong80 = 0    # ... of which exactly 80 columns
        self.leadtabs = 0     # lines indented with tabs
        self.trailwhite = 0   # lines with trailing whitespace
        self.badcontrol = 0   # badly spaced if/while/for
        self.cret = 0         # lines containing a carriage return

    def dump(self):
        """Print a summary of all counters to standard output."""
        print('''\
%d violations of lines over 79 chars. %d of which are 80 chars exactly.
%d cases of whitespace at the end of a line.
%d cases of tabs to indent.
%d bad parens after if/while/for.
%d carriage returns found.
''' % (self.toolong, self.toolong80, self.trailwhite, self.leadtabs,
       self.badcontrol, self.cret))

    def __nonzero__(self):
        # Return a real bool so the same method can serve as __bool__.
        return bool(self.toolong or self.toolong80 or self.leadtabs or
                    self.trailwhite or self.badcontrol or self.cret)

    # Python 3 spelling of the truth-value hook.
    __bool__ = __nonzero__
315
def validate(filename, stats, verbose, exit_code):
    """Check the formatting of one file and record violations in stats.

    filename  -- path of the file; ignored unless lang_type() reports one
                 of format_types
    stats     -- ValidationStats whose counters are incremented
    verbose   -- 0: silent, >0: report unreadable files, >1: report each
                 violation, >2: also print the offending line
    exit_code -- if not None, sys.exit(exit_code) is called at the first
                 problem found
    """
    language = lang_type(filename)
    if language not in format_types:
        return

    # The control-statement spacing rule below is described as a C++ rule,
    # so it is restricted to files reported as 'C++'.
    # NOTE(review): confirm whether plain C files should be covered too.
    cpp = language == 'C++'

    def msg(lineno, line, message):
        print('%s:%d> %s' % (filename, lineno + 1, message))
        if verbose > 2:
            print(line)

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    try:
        f = open(filename, 'r')
    except (IOError, OSError):
        if verbose > 0:
            print('could not open file %s' % filename)
        bad()
        return

    try:
        for i, line in enumerate(f):
            line = line.rstrip('\n')

            # no carriage returns
            if line.find('\r') != -1:
                stats.cret += 1
                if verbose > 1:
                    msg(i, line, 'carriage return found')
                bad()

            # lines max out at 79 chars
            llen = linelen(line)
            if llen > 79:
                stats.toolong += 1
                if llen == 80:
                    stats.toolong80 += 1
                if verbose > 1:
                    msg(i, line, 'line too long (%d chars)' % llen)
                bad()

            # no tabs used to indent
            match = lead.search(line)
            if match and match.group(1).find('\t') != -1:
                stats.leadtabs += 1
                if verbose > 1:
                    msg(i, line, 'using tabs to indent')
                bad()

            # no trailing whitespace
            if trail.search(line):
                stats.trailwhite += 1
                if verbose > 1:
                    msg(i, line, 'trailing whitespace')
                bad()

            # for c++, exactly one space between if/while/for and (
            if cpp:
                match = any_control.search(line)
                if match and not good_control.search(line):
                    stats.badcontrol += 1
                    if verbose > 1:
                        msg(i, line,
                            'improper spacing after %s' % match.group(1))
                    bad()
    finally:
        # Also runs when bad() leaves through sys.exit().
        f.close()
380
def do_check_style(hgui, repo, *files, **args):
    """check files for proper m5 style guidelines"""
    # hgui  -- Mercurial ui object (wrapped in a MercurialUI)
    # repo  -- repository whose added and modified files are checked
    # files -- optional file names; when given, only these are checked
    # args  -- options; 'auto' makes every prompt answer "fix"
    #
    # Returns True when the user chose to abort, False otherwise.

    auto = args.get('auto', False)
    if auto:
        auto = 'f'
    ui = MercurialUI(hgui, hgui.verbose, auto)

    if files:
        files = frozenset(files)

    def skip(name):
        # We never want to handle symlinks, so always skip them: If the location
        # pointed to is a directory, skip it. If the location is a file inside
        # the gem5 directory, it will be checked as a file, so symlink can be
        # skipped. If the location is a file outside gem5, we don't want to
        # check it anyway.
        # name is relative to the repository root, so resolve it first
        # rather than relying on the current directory.
        if os.path.islink(joinpath(repo.root, name)):
            return True
        # With an explicit file list, everything not on it is skipped.
        return bool(files) and name not in files

    def prompt(name, func, regions=all_regions):
        result = ui.prompt("(a)bort, (i)gnore, or (f)ix?", 'aif', 'a')
        if result == 'a':
            return True
        elif result == 'f':
            func(repo.wjoin(name), regions)

        return False

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    whitespace = Whitespace(ui)
    sorted_includes = SortedIncludes(ui)

    def aborted(fname, regions):
        # Run every verifier on one file; True as soon as one is aborted.
        fpath = joinpath(repo.root, fname)
        return (whitespace.apply(fpath, prompt, regions) or
                sorted_includes.apply(fpath, prompt, regions))

    # Added files are checked in their entirety.
    for fname in added:
        if skip(fname):
            continue

        if aborted(fname, all_regions):
            return True

    # Fall back to the context module when the repo object has no
    # workingctx() method.
    try:
        wctx = repo.workingctx()
    except AttributeError:
        from mercurial import context
        wctx = context.workingctx(repo)

    # Modified files are only checked where they actually changed.
    for fname in modified:
        if skip(fname):
            continue

        if aborted(fname, modregions(wctx, fname)):
            return True

    return False
448
def do_check_format(hgui, repo, **args):
    # Run validate() over every modified and added file of repo and, if
    # any violation was counted, print the statistics and ask whether to
    # ignore them or abort.
    #
    # hgui -- Mercurial ui object (wrapped in a MercurialUI)
    # repo -- repository whose status() supplies the file lists
    # args -- accepted for the command-table calling convention; unused
    #
    # Returns True when the user chose to abort, False otherwise.

    # There is no automatic answer for format problems, so prompting is
    # always interactive.
    ui = MercurialUI(hgui, hgui.verbose, False)

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    verbose = 0
    stats = ValidationStats()
    for f in modified + added:
        validate(joinpath(repo.root, f), stats, verbose, None)

    if stats:
        stats.dump()
        result = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                           'ai', 'a')
        if result == 'a':
            return True

    return False
467
def check_hook(hooktype):
    """Raise AttributeError unless hooktype is one this file supports.

    The supported hook types are 'pretxncommit' and 'pre-qrefresh'.
    """
    supported = ('pretxncommit', 'pre-qrefresh')
    if hooktype in supported:
        return
    raise AttributeError("This hook is not meant for %s" % hooktype)
472
def check_style(ui, repo, hooktype, **kwargs):
    """Hook wrapper around do_check_style().

    Rejects unsupported hook types via check_hook(), then returns whatever
    do_check_style(ui, repo) returns.  Any exception raised by the check
    is printed as a traceback and reported as True.  kwargs is ignored.
    """
    check_hook(hooktype)

    try:
        outcome = do_check_style(ui, repo)
    except Exception:
        import traceback
        traceback.print_exc()
        outcome = True
    return outcome
483
def check_format(ui, repo, hooktype, **kwargs):
    """Hook wrapper around do_check_format().

    Rejects unsupported hook types via check_hook(), then returns whatever
    do_check_format(ui, repo) returns.  Any exception raised by the check
    is printed as a traceback and reported as True.  kwargs is ignored.
    """
    check_hook(hooktype)

    try:
        outcome = do_check_format(ui, repo)
    except Exception:
        import traceback
        traceback.print_exc()
        outcome = True
    return outcome
494
# Use the translation function from mercurial.i18n for the help strings
# when it can be imported; otherwise fall back to an identity function so
# the table below can still be built.
try:
    from mercurial.i18n import _
except ImportError:
    def _(arg):
        return arg

# Command table: each key is a command name and each value a tuple of
# (function, list of options, synopsis string).  An option entry is
# (short flag, long name, default value, help text).
# NOTE(review): presumably consumed by Mercurial when this file is loaded
# as an extension, with the leading '^' being Mercurial's own marker --
# confirm against the Mercurial extension-writing documentation.
cmdtable = {
    '^m5style' :
    ( do_check_style,
      [ ('a', 'auto', False, _("automatically fix whitespace")) ],
      _('hg m5style [-a] [FILE]...')),
    '^m5format' :
    ( do_check_format,
      [ ],
      _('hg m5format [FILE]...')),
}
511
# Stand-alone command-line driver:
#   fixwhite  [-t <tabsize>] <path>...  fix whitespace in place
#   chkwhite  [-n] [-v] <path>...       report whitespace violations
#   chkformat [-n] [-v] <path>...       report format violations
if __name__ == '__main__':
    import getopt

    progname = sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit('usage: %s <command> [<command args>]' % progname)

    fixwhite_usage = '%s fixwhite [-t <tabsize> ] <path> [...] \n' % progname
    chkformat_usage = '%s chkformat <path> [...] \n' % progname
    chkwhite_usage = '%s chkwhite <path> [...] \n' % progname

    command = sys.argv[1]
    if command == 'fixwhite':
        flags = 't:'
        usage = fixwhite_usage
    elif command == 'chkwhite':
        flags = 'nv'
        usage = chkwhite_usage
    elif command == 'chkformat':
        flags = 'nv'
        usage = chkformat_usage
    else:
        # Unknown command: show every usage line and stop.
        sys.exit(fixwhite_usage + chkwhite_usage + chkformat_usage)

    try:
        opts, args = getopt.getopt(sys.argv[2:], flags)
    except getopt.GetoptError:
        sys.exit('%s\n%s' % (sys.exc_info()[1], usage))

    code = 1
    verbose = 1
    for opt, arg in opts:
        if opt == '-n':
            # Do not exit at the first violation.
            code = None
        elif opt == '-t':
            # Rebinds the module-level tab width read by Whitespace and
            # linelen().
            try:
                tabsize = int(arg)
            except ValueError:
                sys.exit('invalid tab size %r\n%s' % (arg, usage))
        elif opt == '-v':
            verbose += 1

    if command == 'fixwhite':
        # The files were named explicitly, so fix them unconditionally.
        verifier = Whitespace(StdioUI())
        for filename in args:
            verifier.fix(filename)
    elif command == 'chkwhite':
        # Verifier.check() prints one line per violation through the ui.
        verifier = Whitespace(StdioUI(verbose=verbose > 0))
        for filename in args:
            verifier.check(filename)
    else:
        # Only 'chkformat' is left; other commands were rejected above.
        stats = ValidationStats()
        for filename in args:
            validate(filename, stats=stats, verbose=verbose, exit_code=code)

        if verbose > 0:
            stats.dump()
566