style.py revision 10674:e2f9644a7738
1#! /usr/bin/env python
2# Copyright (c) 2014 ARM Limited
3# All rights reserved
4#
5# The license below extends only to copyright in the software and shall
6# not be construed as granting a license to any other intellectual
7# property including but not limited to intellectual property relating
8# to a hardware implementation of the functionality of the software
9# licensed hereunder.  You may use the software subject to the license
10# terms below provided that you ensure that this notice is replicated
11# unmodified and in its entirety in all distributions of the software,
12# modified or unmodified, in source code or in binary form.
13#
14# Copyright (c) 2006 The Regents of The University of Michigan
15# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
16# All rights reserved.
17#
18# Redistribution and use in source and binary forms, with or without
19# modification, are permitted provided that the following conditions are
20# met: redistributions of source code must retain the above copyright
21# notice, this list of conditions and the following disclaimer;
22# redistributions in binary form must reproduce the above copyright
23# notice, this list of conditions and the following disclaimer in the
24# documentation and/or other materials provided with the distribution;
25# neither the name of the copyright holders nor the names of its
26# contributors may be used to endorse or promote products derived from
27# this software without specific prior written permission.
28#
29# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40#
41# Authors: Nathan Binkert
42
43import heapq
44import os
45import re
46import sys
47
48from os.path import dirname, join as joinpath
49from itertools import count
50from mercurial import bdiff, mdiff, commands
51
52current_dir = dirname(__file__)
53sys.path.insert(0, current_dir)
54sys.path.insert(1, joinpath(dirname(current_dir), 'src', 'python'))
55
56from m5.util import neg_inf, pos_inf, Region, Regions
57import sort_includes
58from file_types import lang_type
59
# Regions object holding the single Region(neg_inf, pos_inf); used as
# the default 'regions' argument of the verifiers below.
all_regions = Regions(Region(neg_inf, pos_inf))

# Tab stop width used when expanding tabs and measuring line lengths.
# The command line front end at the bottom of this file overrides it
# with the -t option.
tabsize = 8
# Run of spaces/tabs at the start of a line.
lead = re.compile(r'^([ \t]+)')
# Run of spaces/tabs at the end of a line.
trail = re.compile(r'([ \t]+)$')
# if/while/for followed by any amount of spaces/tabs and a '('.
any_control = re.compile(r'\b(if|while|for)[ \t]*[(]')
# if/while/for followed by exactly one space and a '('.
good_control = re.compile(r'\b(if|while|for) [(]')

# Languages (as reported by lang_type) that validate() inspects.
format_types = set(('C', 'C++'))
69
70
def re_ignore(expr):
    """Build an ignore rule from a regular expression.

    The returned callable takes a file name and returns the result of
    matching the compiled expression at the start of that name: a
    match object on success, None otherwise.
    """

    pattern = re.compile(expr)
    return lambda fname: pattern.match(fname)
79
# List of functions that are called to determine whether a file
# should be excluded from the style matching rules. Each function is
# called with the file name relative to the repository root (without
# a leading slash) as its argument. A file is excluded if any
# function in the list returns true.
style_ignores = [
    # Ignore external projects as they are unlikely to follow the gem5
    # coding convention.
    re_ignore("^ext/"),
]
90
def check_ignores(fname):
    """Return True if at least one rule in style_ignores accepts fname"""

    return any(rule(fname) for rule in style_ignores)
99
100
def modified_regions(old_data, new_data):
    """Return the regions of new_data that have no match in old_data.

    bdiff.blocks() is iterated as a sequence of 4-tuples describing
    the blocks both inputs have in common; the last two values of
    each tuple delimit the block on the new_data side. Every gap
    between the end of one common block and the start of the next is
    recorded as a modified region.
    """
    regions = Regions()
    # Start at 0 (rather than "no previous block") so that content in
    # front of the first common block is reported as modified as well.
    beg = 0
    for pbeg, pend, fbeg, fend in bdiff.blocks(old_data, new_data):
        if beg != fbeg:
            regions.append(beg, fbeg)
        beg = fend
    return regions
109
def modregions(wctx, fname):
    """Return the regions of fname that differ from its parent(s).

    The file is looked up in the working context wctx. With one
    parent the result is the set of regions modified relative to it;
    with two parents only regions modified relative to both are kept.
    For any other number of parents the whole file is returned.
    """
    fctx = wctx.filectx(fname)
    parents = fctx.parents()
    file_data = fctx.data()

    if len(parents) not in (1, 2):
        # Nothing to compare against: report every line of the file.
        whole_file = Regions()
        whole_file.append(0, len(mdiff.splitnewlines(file_data)))
        return whole_file

    mod_regions = modified_regions(parents[0].data(), file_data)
    if len(parents) == 2:
        # only the lines that are new in both
        mod_regions &= modified_regions(parents[1].data(), file_data)

    return mod_regions
127
class UserInterface(object):
    """Base class for the prompting front ends.

    Subclasses supply do_prompt(prompt, results, default), which asks
    the user once and returns the answer.
    """

    def __init__(self, verbose=False, auto=False):
        self.verbose = verbose
        self.auto = auto

    def prompt(self, prompt, results, default):
        """Return an answer contained in results.

        When self.auto is true it is returned as-is without asking.
        Otherwise do_prompt() is called repeatedly until it produces
        an answer that is contained in results.
        """
        if not self.auto:
            answer = self.do_prompt(prompt, results, default)
            while answer not in results:
                answer = self.do_prompt(prompt, results, default)
            return answer

        return self.auto
141
class MercurialUI(UserInterface):
    """User interface that forwards to a Mercurial ui object"""

    def __init__(self, ui, *args, **kwargs):
        # Everything after the ui object is handed to UserInterface.
        self.ui = ui
        super(MercurialUI, self).__init__(*args, **kwargs)

    def write(self, string):
        self.ui.write(string)

    def do_prompt(self, prompt, results, default):
        # 'results' is not forwarded; UserInterface.prompt() checks the
        # answer against it.
        answer = self.ui.prompt(prompt, default=default)
        return answer
152
class StdioUI(UserInterface):
    """User interface that reads answers with raw_input and writes to
    standard output"""

    def write(self, string):
        sys.stdout.write(string)

    def do_prompt(self, prompt, results, default):
        # An empty answer selects the default.
        answer = raw_input(prompt)
        if not answer:
            return default
        return answer
159
class Verifier(object):
    """Base class for per-file style verifiers.

    Subclasses are expected to provide:
      languages        -- container of language names (as returned by
                          lang_type) the verifier applies to
      test_name        -- short name used in the error messages
      check_line(line) -- return true if the line is acceptable
      fix_line(line)   -- return the corrected version of the line
    """

    def __init__(self, ui, repo):
        self.ui = ui
        self.repo = repo

    def __getattr__(self, attr):
        """Forward 'prompt' and 'write' to the ui and create 'wctx' on
        first use. Any other missing attribute raises AttributeError."""
        if attr in ('prompt', 'write'):
            return getattr(self.ui, attr)

        if attr == 'wctx':
            # Fall back to building the working context directly when
            # the repository object cannot provide it.
            try:
                wctx = self.repo.workingctx()
            except Exception:
                from mercurial import context
                wctx = context.workingctx(self.repo)
            # Cache it so that __getattr__ is not consulted again.
            self.wctx = wctx
            return wctx

        raise AttributeError(attr)

    def open(self, filename, mode):
        """Open a repository-relative file.

        Returns the file object, or None (after printing a message) if
        the file cannot be opened.
        """
        filename = self.repo.wjoin(filename)

        try:
            # This is the builtin open(); method names are not in scope
            # inside the method body.
            f = open(filename, mode)
        except (IOError, OSError):
            # Opening a file reports failures as IOError on Python 2.
            msg = sys.exc_info()[1]
            print('could not open file %s: %s' % (filename, msg))
            return None

        return f

    def skip(self, filename):
        """Return true if the file must not be handled by this verifier"""
        filename = self.repo.wjoin(filename)

        # We never want to handle symlinks, so always skip them: If the location
        # pointed to is a directory, skip it. If the location is a file inside
        # the gem5 directory, it will be checked as a file, so symlink can be
        # skipped. If the location is a file outside gem5, we don't want to
        # check it anyway.
        if os.path.islink(filename):
            return True
        return lang_type(filename) not in self.languages

    def check(self, filename, regions=all_regions):
        """Check the lines of a file that fall within regions.

        Line indices tested against regions are zero based; reported
        line numbers are one based. Returns the number of offending
        lines, or 0 if the file could not be opened.
        """
        f = self.open(filename, 'r')
        if f is None:
            # open() already reported the problem.
            return 0

        errors = 0
        try:
            for num,line in enumerate(f):
                if num not in regions:
                    continue
                if not self.check_line(line):
                    self.write("invalid %s in %s:%d\n" % \
                                   (self.test_name, filename, num + 1))
                    if self.ui.verbose:
                        # Show the offending line without its newline.
                        self.write(">>%s<<\n" % line.rstrip('\n'))
                    errors += 1
        finally:
            f.close()
        return errors

    def fix(self, filename, regions=all_regions):
        """Rewrite a file in place, passing every line that falls
        within regions through fix_line(). Does nothing if the file
        could not be opened."""
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = list(f)

            f.seek(0)
            f.truncate()

            for i,line in enumerate(lines):
                if i in regions:
                    line = self.fix_line(line)

                f.write(line)
        finally:
            f.close()

    def apply(self, filename, prompt, regions=all_regions):
        """Check a file and let the prompt callback decide what to do
        about errors.

        prompt is called as prompt(filename, self.fix, regions) when
        check() reports errors. Returns True if that call returned
        true, False in every other case (including skipped files).
        """
        if not self.skip(filename):
            errors = self.check(filename, regions)
            if errors:
                if prompt(filename, self.fix, regions):
                    return True
        return False
240
241
class Whitespace(Verifier):
    """Verifier that rejects tabs in the indentation and whitespace at
    the end of a line"""

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'whitespace'

    def check_line(self, line):
        """Return False if the indentation contains a tab or the line
        ends in spaces/tabs, True otherwise"""
        indent = lead.search(line)
        if indent and '\t' in indent.group(1):
            return False

        return not trail.search(line)

    def fix_line(self, line):
        """Return the line with tabs in the indentation expanded to
        spaces and trailing whitespace replaced by a single newline"""
        indent = lead.search(line)
        if indent:
            spaces = ''
            for char in indent.group(1):
                if char == '\t':
                    # Pad up to the next tab stop.
                    spaces += ' ' * (tabsize - len(spaces) % tabsize)
                else:
                    spaces += ' '

            line = spaces + line[indent.end():]

        return line.rstrip() + '\n'
271
class SortedIncludes(Verifier):
    """Verifier that compares a file with the output of
    sort_includes.SortIncludes and rewrites it on request"""

    languages = sort_includes.default_languages

    def __init__(self, *args, **kwargs):
        super(SortedIncludes, self).__init__(*args, **kwargs)
        self.sort_includes = sort_includes.SortIncludes()

    def check(self, filename, regions=all_regions):
        """Return 1 if sorting the includes would change any part of
        the file that lies within regions, 0 otherwise (also for empty
        files and files that cannot be opened)."""
        f = self.open(filename, 'r')
        if f is None:
            # open() already reported the problem.
            return 0

        try:
            lines = [ l.rstrip('\n') for l in f ]
        finally:
            f.close()

        if len(lines) == 0:
            return 0

        old = ''.join(line + '\n' for line in lines)

        # The first line is handed to lang_type along with the name.
        language = lang_type(filename, lines[0])
        sort_lines = list(self.sort_includes(lines, filename, language))
        new = ''.join(line + '\n' for line in sort_lines)

        mod = modified_regions(old, new)
        modified = mod & regions

        if modified:
            self.write("invalid sorting of includes in %s\n" % (filename))
            if self.ui.verbose:
                for start, end in modified.regions:
                    self.write("bad region [%d, %d)\n" % (start, end))
            return 1

        return 0

    def fix(self, filename, regions=all_regions):
        """Rewrite the file with its includes sorted.

        The whole file is rewritten; regions is accepted for interface
        compatibility with Verifier.fix() but is not consulted. Empty
        files and files that cannot be opened are left alone.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = [ l.rstrip('\n') for l in f.readlines() ]
            if len(lines) == 0:
                return

            # Sort before truncating so that a failure while sorting
            # leaves the file contents untouched.
            language = lang_type(filename, lines[0])
            sort_lines = list(self.sort_includes(lines, filename, language))

            f.seek(0)
            f.truncate()

            for line in sort_lines:
                f.write(line)
                f.write('\n')
        finally:
            f.close()
320
def linelen(line):
    """Return the display width of line, with every tab advancing to
    the next multiple of tabsize"""
    if '\t' not in line:
        return len(line)

    column = 0
    for char in line:
        if char != '\t':
            column += 1
        else:
            column += tabsize - column % tabsize

    return column
334
class ValidationStats(object):
    """Counters for the kinds of violations found by validate().

    An instance is true if any of its counters is non-zero.
    """

    def __init__(self):
        self.toolong = 0      # lines longer than 79 characters
        self.toolong80 = 0    # subset of toolong: exactly 80 characters
        self.leadtabs = 0     # lines indented with tabs
        self.trailwhite = 0   # lines ending in whitespace
        self.badcontrol = 0   # bad spacing after if/while/for
        self.cret = 0         # lines containing a carriage return

    def dump(self):
        """Print a summary of all counters to standard output"""
        # A parenthesised single argument prints identically whether
        # print is a statement or a function.
        print('''\
%d violations of lines over 79 chars. %d of which are 80 chars exactly.
%d cases of whitespace at the end of a line.
%d cases of tabs to indent.
%d bad parens after if/while/for.
%d carriage returns found.
''' % (self.toolong, self.toolong80, self.trailwhite, self.leadtabs,
       self.badcontrol, self.cret))

    def __nonzero__(self):
        return bool(self.toolong or self.toolong80 or self.leadtabs or
                    self.trailwhite or self.badcontrol or self.cret)

    # Python 3 looks up __bool__ instead of __nonzero__.
    __bool__ = __nonzero__
357
def validate(filename, stats, verbose, exit_code):
    """Check the formatting of one file and count violations in stats.

    Files whose language (per lang_type) is not in format_types are
    ignored.

    filename  -- path of the file to check
    stats     -- ValidationStats object whose counters are incremented
    verbose   -- verbosity level: above 0 reports files that cannot be
                 opened, above 1 reports each violation, above 2 also
                 prints the offending line
    exit_code -- if not None, the process exits with this code at the
                 first violation (or if the file cannot be opened)
    """
    language = lang_type(filename)
    if language not in format_types:
        return

    def msg(lineno, line, message):
        print('%s:%d> %s' % (filename, lineno + 1, message))
        if verbose > 2:
            print(line)

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    try:
        f = open(filename, 'r')
    except (IOError, OSError):
        # Opening a file reports failures as IOError on Python 2.
        if verbose > 0:
            print('could not open file %s' % filename)
        bad()
        return

    try:
        for i,line in enumerate(f):
            line = line.rstrip('\n')

            # no carriage returns
            if line.find('\r') != -1:
                stats.cret += 1
                if verbose > 1:
                    msg(i, line, 'carriage return found')
                bad()

            # lines max out at 79 chars
            llen = linelen(line)
            if llen > 79:
                stats.toolong += 1
                if llen == 80:
                    stats.toolong80 += 1
                if verbose > 1:
                    msg(i, line, 'line too long (%d chars)' % llen)
                bad()

            # no tabs used to indent
            match = lead.search(line)
            if match and match.group(1).find('\t') != -1:
                stats.leadtabs += 1
                if verbose > 1:
                    msg(i, line, 'using tabs to indent')
                bad()

            # no trailing whitespace
            if trail.search(line):
                stats.trailwhite +=1
                if verbose > 1:
                    msg(i, line, 'trailing whitespace')
                bad()

            # for c++, exactly one space between if/while/for and (
            if language == 'C++':
                match = any_control.search(line)
                if match and not good_control.search(line):
                    stats.badcontrol += 1
                    if verbose > 1:
                        msg(i, line,
                            'improper spacing after %s' % match.group(1))
                    bad()
    finally:
        # Also runs when bad() exits the process via SystemExit.
        f.close()
422
423
def do_check_style(hgui, repo, *pats, **opts):
    """check files for proper m5 style guidelines

    Without an argument, checks all modified and added files for gem5
    coding style violations. A list of files can be specified to limit
    the checker to a subset of the repository. The style rules are
    normally applied on a diff of the repository state (i.e., added
    files are checked in their entirety while only modifications of
    modified files are checked).

    The --all option can be specified to include clean files and check
    modified files in their entirety.
    """
    # Returns True if the user chose to abort, False otherwise.

    opt_fix_white = opts.get('fix_white', False)
    opt_all = opts.get('all', False)
    opt_no_ignore = opts.get('no_ignore', False)
    # The ui is deliberately not put into auto mode: in auto mode
    # UserInterface.prompt() hands back the auto value itself, which
    # is none of the 'a'/'i'/'f' answers handled below, so every
    # violation would be ignored silently.
    ui = MercurialUI(hgui, hgui.verbose)

    def prompt(name, func, regions=all_regions):
        result = ui.prompt("(a)bort, (i)gnore, or (f)ix?", 'aif', 'a')
        if result == 'a':
            return True
        elif result == 'f':
            func(name, regions)

        return False

    def no_prompt(name, func, regions=all_regions):
        # Fix without asking and never abort.
        func(name, regions)
        return False

    # --fix-white only promises to fix whitespace automatically; all
    # other verifiers keep asking.
    if opt_fix_white:
        prompt_white = no_prompt
    else:
        prompt_white = prompt

    # Import the match (repository file name matching helper)
    # function. Different versions of Mercurial keep it in different
    # modules and implement them differently.
    try:
        from mercurial import scmutil
        m = scmutil.match(repo[None], pats, opts)
    except ImportError:
        from mercurial import cmdutil
        m = cmdutil.match(repo, pats, opts)

    modified, added, removed, deleted, unknown, ignore, clean = \
        repo.status(match=m, clean=opt_all)
    if not opt_all:
        # Fall back to building the working context directly when the
        # repository object cannot provide it.
        try:
            wctx = repo.workingctx()
        except Exception:
            from mercurial import context
            wctx = context.workingctx(repo)

        files = [ (fn, all_regions) for fn in added ] + \
            [ (fn,  modregions(wctx, fn)) for fn in modified ]
    else:
        files = [ (fn, all_regions) for fn in added + modified + clean ]

    whitespace = Whitespace(ui, repo)
    sorted_includes = SortedIncludes(ui, repo)
    for fname, mod_regions in files:
        if not opt_no_ignore and check_ignores(fname):
            continue

        if whitespace.apply(fname, prompt_white, mod_regions):
            return True

        if sorted_includes.apply(fname, prompt, mod_regions):
            return True

    return False
491
def do_check_format(hgui, repo, **args):
    """check modified and added files for formatting violations

    Runs validate() on every modified and added file reported by
    repo.status(). If any violation was counted, a summary is printed
    and the user is asked whether to ignore it or abort.
    """
    # Returns True if the user chose to abort, False otherwise.
    # The keyword arguments are accepted but not used.

    # The ui has to prompt for real, so it is created with the default
    # (disabled) auto mode.
    ui = MercurialUI(hgui, hgui.verbose)

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    # validate() stays quiet; only the summary below is shown.
    verbose = 0
    stats = ValidationStats()
    for f in modified + added:
        validate(joinpath(repo.root, f), stats, verbose, None)

    if stats:
        stats.dump()
        result = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                           'ai', 'a')
        if result == 'a':
            return True

    return False
510
def check_hook(hooktype):
    """Raise AttributeError unless hooktype is one of the hook types
    ('pretxncommit', 'pre-qrefresh') these hooks are written for"""
    if hooktype not in ('pretxncommit', 'pre-qrefresh'):
        raise AttributeError("This hook is not meant for %s" % hooktype)
515
def check_style(ui, repo, hooktype, **kwargs):
    """Mercurial hook entry point for the style check.

    Raises AttributeError (via check_hook) for unsupported hook types.
    Returns the result of do_check_style(); if that raises, the
    traceback is printed and True is returned. The extra keyword
    arguments passed to the hook are not forwarded.
    """
    check_hook(hooktype)

    try:
        return do_check_style(ui, repo)
    except Exception:
        import traceback
        traceback.print_exc()
        return True
526
def check_format(ui, repo, hooktype, **kwargs):
    """Mercurial hook entry point for the format check.

    Raises AttributeError (via check_hook) for unsupported hook types.
    Returns the result of do_check_format(); if that raises, the
    traceback is printed and True is returned. The extra keyword
    arguments passed to the hook are not forwarded.
    """
    check_hook(hooktype)

    try:
        return do_check_format(ui, repo)
    except Exception:
        import traceback
        traceback.print_exc()
        return True
537
# Use Mercurial's translation function for the help strings below and
# fall back to an identity function if it cannot be imported.
try:
    from mercurial.i18n import _
except ImportError:
    def _(arg):
        return arg

# Command table: maps each command name to a tuple of
# (function, list of options, synopsis string).
cmdtable = {
    '^m5style' : (
        do_check_style, [
            ('w', 'fix-white', False, _("automatically fix whitespace")),
            ('a', 'all', False,
             _("include clean files and unmodified parts of modified files")),
            ('', 'no-ignore', False, _("ignore the style ignore list")),
            ] +  commands.walkopts,
        _('hg m5style [-a] [FILE]...')),
    # NOTE(review): the synopsis advertises [FILE]... but
    # do_check_format() takes no positional file arguments -- confirm
    # which of the two is intended.
    '^m5format' :
    ( do_check_format,
      [ ],
      _('hg m5format [FILE]...')),
}
558
# Stand-alone command line front end:
#   fixwhite  [-t <tabsize>] <path>...  fix whitespace in place
#   chkwhite  <path>...                 report whitespace violations
#   chkformat <path>...                 run validate() and print statistics
if __name__ == '__main__':
    import getopt

    progname = sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit('usage: %s <command> [<command args>]' % progname)

    fixwhite_usage = '%s fixwhite [-t <tabsize> ] <path> [...] \n' % progname
    chkformat_usage = '%s chkformat <path> [...] \n' % progname
    chkwhite_usage = '%s chkwhite <path> [...] \n' % progname

    command = sys.argv[1]
    if command == 'fixwhite':
        flags = 't:'
        usage = fixwhite_usage
    elif command == 'chkwhite':
        flags = 'nv'
        usage = chkwhite_usage
    elif command == 'chkformat':
        flags = 'nv'
        usage = chkformat_usage
    else:
        sys.exit(fixwhite_usage + chkwhite_usage + chkformat_usage)

    try:
        opts, args = getopt.getopt(sys.argv[2:], flags)
    except getopt.GetoptError:
        # Unknown option or missing option argument.
        sys.exit(usage)

    code = 1
    verbose = 1
    for opt,arg in opts:
        if opt == '-n':
            code = None
        if opt == '-t':
            # Module-level tab width read by Whitespace.fix_line().
            tabsize = int(arg)
        if opt == '-v':
            verbose += 1

    # Only check_line()/fix_line() are used here; neither touches the
    # ui or the repository, so the verifier is created without them.
    whitespace = Whitespace(None, None)

    if command == 'fixwhite':
        for filename in args:
            f = open(filename, 'r+')
            try:
                lines = list(f)
                f.seek(0)
                f.truncate()
                for line in lines:
                    f.write(whitespace.fix_line(line))
            finally:
                f.close()
    elif command == 'chkwhite':
        for filename in args:
            f = open(filename, 'r')
            try:
                for num,line in enumerate(f):
                    if whitespace.check_line(line):
                        continue
                    print('invalid whitespace: %s:%d' % (filename, num + 1))
                    if verbose:
                        print('>>%s<<' % line[:-1])
            finally:
                f.close()
    elif command == 'chkformat':
        stats = ValidationStats()
        for filename in args:
            validate(filename, stats=stats, verbose=verbose, exit_code=code)

        if verbose > 0:
            stats.dump()
    else:
        sys.exit("command '%s' not found" % command)
613