hgstyle.py revision 11402:ac9e1a3bed79
1#! /usr/bin/env python
2# Copyright (c) 2014 ARM Limited
3# All rights reserved
4#
5# The license below extends only to copyright in the software and shall
6# not be construed as granting a license to any other intellectual
7# property including but not limited to intellectual property relating
8# to a hardware implementation of the functionality of the software
9# licensed hereunder.  You may use the software subject to the license
10# terms below provided that you ensure that this notice is replicated
11# unmodified and in its entirety in all distributions of the software,
12# modified or unmodified, in source code or in binary form.
13#
14# Copyright (c) 2006 The Regents of The University of Michigan
15# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
16# Copyright (c) 2016 Advanced Micro Devices, Inc.
17# All rights reserved.
18#
19# Redistribution and use in source and binary forms, with or without
20# modification, are permitted provided that the following conditions are
21# met: redistributions of source code must retain the above copyright
22# notice, this list of conditions and the following disclaimer;
23# redistributions in binary form must reproduce the above copyright
24# notice, this list of conditions and the following disclaimer in the
25# documentation and/or other materials provided with the distribution;
26# neither the name of the copyright holders nor the names of its
27# contributors may be used to endorse or promote products derived from
28# this software without specific prior written permission.
29#
30# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
34# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41#
42# Authors: Nathan Binkert
43#          Steve Reinhardt
44
45import heapq
46import os
47import re
48import sys
49
50from os.path import dirname, join as joinpath
51from itertools import count
52from mercurial import bdiff, mdiff, commands
53
54current_dir = dirname(__file__)
55sys.path.insert(0, current_dir)
56sys.path.insert(1, joinpath(dirname(current_dir), 'src', 'python'))
57
58from m5.util import neg_inf, pos_inf, Region, Regions
59import sort_includes
60from file_types import lang_type
61
# Default region set handed to the verifiers when a whole file is to be
# examined: a single Region running from neg_inf to pos_inf.
all_regions = Regions(Region(neg_inf, pos_inf))

# Distance between tab stops used when tabs are expanded.
tabsize = 8

# lead:        run of blanks/tabs at the very start of a line
# trail:       run of blanks/tabs at the very end of a line
# any_control: if/while/for, the blanks/tabs that follow it, then '('
lead = re.compile(r'^([ \t]+)')
trail = re.compile(r'([ \t]+)$')
any_control = re.compile(r'\b(if|while|for)([ \t]*)\(')

# Languages that validate() is willing to inspect.
format_types = set(['C', 'C++'])
70
71
def re_ignore(expr):
    """Build an ignore-rule predicate from a regular expression.

    The returned function takes a file name and returns the result of
    matching the compiled expression against the start of that name: a
    match object on success, None otherwise.
    """

    pattern = re.compile(expr)

    def match_re(fname):
        # match() only succeeds at the beginning of fname.
        return pattern.match(fname)

    return match_re
80
# Predicates deciding whether a file is exempt from the style rules.
# Each predicate is called with the file name relative to the
# repository root (without a leading slash). A file is excluded as soon
# as any predicate in the list returns a true value.
style_ignores = [
    # External projects are unlikely to follow the gem5 coding
    # convention, so they are left alone.
    re_ignore("^ext/"),
]
91
def check_ignores(fname):
    """Return True if any rule in style_ignores accepts fname."""

    # any() stops at the first rule that returns a true value, exactly
    # like an explicit loop with an early return.
    return any(rule(fname) for rule in style_ignores)
100
101
def modified_regions(old_data, new_data):
    """Return the regions of new_data that are not shared with old_data.

    bdiff.blocks() is iterated as 4-tuples; the last two fields are
    used as the begin/end of a matching block on the new_data side, and
    everything lying between two consecutive matching blocks is
    recorded as modified.
    """
    regions = Regions()
    # Start at the top of new_data. Starting from "no position yet"
    # would silently drop every change located before the first
    # matching block (including the case where nothing matches at all).
    beg = 0
    for pbeg, pend, fbeg, fend in bdiff.blocks(old_data, new_data):
        if beg != fbeg:
            regions.append(beg, fbeg)
        beg = fend
    return regions
110
def modregions(wctx, fname):
    """Return the regions of fname that differ from its parent(s).

    With one parent the regions are those changed relative to it; with
    two parents only regions changed relative to both are kept. With
    any other number of parents the whole file is returned.
    """
    fctx = wctx.filectx(fname)
    parents = fctx.parents()
    file_data = fctx.data()

    if len(parents) not in (1, 2):
        # Nothing to compare against: report every line of the file.
        whole_file = Regions()
        whole_file.append(0, len(mdiff.splitnewlines(file_data)))
        return whole_file

    changed = modified_regions(parents[0].data(), file_data)
    if len(parents) == 2:
        # only the lines that are new in both
        changed &= modified_regions(parents[1].data(), file_data)

    return changed
128
class UserInterface(object):
    """Base class for the user interfaces used by the style checkers.

    Subclasses supply do_prompt(prompt, results, default), which asks
    the question once and returns the answer.
    """

    def __init__(self, verbose=False):
        self.verbose = verbose

    def prompt(self, prompt, results, default):
        """Keep asking until do_prompt() returns something in results."""
        answer = self.do_prompt(prompt, results, default)
        while answer not in results:
            answer = self.do_prompt(prompt, results, default)
        return answer
138
class MercurialUI(UserInterface):
    """UserInterface that forwards prompting and output to a Mercurial
    ui object."""

    def __init__(self, ui, *args, **kwargs):
        # Remaining arguments (e.g. verbose) belong to the base class.
        super(MercurialUI, self).__init__(*args, **kwargs)
        self.ui = ui

    def do_prompt(self, prompt, results, default):
        """Ask once through the wrapped ui; 'results' is not used."""
        answer = self.ui.prompt(prompt, default=default)
        return answer

    def write(self, string):
        """Send string to the wrapped ui."""
        self.ui.write(string)
149
class StdioUI(UserInterface):
    """UserInterface that talks to the terminal directly."""

    def do_prompt(self, prompt, results, default):
        """Ask once; an empty answer yields the default."""
        answer = raw_input(prompt)
        if answer:
            return answer
        return default

    def write(self, string):
        """Write string to standard output."""
        sys.stdout.write(string)
156
157
158class Verifier(object):
159    """Base class for style verifier objects
160
161    Subclasses must define these class attributes:
162      languages = set of strings identifying applicable languages
163      test_name = long descriptive name of test, will be used in
164                  messages such as "error in <foo>" or "invalid <foo>"
165      opt_name = short name used to generate command-line options to
166                 control the test (--fix-<foo>, --ignore-<foo>, etc.)
167    """
168
169    def __init__(self, ui, repo, opts):
170        self.ui = ui
171        self.repo = repo
172        # opt_name must be defined as a class attribute of derived classes.
173        # Check test-specific opts first as these have precedence.
174        self.opt_fix = opts.get('fix_' + self.opt_name, False)
175        self.opt_ignore = opts.get('ignore_' + self.opt_name, False)
176        self.opt_skip = opts.get('skip_' + self.opt_name, False)
177        # If no test-specific opts were set, then set based on "-all" opts.
178        if not (self.opt_fix or self.opt_ignore or self.opt_skip):
179            self.opt_fix = opts.get('fix_all', False)
180            self.opt_ignore = opts.get('ignore_all', False)
181            self.opt_skip = opts.get('skip_all', False)
182
183    def __getattr__(self, attr):
184        if attr in ('prompt', 'write'):
185            return getattr(self.ui, attr)
186
187        if attr == 'wctx':
188            try:
189                wctx = repo.workingctx()
190            except:
191                from mercurial import context
192                wctx = context.workingctx(repo)
193            self.wctx = wctx
194            return wctx
195
196        raise AttributeError
197
198    def open(self, filename, mode):
199        filename = self.repo.wjoin(filename)
200
201        try:
202            f = file(filename, mode)
203        except OSError, msg:
204            print 'could not open file %s: %s' % (filename, msg)
205            return None
206
207        return f
208
209    def skip(self, filename):
210        filename = self.repo.wjoin(filename)
211
212        # We never want to handle symlinks, so always skip them: If the location
213        # pointed to is a directory, skip it. If the location is a file inside
214        # the gem5 directory, it will be checked as a file, so symlink can be
215        # skipped. If the location is a file outside gem5, we don't want to
216        # check it anyway.
217        if os.path.islink(filename):
218            return True
219        return lang_type(filename) not in self.languages
220
221    def check(self, filename, regions=all_regions):
222        """Check specified regions of file 'filename'.
223
224        Line-by-line checks can simply provide a check_line() method
225        that returns True if the line is OK and False if it has an
226        error.  Verifiers that need a multi-line view (like
227        SortedIncludes) must override this entire function.
228
229        Returns a count of errors (0 if none), though actual non-zero
230        count value is not currently used anywhere.
231        """
232
233        f = self.open(filename, 'r')
234
235        errors = 0
236        for num,line in enumerate(f):
237            if num not in regions:
238                continue
239            line = line.rstrip('\n')
240            if not self.check_line(line):
241                self.write("invalid %s in %s:%d\n" % \
242                           (self.test_name, filename, num + 1))
243                if self.ui.verbose:
244                    self.write(">>%s<<\n" % line[:-1])
245                errors += 1
246        return errors
247
248    def fix(self, filename, regions=all_regions):
249        """Fix specified regions of file 'filename'.
250
251        Line-by-line fixes can simply provide a fix_line() method that
252        returns the fixed line. Verifiers that need a multi-line view
253        (like SortedIncludes) must override this entire function.
254        """
255
256        f = self.open(filename, 'r+')
257
258        lines = list(f)
259
260        f.seek(0)
261        f.truncate()
262
263        for i,line in enumerate(lines):
264            if i in regions:
265                line = self.fix_line(line)
266
267            f.write(line)
268        f.close()
269
270
271    def apply(self, filename, regions=all_regions):
272        """Possibly apply to specified regions of file 'filename'.
273
274        Verifier is skipped if --skip-<test> option was provided or if
275        file is not of an applicable type.  Otherwise file is checked
276        and error messages printed.  Errors are fixed or ignored if
277        the corresponding --fix-<test> or --ignore-<test> options were
278        provided.  If neither, the user is prompted for an action.
279
280        Returns True to abort, False otherwise.
281        """
282        if not (self.opt_skip or self.skip(filename)):
283            errors = self.check(filename, regions)
284            if errors and not self.opt_ignore:
285                if self.opt_fix:
286                    self.fix(filename, regions)
287                else:
288                    result = self.ui.prompt("(a)bort, (i)gnore, or (f)ix?",
289                                            'aif', 'a')
290                    if result == 'f':
291                        self.fix(filename, regions)
292                    elif result == 'a':
293                        return True # abort
294
295        return False
296
297
class Whitespace(Verifier):
    """Check whitespace.

    Specifically:
    - No tabs used for indent
    - No trailing whitespace
    """

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'whitespace'
    opt_name = 'white'

    def check_line(self, line):
        """Return False for a tab in the indent or trailing blanks."""
        indent = lead.search(line)
        if indent is not None and '\t' in indent.group(1):
            return False

        return trail.search(line) is None

    def fix_line(self, line):
        """Return line with indent tabs expanded and trailing
        whitespace replaced by a single newline."""
        indent = lead.search(line)
        if indent is not None:
            expanded = ''
            for char in indent.group(1):
                if char == '\t':
                    # advance to the next tab stop
                    expanded += ' ' * (tabsize - len(expanded) % tabsize)
                else:
                    expanded += ' '

            # everything after the leading blanks is kept untouched
            line = expanded + line[indent.end():]

        return line.rstrip() + '\n'
336
337
class ControlSpace(Verifier):
    """Check for exactly one space after if/while/for"""

    languages = set(('C', 'C++'))
    test_name = 'spacing after if/while/for'
    opt_name = 'control'

    def check_line(self, line):
        """Return True if every if/while/for followed by '(' on the
        line has exactly one space in between.

        All occurrences are inspected, not just the first one, so that
        this check reports everything fix_line() would rewrite.
        """
        return all(match.group(2) == " "
                   for match in any_control.finditer(line))

    def fix_line(self, line):
        """Return line with a single space between each control
        keyword and its opening parenthesis."""
        return any_control.sub(r'\1 (', line)
352
353
class SortedIncludes(Verifier):
    """Check for proper sorting of include statements"""

    languages = sort_includes.default_languages
    test_name = 'include file order'
    opt_name = 'include'

    def __init__(self, *args, **kwargs):
        super(SortedIncludes, self).__init__(*args, **kwargs)
        self.sort_includes = sort_includes.SortIncludes()

    def check(self, filename, regions=all_regions):
        """Compare the file with its include-sorted version.

        Returns 1 if the two differ inside 'regions' (or if the file
        could not be opened), 0 otherwise.
        """
        f = self.open(filename, 'r')
        if f is None:
            # Could not be verified; do not report it as clean.
            return 1

        with f:
            lines = [l.rstrip('\n') for l in f]

        if not lines:
            return 0

        old = ''.join(line + '\n' for line in lines)

        language = lang_type(filename, lines[0])
        sort_lines = list(self.sort_includes(lines, filename, language))
        new = ''.join(line + '\n' for line in sort_lines)

        modified = modified_regions(old, new) & regions

        if modified:
            self.write("invalid sorting of includes in %s\n" % (filename))
            if self.ui.verbose:
                for start, end in modified.regions:
                    self.write("bad region [%d, %d)\n" % (start, end))
            return 1

        return 0

    def fix(self, filename, regions=all_regions):
        """Rewrite the file with its includes sorted.

        The whole file is rewritten; 'regions' is not consulted.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        with f:
            lines = [l.rstrip('\n') for l in f]
            if not lines:
                # Empty file: nothing to sort, and lines[0] below would
                # raise IndexError.
                return

            language = lang_type(filename, lines[0])
            # Sort completely before truncating so a failure in the
            # sorter cannot leave the file empty.
            sort_lines = list(self.sort_includes(lines, filename, language))

            f.seek(0)
            f.truncate()
            f.writelines(line + '\n' for line in sort_lines)
407
408
def linelen(line):
    """Return the width of line, with every tab advancing to the next
    multiple of tabsize and every other character counting as one."""
    pieces = line.split('\t')

    column = 0
    # Every piece except the last one was followed by a tab.
    for piece in pieces[:-1]:
        column += len(piece)
        column += tabsize - column % tabsize

    return column + len(pieces[-1])
422
class LineLength(Verifier):
    """Check that lines are not too long.

    A line passes when its width, as computed by linelen(), does not
    exceed 78. Long lines are reported but never repaired.
    """

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'line length'
    opt_name = 'length'

    def check_line(self, line):
        """Return True if line is at most 78 columns wide."""
        width = linelen(line)
        return width <= 78

    def fix(self, filename, regions=all_regions):
        """Only emit a warning; the file is left untouched."""
        self.write("Warning: cannot automatically fix overly long lines.\n")
433
434
class BoolCompare(Verifier):
    """Check for explicit comparisons against true/True/false/False."""

    languages = set(('C', 'C++', 'python'))
    test_name = 'boolean comparison'
    opt_name = 'boolcomp'

    regex = re.compile(r'\s*==\s*([Tt]rue|[Ff]alse)\b')

    def check_line(self, line):
        """Return True if line contains no '== true/false' comparison."""
        return self.regex.search(line) is None

    def fix_line(self, line):
        """Return line with every '== true' / '== True' removed.

        Comparisons with false/False are left exactly as they are,
        because deleting them would invert the meaning of the
        expression; a warning is written if the line contains any.
        """
        found_false = []

        def drop_true(match):
            if match.group(1) in ('true', 'True'):
                return ''
            # Keep the comparison and remember that it was seen.
            found_false.append(match.group(1))
            return match.group(0)

        line = self.regex.sub(drop_true, line)
        if found_false:
            self.write("Warning: cannot automatically fix "
                       "comparisons with false/False.\n")
        return line
454
455
# Every verifier class, in the order in which they are applied to a
# file. The command-line option tables are generated from this list.
all_verifiers = [
    Whitespace,
    ControlSpace,
    LineLength,
    BoolCompare,
    SortedIncludes,
]
464
class ValidationStats(object):
    """Counters for the formatting violations recorded by validate().

    An instance is true as soon as any counter is non-zero.
    """

    def __init__(self):
        self.toolong = 0      # lines over the length limit
        self.toolong80 = 0    # ... of which exactly 80 columns wide
        self.leadtabs = 0     # lines indented with tabs
        self.trailwhite = 0   # lines with trailing whitespace
        self.badcontrol = 0   # bad spacing after if/while/for
        self.cret = 0         # lines containing a carriage return

    def dump(self):
        """Print a summary of all counters to standard output."""
        print('''\
%d violations of lines over 79 chars. %d of which are 80 chars exactly.
%d cases of whitespace at the end of a line.
%d cases of tabs to indent.
%d bad parens after if/while/for.
%d carriage returns found.
''' % (self.toolong, self.toolong80, self.trailwhite, self.leadtabs,
       self.badcontrol, self.cret))

    def __nonzero__(self):
        """Return True if any violation has been counted."""
        return bool(self.toolong or self.toolong80 or self.leadtabs or
                    self.trailwhite or self.badcontrol or self.cret)

    # Python 3 looks up __bool__ instead of __nonzero__; without this
    # alias every instance would be considered true there.
    __bool__ = __nonzero__
487
def validate(filename, stats, verbose, exit_code):
    """Scan one file for formatting violations and count them in stats.

    Files whose language is not in format_types are ignored.

    filename  -- path of the file to read
    stats     -- ValidationStats-like object whose counters are updated
    verbose   -- verbosity level (None is treated as 0): above 0 a
                 failure to open the file is reported, above 1 each
                 violation is reported, above 2 the offending line is
                 printed as well
    exit_code -- if not None, the process exits with this code at the
                 first problem found
    """
    lang = lang_type(filename)
    if lang not in format_types:
        return

    # argparse's "count" action yields None when the flag is absent.
    if verbose is None:
        verbose = 0

    def msg(lineno, line, message):
        print('%s:%d> %s' % (filename, lineno + 1, message))
        if verbose > 2:
            print(line)

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    try:
        f = open(filename, 'r')
    except EnvironmentError:
        # EnvironmentError covers IOError, which is what Python 2
        # raises when a file cannot be opened, as well as OSError.
        if verbose > 0:
            print('could not open file %s' % filename)
        bad()
        return

    with f:
        for i, line in enumerate(f):
            line = line.rstrip('\n')

            # no carriage returns
            if line.find('\r') != -1:
                stats.cret += 1
                if verbose > 1:
                    msg(i, line, 'carriage return found')
                bad()

            # lines max out at 79 chars
            llen = linelen(line)
            if llen > 79:
                stats.toolong += 1
                if llen == 80:
                    stats.toolong80 += 1
                if verbose > 1:
                    msg(i, line, 'line too long (%d chars)' % llen)
                bad()

            # no tabs used to indent
            match = lead.search(line)
            if match and match.group(1).find('\t') != -1:
                stats.leadtabs += 1
                if verbose > 1:
                    msg(i, line, 'using tabs to indent')
                bad()

            # no trailing whitespace
            if trail.search(line):
                stats.trailwhite += 1
                if verbose > 1:
                    msg(i, line, 'trailing whitespace')
                bad()

            # for c++, exactly one space between if/while/for and (
            if lang == 'C++':
                match = any_control.search(line)
                if match and match.group(2) != " ":
                    stats.badcontrol += 1
                    if verbose > 1:
                        msg(i, line,
                            'improper spacing after %s' % match.group(1))
                    bad()
553
554
def _modified_regions(repo, patterns, **kwargs):
    """Yield (file name, regions) pairs for the files to be checked.

    Without the 'all' option, added files are paired with all_regions
    and modified files with the result of modregions(). With 'all',
    added, modified and clean files are all paired with all_regions.

    Files accepted by check_ignores() are left out unless the
    'no_ignore' option is set.
    """
    opt_all = kwargs.get('all', False)
    opt_no_ignore = kwargs.get('no_ignore', False)

    # Import the match (repository file name matching helper)
    # function. Different versions of Mercurial keep it in different
    # modules and implement them differently.
    try:
        from mercurial import scmutil
    except ImportError:
        from mercurial import cmdutil
        m = cmdutil.match(repo, patterns, kwargs)
    else:
        m = scmutil.match(repo[None], patterns, kwargs)

    modified, added, removed, deleted, unknown, ignore, clean = \
        repo.status(match=m, clean=opt_all)

    if opt_all:
        files = [(fn, all_regions) for fn in added + modified + clean]
    else:
        try:
            wctx = repo.workingctx()
        except AttributeError:
            # The repository object has no workingctx() method; build
            # the working context through the context module instead.
            from mercurial import context
            wctx = context.workingctx(repo)

        files = [(fn, all_regions) for fn in added]
        files += [(fn, modregions(wctx, fn)) for fn in modified]

    for fname, mod_regions in files:
        if opt_no_ignore or not check_ignores(fname):
            yield fname, mod_regions
587
588
def do_check_style(hgui, repo, *pats, **opts):
    """check files for proper m5 style guidelines

    Without an argument, checks all modified and added files for gem5
    coding style violations. A list of files can be specified to limit
    the checker to a subset of the repository. The style rules are
    normally applied on a diff of the repository state (i.e., added
    files are checked in their entirety while only modifications of
    modified files are checked).

    The --all option can be specified to include clean files and check
    modified files in their entirety.

    The --fix-<check>, --ignore-<check>, and --skip-<check> options
    can be used to control individual style checks:

    --fix-<check> will perform the check and automatically attempt to
      fix any style error (printing a warning if unsuccessful)

    --ignore-<check> will perform the check but ignore any errors
      found (other than printing a message for each)

    --skip-<check> will skip performing the check entirely

    If none of these options are given, all checks will be performed
    and the user will be prompted on how to handle each error.

    --fix-all, --ignore-all, and --skip-all are equivalent to specifying
    --fix-<check>, --ignore-<check>, or --skip-<check> for all checks,
    respectively.  However, option settings for specific checks take
    precedence.  Thus --skip-all --fix-white can be used to skip every
    check other than whitespace errors, which will be checked and
    automatically fixed.

    The -v/--verbose flag will display the offending line(s) as well
    as their location.
    """

    ui = MercurialUI(hgui, verbose=hgui.verbose)

    # instantiate verifier objects
    verifiers = [v(ui, repo, opts) for v in all_verifiers]

    for fname, mod_regions in _modified_regions(repo, pats, **opts):
        # apply() returns True when the user asked to abort; stop at
        # the first verifier that does.
        if any(verifier.apply(fname, mod_regions) for verifier in verifiers):
            return True

    return False
638
def do_check_format(hgui, repo, *pats, **opts):
    """check files for gem5 code formatting violations

    Without an argument, checks all modified and added files for gem5
    code formatting violations. A list of files can be specified to
    limit the checker to a subset of the repository. The style rules
    are normally applied on a diff of the repository state (i.e.,
    added files are checked in their entirety while only modifications
    of modified files are checked).

    The --all option can be specified to include clean files and check
    modified files in their entirety.
    """
    ui = MercurialUI(hgui, hgui.verbose)

    # validate() is always run at verbosity 0 and without an exit
    # code; the per-file summary below is the only report.
    verbose = 0
    for fname, mod_regions in _modified_regions(repo, pats, **opts):
        stats = ValidationStats()
        validate(joinpath(repo.root, fname), stats, verbose, None)
        if not stats:
            continue

        print("%s:" % fname)
        stats.dump()
        answer = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                           'ai', 'a')
        if answer == 'a':
            # True tells the caller to abort.
            return True

    return False
667
def check_hook(hooktype):
    """Raise AttributeError unless hooktype is one of the hook types
    these checks are written for."""
    supported = ('pretxncommit', 'pre-qrefresh')
    if hooktype in supported:
        return

    raise AttributeError("This hook is not meant for %s" % hooktype)
672
# Hook entry point, meant to be called before a transaction commit and
# on qrefresh.
def check_style(ui, repo, hooktype, **kwargs):
    """Run the style checks with default options.

    Returns the result of do_check_style(); if that raises, the
    traceback is printed and True is returned.
    """
    check_hook(hooktype)

    try:
        # No patterns and no options: everything runs with defaults.
        return do_check_style(ui, repo)
    except Exception:
        import traceback
        traceback.print_exc()
        return True
685
def check_format(ui, repo, hooktype, **kwargs):
    """Run the formatting checks with default options.

    Returns the result of do_check_format(); if that raises, the
    traceback is printed and True is returned.
    """
    check_hook(hooktype)

    try:
        # No patterns and no options: everything runs with defaults.
        return do_check_format(ui, repo)
    except Exception:
        import traceback
        traceback.print_exc()
        return True
696
697try:
698    from mercurial.i18n import _
699except ImportError:
700    def _(arg):
701        return arg
702
# Options shared by both commands for selecting files and regions.
_common_region_options = [
    ('a', 'all', False,
     _("include clean files and unmodified parts of modified files")),
    ('', 'no-ignore', False, _("ignore the style ignore list")),
]


# For each of fix/ignore/skip: one "-all" option followed by one option
# per verifier, named after the verifier's opt_name.
fix_opts = [('f', 'fix-all', False, _("fix all style errors"))]
fix_opts += [
    ('', 'fix-' + v.opt_name, False, _('fix errors in ' + v.test_name))
    for v in all_verifiers
]

ignore_opts = [('', 'ignore-all', False, _("ignore all style errors"))]
ignore_opts += [
    ('', 'ignore-' + v.opt_name, False,
     _('ignore errors in ' + v.test_name))
    for v in all_verifiers
]

skip_opts = [('', 'skip-all', False, _("skip all style error checks"))]
skip_opts += [
    ('', 'skip-' + v.opt_name, False,
     _('skip checking for ' + v.test_name))
    for v in all_verifiers
]

all_opts = fix_opts + ignore_opts + skip_opts
720
721
# Command table: each entry maps a command name to a tuple of
# (function, option list, synopsis).
cmdtable = {
    '^m5style': (
        do_check_style,
        all_opts + _common_region_options + commands.walkopts,
        _('hg m5style [-a] [FILE]...'),
    ),
    '^m5format': (
        do_check_format,
        _common_region_options + commands.walkopts,
        _('hg m5format [FILE]...'),
    ),
}
731
if __name__ == '__main__':
    # Stand-alone use: run validate() on each file named on the
    # command line, exiting with status 1 at the first problem found.
    import argparse

    parser = argparse.ArgumentParser(
        description="Check a file for style violations")

    # default=0: without it the "count" action leaves the value at
    # None when -v is not given, and the comparisons below would be
    # made against None.
    parser.add_argument("--verbose", "-v", action="count", default=0,
                        help="Produce verbose output")

    parser.add_argument("file", metavar="FILE", nargs="+",
                        type=str,
                        help="Source file to inspect")

    args = parser.parse_args()

    stats = ValidationStats()
    for filename in args.file:
        validate(filename, stats=stats, verbose=args.verbose, exit_code=1)

        if args.verbose > 0:
            stats.dump()
753