style.py revision 11319:7ca84595249c
1#! /usr/bin/env python
2# Copyright (c) 2014 ARM Limited
3# All rights reserved
4#
5# The license below extends only to copyright in the software and shall
6# not be construed as granting a license to any other intellectual
7# property including but not limited to intellectual property relating
8# to a hardware implementation of the functionality of the software
9# licensed hereunder.  You may use the software subject to the license
10# terms below provided that you ensure that this notice is replicated
11# unmodified and in its entirety in all distributions of the software,
12# modified or unmodified, in source code or in binary form.
13#
14# Copyright (c) 2006 The Regents of The University of Michigan
15# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
16# Copyright (c) 2016 Advanced Micro Devices, Inc.
17# All rights reserved.
18#
19# Redistribution and use in source and binary forms, with or without
20# modification, are permitted provided that the following conditions are
21# met: redistributions of source code must retain the above copyright
22# notice, this list of conditions and the following disclaimer;
23# redistributions in binary form must reproduce the above copyright
24# notice, this list of conditions and the following disclaimer in the
25# documentation and/or other materials provided with the distribution;
26# neither the name of the copyright holders nor the names of its
27# contributors may be used to endorse or promote products derived from
28# this software without specific prior written permission.
29#
30# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
34# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41#
42# Authors: Nathan Binkert
43#          Steve Reinhardt
44
45import heapq
46import os
47import re
48import sys
49
50from os.path import dirname, join as joinpath
51from itertools import count
52from mercurial import bdiff, mdiff, commands
53
54current_dir = dirname(__file__)
55sys.path.insert(0, current_dir)
56sys.path.insert(1, joinpath(dirname(current_dir), 'src', 'python'))
57
58from m5.util import neg_inf, pos_inf, Region, Regions
59import sort_includes
60from file_types import lang_type
61
# Default region set handed to the verifiers when no narrower set of
# lines is requested; it is built from a single Region(neg_inf, pos_inf).
all_regions = Regions(Region(neg_inf, pos_inf))

# Distance between tab stops, in columns.
tabsize = 8

# Patterns shared by the checkers: whitespace at the start of a line,
# whitespace at the end of a line, and a control keyword followed by
# optional blanks and an opening parenthesis.
lead = re.compile(r'^([ \t]+)')
trail = re.compile(r'([ \t]+)$')
any_control = re.compile(r'\b(if|while|for)([ \t]*)\(')

# Languages that validate() looks at.
format_types = set(['C', 'C++'])
70
71
def re_ignore(expr):
    """Build an ignore-rule predicate from a regular expression.

    The returned callable takes a file name and returns the result of
    matching the compiled expression at the start of that name: a match
    object when it matches, None otherwise.
    """

    return re.compile(expr).match
80
# Predicates that decide whether a file is exempt from the style
# rules. Each one is called with the file name relative to the
# repository root (without a leading slash) as its only argument; a
# file is excluded as soon as any predicate in the list returns true.
style_ignores = [
    # External projects are unlikely to follow the gem5 coding
    # convention, so leave them alone.
    re_ignore("^ext/"),
]
91
def check_ignores(fname):
    """Return True if any rule in style_ignores accepts fname."""

    return any(rule(fname) for rule in style_ignores)
100
101
def modified_regions(old_data, new_data):
    """Return the regions of new_data that are not shared with old_data.

    bdiff.blocks() reports the blocks common to both inputs as
    (old begin, old end, new begin, new end) tuples.  Every gap in
    new_data between the end of one common block and the start of the
    next one is recorded as a modified region.
    """
    regions = Regions()
    # End (in new_data) of the previous common block.  Starting from 0
    # instead of "no previous block" makes sure that changes located
    # before the first common block -- including the case of a file
    # without any common block -- are reported as well.
    beg = 0
    for pbeg, pend, fbeg, fend in bdiff.blocks(old_data, new_data):
        if beg != fbeg:
            regions.append(beg, fbeg)
        beg = fend
    return regions
110
def modregions(wctx, fname):
    """Return the regions of fname that differ from its parent(s).

    With one parent, the result is the set of regions modified relative
    to that parent.  With two parents, only regions modified relative
    to both are kept.  For any other number of parents the whole file,
    from line 0 to its number of lines, is returned.
    """
    fctx = wctx.filectx(fname)
    parents = fctx.parents()
    file_data = fctx.data()

    if len(parents) not in (1, 2):
        whole_file = Regions()
        whole_file.append(0, len(mdiff.splitnewlines(file_data)))
        return whole_file

    mod_regions = modified_regions(parents[0].data(), file_data)
    if len(parents) == 2:
        # only the lines that are new in both
        mod_regions &= modified_regions(parents[1].data(), file_data)

    return mod_regions
128
class UserInterface(object):
    """Base class of the front ends used to talk to the user.

    Subclasses provide do_prompt(prompt, results, default), which asks
    the question once and returns the answer, and write(string).
    """

    def __init__(self, verbose=False):
        self.verbose = verbose

    def prompt(self, prompt, results, default):
        """Ask the question until one of the valid answers is given.

        prompt:  text shown to the user
        results: the valid answers, e.g. the string 'aif' for the three
                 answers 'a', 'i' and 'f'
        default: passed on to do_prompt()

        Returns the first answer that is an entry of results.
        """
        # Compare against the individual entries.  A plain "in" test on
        # a string such as 'aif' is a substring test and would also
        # accept '' or 'ai' as an answer.
        valid = tuple(results)
        while True:
            result = self.do_prompt(prompt, results, default)
            if result in valid:
                return result
138
class MercurialUI(UserInterface):
    """User interface that forwards to a Mercurial ui object."""

    def __init__(self, ui, *args, **kwargs):
        # Everything but the Mercurial ui object is handled by the
        # base class.
        super(MercurialUI, self).__init__(*args, **kwargs)
        self.ui = ui

    def write(self, string):
        self.ui.write(string)

    def do_prompt(self, prompt, results, default):
        hg_ui = self.ui
        return hg_ui.prompt(prompt, default=default)
149
class StdioUI(UserInterface):
    """User interface that uses raw_input() and standard output."""

    def write(self, string):
        sys.stdout.write(string)

    def do_prompt(self, prompt, results, default):
        # An empty answer selects the default.
        answer = raw_input(prompt)
        if answer:
            return answer
        return default
156
157
class Verifier(object):
    """Base class for style verifier objects

    Subclasses must define these class attributes:
      languages = set of strings identifying applicable languages
      test_name = long descriptive name of test, will be used in
                  messages such as "error in <foo>" or "invalid <foo>"
      opt_name = short name used to generate command-line options to
                 control the test (--fix-<foo>, --ignore-<foo>, etc.)

    Line-by-line verifiers additionally define check_line(line) and
    fix_line(line); see check() and fix().
    """

    def __init__(self, ui, repo, opts):
        self.ui = ui
        self.repo = repo
        # opt_name must be defined as a class attribute of derived classes.
        # Check test-specific opts first as these have precedence.
        self.opt_fix = opts.get('fix_' + self.opt_name, False)
        self.opt_ignore = opts.get('ignore_' + self.opt_name, False)
        self.opt_skip = opts.get('skip_' + self.opt_name, False)
        # If no test-specific opts were set, then set based on "-all" opts.
        if not (self.opt_fix or self.opt_ignore or self.opt_skip):
            self.opt_fix = opts.get('fix_all', False)
            self.opt_ignore = opts.get('ignore_all', False)
            self.opt_skip = opts.get('skip_all', False)

    def __getattr__(self, attr):
        """Supply the forwarded and lazily created attributes.

        'prompt' and 'write' are looked up on the user interface.
        'wctx' is the working context of the repository; it is created
        on first access and then stored on the instance.  Any other
        missing attribute raises AttributeError.
        """
        if attr in ('prompt', 'write'):
            return getattr(self.ui, attr)

        if attr == 'wctx':
            try:
                wctx = self.repo.workingctx()
            except AttributeError:
                # This repository object has no workingctx() method;
                # build the context through mercurial.context instead.
                from mercurial import context
                wctx = context.workingctx(self.repo)
            self.wctx = wctx
            return wctx

        raise AttributeError(attr)

    def open(self, filename, mode):
        """Open a file given relative to the repository root.

        Returns the file object, or None (after printing a message) if
        the file could not be opened.
        """
        filename = self.repo.wjoin(filename)

        try:
            # Within this method body "open" is the builtin, not this
            # method.
            f = open(filename, mode)
        except (IOError, OSError) as msg:
            print('could not open file %s: %s' % (filename, msg))
            return None

        return f

    def skip(self, filename):
        """Return True if this verifier does not apply to 'filename'."""
        filename = self.repo.wjoin(filename)

        # We never want to handle symlinks, so always skip them: If the location
        # pointed to is a directory, skip it. If the location is a file inside
        # the gem5 directory, it will be checked as a file, so symlink can be
        # skipped. If the location is a file outside gem5, we don't want to
        # check it anyway.
        if os.path.islink(filename):
            return True
        return lang_type(filename) not in self.languages

    def check(self, filename, regions=all_regions):
        """Check specified regions of file 'filename'.

        Line-by-line checks can simply provide a check_line() method
        that returns True if the line is OK and False if it has an
        error.  Verifiers that need a multi-line view (like
        SortedIncludes) must override this entire function.

        Returns a count of errors (0 if none), though actual non-zero
        count value is not currently used anywhere.  A file that cannot
        be opened is reported by open() and counted as having no errors.
        """

        f = self.open(filename, 'r')
        if f is None:
            return 0

        errors = 0
        try:
            for num, line in enumerate(f):
                if num not in regions:
                    continue
                if not self.check_line(line):
                    self.write("invalid %s in %s:%d\n" %
                               (self.test_name, filename, num + 1))
                    if self.ui.verbose:
                        # Strip only the newline so that a last line
                        # without one is shown completely.
                        self.write(">>%s<<\n" % line.rstrip('\n'))
                    errors += 1
        finally:
            f.close()
        return errors

    def fix(self, filename, regions=all_regions):
        """Fix specified regions of file 'filename'.

        Line-by-line fixes can simply provide a fix_line() method that
        returns the fixed line. Verifiers that need a multi-line view
        (like SortedIncludes) must override this entire function.

        Nothing is done if the file cannot be opened.
        """

        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = list(f)

            # Rewrite the file in place.
            f.seek(0)
            f.truncate()

            for i, line in enumerate(lines):
                if i in regions:
                    line = self.fix_line(line)

                f.write(line)
        finally:
            f.close()

    def apply(self, filename, regions=all_regions):
        """Possibly apply to specified regions of file 'filename'.

        Verifier is skipped if the --skip-<check> option was provided or
        if file is not of an applicable type.  Otherwise file is checked
        and error messages printed.  Errors are fixed or ignored if
        the corresponding --fix-<check> or --ignore-<check> options were
        provided.  If neither, the user is prompted for an action.

        Returns True to abort, False otherwise.
        """
        if not (self.opt_skip or self.skip(filename)):
            errors = self.check(filename, regions)
            if errors and not self.opt_ignore:
                if self.opt_fix:
                    self.fix(filename, regions)
                else:
                    result = self.ui.prompt("(a)bort, (i)gnore, or (f)ix?",
                                            'aif', 'a')
                    if result == 'f':
                        self.fix(filename, regions)
                    elif result == 'a':
                        return True # abort

        return False
295
296
class Whitespace(Verifier):
    """Check whitespace.

    Specifically:
    - No tabs used for indent
    - No trailing whitespace
    """

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'whitespace'
    opt_name = 'white'

    def check_line(self, line):
        """Return True if the line has no tab in its indent and no
        trailing whitespace."""
        indent = lead.search(line)
        if indent is not None and '\t' in indent.group(1):
            return False

        return trail.search(line) is None

    def fix_line(self, line):
        """Return the line with tabs in the indent expanded to spaces
        and trailing whitespace replaced by a single newline."""
        indent = lead.search(line)
        if indent:
            # Walk over the leading blanks and tabs, tracking the column
            # reached; each tab advances to the next tab stop.
            columns = 0
            for char in indent.group(1):
                if char == ' ':
                    columns += 1
                else:
                    columns += tabsize - columns % tabsize
            line = ' ' * columns + line[indent.end():]

        return line.rstrip() + '\n'
335
336
class ControlSpace(Verifier):
    """Check for exactly one space after if/while/for"""

    languages = set(('C', 'C++'))
    test_name = 'spacing after if/while/for'
    opt_name = 'control'

    def check_line(self, line):
        """Return True if every if/while/for that is followed by an
        opening parenthesis is separated from it by exactly one space."""
        # Look at every control keyword on the line, not only the first
        # one, so that the check agrees with what fix_line() rewrites.
        for match in any_control.finditer(line):
            if match.group(2) != " ":
                return False
        return True

    def fix_line(self, line):
        """Return the line with exactly one space between each control
        keyword and its opening parenthesis."""
        return any_control.sub(r'\1 (', line)
351
352
class SortedIncludes(Verifier):
    """Check for proper sorting of include statements"""

    languages = sort_includes.default_languages
    test_name = 'include file order'
    opt_name = 'include'

    def __init__(self, *args, **kwargs):
        super(SortedIncludes, self).__init__(*args, **kwargs)
        self.sort_includes = sort_includes.SortIncludes()

    def _sorted_lines(self, filename, lines):
        """Return the (non-empty) list of newline-free lines as
        rewritten by the include sorter."""
        language = lang_type(filename, lines[0])
        return list(self.sort_includes(lines, filename, language))

    def check(self, filename, regions=all_regions):
        """Return 1 if sorting the includes would change any of the
        given regions of the file, 0 otherwise.

        Empty files and files that cannot be opened yield 0.
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        try:
            lines = [l.rstrip('\n') for l in f]
        finally:
            f.close()

        if not lines:
            return 0

        old = ''.join(line + '\n' for line in lines)
        sort_lines = self._sorted_lines(filename, lines)
        new = ''.join(line + '\n' for line in sort_lines)

        # Only differences that fall into the requested regions count.
        modified = modified_regions(old, new) & regions

        if modified:
            self.write("invalid sorting of includes in %s\n" % (filename))
            if self.ui.verbose:
                for start, end in modified.regions:
                    self.write("bad region [%d, %d)\n" % (start, end))
            return 1

        return 0

    def fix(self, filename, regions=all_regions):
        """Rewrite the file with its includes sorted.

        The whole file is rewritten; 'regions' is accepted for
        interface compatibility but not consulted.  Empty files and
        files that cannot be opened are left alone.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = [l.rstrip('\n') for l in f.readlines()]
            if not lines:
                return

            sort_lines = self._sorted_lines(filename, lines)

            f.seek(0)
            f.truncate()

            for line in sort_lines:
                f.write(line)
                f.write('\n')
        finally:
            f.close()
405        f.close()
406
# Every verifier class; do_check_style() instantiates them in this
# order and the command line options are generated from them.
all_verifiers = [Whitespace, ControlSpace, SortedIncludes]
413
def linelen(line):
    """Return the display width of line, with every tab advancing to
    the next multiple of tabsize."""
    if '\t' not in line:
        return len(line)

    width = 0
    for char in line:
        width += (tabsize - width % tabsize) if char == '\t' else 1

    return width
427
class ValidationStats(object):
    """Counters for the formatting violations found by validate()."""

    def __init__(self):
        self.toolong = 0      # lines longer than 79 columns
        self.toolong80 = 0    # ... of which exactly 80 columns
        self.leadtabs = 0     # tabs used to indent
        self.trailwhite = 0   # trailing whitespace
        self.badcontrol = 0   # bad spacing after if/while/for
        self.cret = 0         # carriage returns

    def dump(self):
        """Print a summary of all counters to standard output."""
        print('''\
%d violations of lines over 79 chars. %d of which are 80 chars exactly.
%d cases of whitespace at the end of a line.
%d cases of tabs to indent.
%d bad parens after if/while/for.
%d carriage returns found.
''' % (self.toolong, self.toolong80, self.trailwhite, self.leadtabs,
       self.badcontrol, self.cret))

    def __nonzero__(self):
        """Return True if any violation has been counted."""
        return bool(self.toolong or self.toolong80 or self.leadtabs or
                    self.trailwhite or self.badcontrol or self.cret)

    # Python 3 looks up the truth-value hook under this name.
    __bool__ = __nonzero__
450
def validate(filename, stats, verbose, exit_code):
    """Check one file against the formatting rules and update stats.

    filename:  path of the file; files whose lang_type() is not in
               format_types are ignored
    stats:     object whose toolong, toolong80, leadtabs, trailwhite,
               badcontrol and cret counters are incremented
    verbose:   > 0 reports a file that cannot be opened, > 1 prints a
               message for every violation, > 2 also prints the line
    exit_code: if not None, sys.exit(exit_code) is called at the first
               violation (or if the file cannot be opened)
    """
    lang = lang_type(filename)
    if lang not in format_types:
        return

    def msg(lineno, line, message):
        print('%s:%d> %s' % (filename, lineno + 1, message))
        if verbose > 2:
            print(line)

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    try:
        f = open(filename, 'r')
    except (IOError, OSError):
        if verbose > 0:
            print('could not open file %s' % filename)
        bad()
        return

    try:
        for i, line in enumerate(f):
            line = line.rstrip('\n')

            # no carriage returns
            if '\r' in line:
                stats.cret += 1
                if verbose > 1:
                    msg(i, line, 'carriage return found')
                bad()

            # lines max out at 79 chars
            llen = linelen(line)
            if llen > 79:
                stats.toolong += 1
                if llen == 80:
                    stats.toolong80 += 1
                if verbose > 1:
                    msg(i, line, 'line too long (%d chars)' % llen)
                bad()

            # no tabs used to indent
            match = lead.search(line)
            if match and '\t' in match.group(1):
                stats.leadtabs += 1
                if verbose > 1:
                    msg(i, line, 'using tabs to indent')
                bad()

            # no trailing whitespace
            if trail.search(line):
                stats.trailwhite += 1
                if verbose > 1:
                    msg(i, line, 'trailing whitespace')
                bad()

            # for c++, exactly one space between if/while/for and (
            if lang == 'C++':
                match = any_control.search(line)
                if match and match.group(2) != " ":
                    stats.badcontrol += 1
                    if verbose > 1:
                        msg(i, line,
                            'improper spacing after %s' % match.group(1))
                    bad()
    finally:
        f.close()
516
517
def _modified_regions(repo, patterns, **kwargs):
    """Yield (file name, regions) pairs for the files to be checked.

    repo:     Mercurial repository object
    patterns: file patterns used to restrict the files considered
    kwargs:   command options; 'all' includes clean files and selects
              whole files, 'no_ignore' disables the style ignore list

    Without 'all', added files are paired with all_regions and
    modified files with the regions reported by modregions().
    """
    opt_all = kwargs.get('all', False)
    opt_no_ignore = kwargs.get('no_ignore', False)

    # Import the match (repository file name matching helper)
    # function. Different versions of Mercurial keep it in different
    # modules and implement them differently.
    try:
        from mercurial import scmutil
        m = scmutil.match(repo[None], patterns, kwargs)
    except ImportError:
        from mercurial import cmdutil
        m = cmdutil.match(repo, patterns, kwargs)

    modified, added, removed, deleted, unknown, ignore, clean = \
        repo.status(match=m, clean=opt_all)

    if not opt_all:
        try:
            wctx = repo.workingctx()
        except AttributeError:
            # This repository object has no workingctx() method; build
            # the working context through mercurial.context instead.
            from mercurial import context
            wctx = context.workingctx(repo)

        files = [ (fn, all_regions) for fn in added ] + \
            [ (fn,  modregions(wctx, fn)) for fn in modified ]
    else:
        files = [ (fn, all_regions) for fn in added + modified + clean ]

    for fname, mod_regions in files:
        if opt_no_ignore or not check_ignores(fname):
            yield fname, mod_regions
550
551
def do_check_style(hgui, repo, *pats, **opts):
    """check files for proper m5 style guidelines

    Without an argument, checks all modified and added files for gem5
    coding style violations. A list of files can be specified to limit
    the checker to a subset of the repository. The style rules are
    normally applied on a diff of the repository state (i.e., added
    files are checked in their entirety while only modifications of
    modified files are checked).

    The --all option can be specified to include clean files and check
    modified files in their entirety.

    The --fix-<check>, --ignore-<check>, and --skip-<check> options
    can be used to control individual style checks:

    --fix-<check> will perform the check and automatically attempt to
      fix any style error (printing a warning if unsuccessful)

    --ignore-<check> will perform the check but ignore any errors
      found (other than printing a message for each)

    --skip-<check> will skip performing the check entirely

    If none of these options are given, all checks will be performed
    and the user will be prompted on how to handle each error.

    --fix-all, --ignore-all, and --skip-all are equivalent to specifying
    --fix-<check>, --ignore-<check>, or --skip-<check> for all checks,
    respectively.  However, option settings for specific checks take
    precedence.  Thus --skip-all --fix-white can be used to skip every
    check other than whitespace errors, which will be checked and
    automatically fixed.

    The -v/--verbose flag will display the offending line(s) as well
    as their location.
    """

    ui = MercurialUI(hgui, verbose=hgui.verbose)

    # instantiate verifier objects
    verifiers = [v(ui, repo, opts) for v in all_verifiers]

    for fname, mod_regions in _modified_regions(repo, pats, **opts):
        for verifier in verifiers:
            # apply() returns True when the user chose to abort.
            if verifier.apply(fname, mod_regions):
                return True

    return False
601
def do_check_format(hgui, repo, *pats, **opts):
    """check files for gem5 code formatting violations

    Without an argument, checks all modified and added files for gem5
    code formatting violations. A list of files can be specified to
    limit the checker to a subset of the repository. The style rules
    are normally applied on a diff of the repository state (i.e.,
    added files are checked in their entirety while only modifications
    of modified files are checked).

    The --all option can be specified to include clean files and check
    modified files in their entirety.
    """
    ui = MercurialUI(hgui, hgui.verbose)

    verbose = 0
    for fname, unused_regions in _modified_regions(repo, pats, **opts):
        stats = ValidationStats()
        validate(joinpath(repo.root, fname), stats, verbose, None)
        if not stats:
            # nothing to report for this file
            continue

        print("%s:" % fname)
        stats.dump()
        answer = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                           'ai', 'a')
        if answer == 'a':
            return True

    return False
630
def check_hook(hooktype):
    """Raise AttributeError unless hooktype is 'pretxncommit' or
    'pre-qrefresh'."""
    if hooktype in ('pretxncommit', 'pre-qrefresh'):
        return
    raise AttributeError("This hook is not meant for %s" % hooktype)
635
# Hook entry point, called before transaction commit and on qrefresh
def check_style(ui, repo, hooktype, **kwargs):
    """Run the style check as a Mercurial hook.

    Returns the result of do_check_style() called without patterns or
    options.  If that call raises, the traceback is printed and True is
    returned.
    """
    check_hook(hooktype)

    try:
        return do_check_style(ui, repo)
    except Exception:
        import traceback
        traceback.print_exc()
        return True
648
def check_format(ui, repo, hooktype, **kwargs):
    """Run the format check as a Mercurial hook.

    Returns the result of do_check_format() called without patterns or
    options.  If that call raises, the traceback is printed and True is
    returned.
    """
    check_hook(hooktype)

    try:
        return do_check_format(ui, repo)
    except Exception:
        import traceback
        traceback.print_exc()
        return True
659
660try:
661    from mercurial.i18n import _
662except ImportError:
663    def _(arg):
664        return arg
665
# Options of both commands that select which files and lines are
# looked at.
_common_region_options = [
    ('a', 'all', False,
     _("include clean files and unmodified parts of modified files")),
    ('', 'no-ignore', False, _("ignore the style ignore list")),
]


# Options controlling the verifiers: for each of fix/ignore/skip one
# "-all" entry followed by one entry per class in all_verifiers.
fix_opts = [('f', 'fix-all', False, _("fix all style errors"))]
fix_opts += [('', 'fix-' + v.opt_name, False,
              _('fix errors in ' + v.test_name))
             for v in all_verifiers]

ignore_opts = [('', 'ignore-all', False, _("ignore all style errors"))]
ignore_opts += [('', 'ignore-' + v.opt_name, False,
                 _('ignore errors in ' + v.test_name))
                for v in all_verifiers]

skip_opts = [('', 'skip-all', False, _("skip all style error checks"))]
skip_opts += [('', 'skip-' + v.opt_name, False,
               _('skip checking for ' + v.test_name))
              for v in all_verifiers]

all_opts = fix_opts + ignore_opts + skip_opts
683
684
# Command table: each entry maps a command name to a tuple of
# (function, option list, synopsis).
cmdtable = {
    '^m5style': (
        do_check_style,
        all_opts + _common_region_options + commands.walkopts,
        _('hg m5style [-a] [FILE]...'),
    ),
    '^m5format': (
        do_check_format,
        _common_region_options + commands.walkopts,
        _('hg m5format [FILE]...'),
    ),
}
694
# Stand-alone command line use:
#   fixwhite  [-t <tabsize>] <path> [...]  fix whitespace in place
#   chkwhite  <path> [...]                 report whitespace errors
#   chkformat <path> [...]                 report formatting statistics
if __name__ == '__main__':
    import getopt

    progname = sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit('usage: %s <command> [<command args>]' % progname)

    fixwhite_usage = '%s fixwhite [-t <tabsize> ] <path> [...] \n' % progname
    chkformat_usage = '%s chkformat <path> [...] \n' % progname
    chkwhite_usage = '%s chkwhite <path> [...] \n' % progname

    command = sys.argv[1]
    if command == 'fixwhite':
        flags = 't:'
        usage = fixwhite_usage
    elif command == 'chkwhite':
        flags = 'nv'
        usage = chkwhite_usage
    elif command == 'chkformat':
        flags = 'nv'
        usage = chkformat_usage
    else:
        sys.exit(fixwhite_usage + chkwhite_usage + chkformat_usage)

    try:
        opts, args = getopt.getopt(sys.argv[2:], flags)
    except getopt.GetoptError as err:
        # Unknown option or missing argument: show how to call the
        # selected command instead of a traceback.
        sys.exit('%s\n%s' % (err, usage))

    code = 1
    verbose = 1
    for opt, arg in opts:
        if opt == '-n':
            code = None
        elif opt == '-t':
            # Module-level assignment: this is the tabsize used by
            # Whitespace.fix_line().
            tabsize = int(arg)
        elif opt == '-v':
            verbose += 1

    if command in ('fixwhite', 'chkwhite'):
        class _PlainPaths(object):
            """Stand-in for a Mercurial repository when running from the
            command line: file names are used exactly as given."""

            def wjoin(self, filename):
                return filename

        # The whitespace commands are served by the Whitespace verifier;
        # no options are passed, so it neither prompts nor skips.
        whitespace = Whitespace(StdioUI(verbose=verbose > 0),
                                _PlainPaths(), {})
        for filename in args:
            if whitespace.skip(filename):
                continue
            if command == 'fixwhite':
                whitespace.fix(filename)
            else:
                whitespace.check(filename)
    elif command == 'chkformat':
        stats = ValidationStats()
        for filename in args:
            validate(filename, stats=stats, verbose=verbose, exit_code=code)

        if verbose > 0:
            stats.dump()
749