gem5/util/hgstyle.py

#! /usr/bin/env python
# Copyright (c) 2014 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder.  You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2006 The Regents of The University of Michigan
# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
# Copyright (c) 2016 Advanced Micro Devices, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
#          Steve Reinhardt

import heapq
import os
import re
import sys

from os.path import dirname, join as joinpath
from itertools import count
from mercurial import bdiff, mdiff, commands

current_dir = dirname(__file__)
sys.path.insert(0, current_dir)
sys.path.insert(1, joinpath(dirname(current_dir), 'src', 'python'))

from m5.util import neg_inf, pos_inf, Region, Regions
import sort_includes
from file_types import lang_type

all_regions = Regions(Region(neg_inf, pos_inf))

tabsize = 8
lead = re.compile(r'^([ \t]+)')
trail = re.compile(r'([ \t]+)$')
any_control = re.compile(r'\b(if|while|for)([ \t]*)\(')

format_types = set(('C', 'C++'))


def re_ignore(expr):
    """Helper function to create regular expression ignore file
    matcher functions"""

    rex = re.compile(expr)
    def match_re(fname):
        return rex.match(fname)
    return match_re

# This list contains a list of functions that are called to determine
# if a file should be excluded from the style matching rules or
# not. The functions are called with the file name relative to the
# repository root (without a leading slash) as their argument. A file
# is excluded if any function in the list returns true.
style_ignores = [
    # Ignore external projects as they are unlikely to follow the gem5
    # coding convention.
    re_ignore("^ext/"),
]

def check_ignores(fname):
    """Check if a file name matches any of the ignore rules"""

    for rule in style_ignores:
        if rule(fname):
            return True

    return False


def modified_regions(old_data, new_data):
    regions = Regions()
    beg = None
    for pbeg, pend, fbeg, fend in bdiff.blocks(old_data, new_data):
        if beg is not None and beg != fbeg:
            regions.append(beg, fbeg)
        beg = fend
    return regions

def modregions(wctx, fname):
    fctx = wctx.filectx(fname)
    pctx = fctx.parents()

    file_data = fctx.data()
    lines = mdiff.splitnewlines(file_data)
    if len(pctx) in (1, 2):
        mod_regions = modified_regions(pctx[0].data(), file_data)
        if len(pctx) == 2:
            m2 = modified_regions(pctx[1].data(), file_data)
            # only the lines that are new in both
            mod_regions &= m2
    else:
        mod_regions = Regions()
        mod_regions.append(0, len(lines))

    return mod_regions

class UserInterface(object):
    def __init__(self, verbose=False):
        self.verbose = verbose

    def prompt(self, prompt, results, default):
        while True:
            result = self.do_prompt(prompt, results, default)
            if result in results:
                return result

class MercurialUI(UserInterface):
    def __init__(self, ui, *args, **kwargs):
        super(MercurialUI, self).__init__(*args, **kwargs)
        self.ui = ui

    def do_prompt(self, prompt, results, default):
        return self.ui.prompt(prompt, default=default)

    def write(self, string):
        self.ui.write(string)

class StdioUI(UserInterface):
    def do_prompt(self, prompt, results, default):
        return raw_input(prompt) or default

    def write(self, string):
        sys.stdout.write(string)


class Verifier(object):
    """Base class for style verifier objects

    Subclasses must define these class attributes:
      languages = set of strings identifying applicable languages
      test_name = long descriptive name of test, will be used in
                  messages such as "error in <foo>" or "invalid <foo>"
      opt_name = short name used to generate command-line options to
                 control the test (--fix-<foo>, --ignore-<foo>, etc.)
    """

    def __init__(self, ui, repo, opts):
        self.ui = ui
        self.repo = repo
        # opt_name must be defined as a class attribute of derived classes.
        # Check test-specific opts first as these have precedence.
        self.opt_fix = opts.get('fix_' + self.opt_name, False)
        self.opt_ignore = opts.get('ignore_' + self.opt_name, False)
        self.opt_skip = opts.get('skip_' + self.opt_name, False)
        # If no test-specific opts were set, then set based on "-all" opts.
        if not (self.opt_fix or self.opt_ignore or self.opt_skip):
            self.opt_fix = opts.get('fix_all', False)
            self.opt_ignore = opts.get('ignore_all', False)
            self.opt_skip = opts.get('skip_all', False)

    def __getattr__(self, attr):
        if attr in ('prompt', 'write'):
            return getattr(self.ui, attr)

        if attr == 'wctx':
            try:
                wctx = repo.workingctx()
            except:
                from mercurial import context
                wctx = context.workingctx(repo)
            self.wctx = wctx
            return wctx

        raise AttributeError

    def open(self, filename, mode):
        filename = self.repo.wjoin(filename)

        try:
            f = file(filename, mode)
        except OSError, msg:
            print 'could not open file %s: %s' % (filename, msg)
            return None

        return f

    def skip(self, filename):
        filename = self.repo.wjoin(filename)

        # We never want to handle symlinks, so always skip them: If the location
        # pointed to is a directory, skip it. If the location is a file inside
        # the gem5 directory, it will be checked as a file, so symlink can be
        # skipped. If the location is a file outside gem5, we don't want to
        # check it anyway.
        if os.path.islink(filename):
            return True
        return lang_type(filename) not in self.languages

    def check(self, filename, regions=all_regions):
        """Check specified regions of file 'filename'.

        Line-by-line checks can simply provide a check_line() method
        that returns True if the line is OK and False if it has an
        error.  Verifiers that need a multi-line view (like
        SortedIncludes) must override this entire function.

        Returns a count of errors (0 if none), though actual non-zero
        count value is not currently used anywhere.
        """

        f = self.open(filename, 'r')

        errors = 0
        for num,line in enumerate(f):
            if num not in regions:
                continue
            line = line.rstrip('\n')
            if not self.check_line(line):
                self.write("invalid %s in %s:%d\n" % \
                           (self.test_name, filename, num + 1))
                if self.ui.verbose:
                    self.write(">>%s<<\n" % line[:-1])
                errors += 1
        return errors

    def fix(self, filename, regions=all_regions):
        """Fix specified regions of file 'filename'.

        Line-by-line fixes can simply provide a fix_line() method that
        returns the fixed line. Verifiers that need a multi-line view
        (like SortedIncludes) must override this entire function.
        """

        f = self.open(filename, 'r+')

        lines = list(f)

        f.seek(0)
        f.truncate()

        for i,line in enumerate(lines):
            if i in regions:
                line = self.fix_line(line)

            f.write(line)
        f.close()


    def apply(self, filename, regions=all_regions):
        """Possibly apply to specified regions of file 'filename'.

        Verifier is skipped if --skip-<test> option was provided or if
        file is not of an applicable type.  Otherwise file is checked
        and error messages printed.  Errors are fixed or ignored if
        the corresponding --fix-<test> or --ignore-<test> options were
        provided.  If neither, the user is prompted for an action.

        Returns True to abort, False otherwise.
        """
        if not (self.opt_skip or self.skip(filename)):
            errors = self.check(filename, regions)
            if errors and not self.opt_ignore:
                if self.opt_fix:
                    self.fix(filename, regions)
                else:
                    result = self.ui.prompt("(a)bort, (i)gnore, or (f)ix?",
                                            'aif', 'a')
                    if result == 'f':
                        self.fix(filename, regions)
                    elif result == 'a':
                        return True # abort

        return False


class Whitespace(Verifier):
    """Check whitespace.

    Specifically:
    - No tabs used for indent
    - No trailing whitespace
    """

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'whitespace'
    opt_name = 'white'

    def check_line(self, line):
        match = lead.search(line)
        if match and match.group(1).find('\t') != -1:
            return False

        match = trail.search(line)
        if match:
            return False

        return True

    def fix_line(self, line):
        if lead.search(line):
            newline = ''
            for i,c in enumerate(line):
                if c == ' ':
                    newline += ' '
                elif c == '\t':
                    newline += ' ' * (tabsize - len(newline) % tabsize)
                else:
                    newline += line[i:]
                    break

            line = newline

        return line.rstrip() + '\n'


class ControlSpace(Verifier):
    """Check for exactly one space after if/while/for"""

    languages = set(('C', 'C++'))
    test_name = 'spacing after if/while/for'
    opt_name = 'control'

    def check_line(self, line):
        match = any_control.search(line)
        return not (match and match.group(2) != " ")

    def fix_line(self, line):
        new_line = any_control.sub(r'\1 (', line)
        return new_line


class SortedIncludes(Verifier):
    """Check for proper sorting of include statements"""

    languages = sort_includes.default_languages
    test_name = 'include file order'
    opt_name = 'include'

    def __init__(self, *args, **kwargs):
        super(SortedIncludes, self).__init__(*args, **kwargs)
        self.sort_includes = sort_includes.SortIncludes()

    def check(self, filename, regions=all_regions):
        f = self.open(filename, 'r')

        lines = [ l.rstrip('\n') for l in f.xreadlines() ]
        old = ''.join(line + '\n' for line in lines)
        f.close()

        if len(lines) == 0:
            return 0

        language = lang_type(filename, lines[0])
        sort_lines = list(self.sort_includes(lines, filename, language))
        new = ''.join(line + '\n' for line in sort_lines)

        mod = modified_regions(old, new)
        modified = mod & regions

        if modified:
            self.write("invalid sorting of includes in %s\n" % (filename))
            if self.ui.verbose:
                for start, end in modified.regions:
                    self.write("bad region [%d, %d)\n" % (start, end))
            return 1

        return 0

    def fix(self, filename, regions=all_regions):
        f = self.open(filename, 'r+')

        old = f.readlines()
        lines = [ l.rstrip('\n') for l in old ]
        language = lang_type(filename, lines[0])
        sort_lines = list(self.sort_includes(lines, filename, language))
        new = ''.join(line + '\n' for line in sort_lines)

        f.seek(0)
        f.truncate()

        for i,line in enumerate(sort_lines):
            f.write(line)
            f.write('\n')
        f.close()


def linelen(line):
    tabs = line.count('\t')
    if not tabs:
        return len(line)

    count = 0
    for c in line:
        if c == '\t':
            count += tabsize - count % tabsize
        else:
            count += 1

    return count

class LineLength(Verifier):
    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'line length'
    opt_name = 'length'

    def check_line(self, line):
        return linelen(line) <= 78

    def fix(self, filename, regions=all_regions):
        self.write("Warning: cannot automatically fix overly long lines.\n")


class BoolCompare(Verifier):
    languages = set(('C', 'C++', 'python'))
    test_name = 'boolean comparison'
    opt_name = 'boolcomp'

    regex = re.compile(r'\s*==\s*([Tt]rue|[Ff]alse)\b')

    def check_line(self, line):
        return self.regex.search(line) == None

    def fix_line(self, line):
        match = self.regex.search(line)
        if match:
            if match.group(1) in ('true', 'True'):
                line = self.regex.sub('', line)
            else:
                self.write("Warning: cannot automatically fix "
                           "comparisons with false/False.\n")
        return line


# list of all verifier classes
all_verifiers = [
    Whitespace,
    ControlSpace,
    LineLength,
    BoolCompare,
    SortedIncludes
]

class ValidationStats(object):
    def __init__(self):
        self.toolong = 0
        self.toolong80 = 0
        self.leadtabs = 0
        self.trailwhite = 0
        self.badcontrol = 0
        self.cret = 0

    def dump(self):
        print '''\
%d violations of lines over 79 chars. %d of which are 80 chars exactly.
%d cases of whitespace at the end of a line.
%d cases of tabs to indent.
%d bad parens after if/while/for.
%d carriage returns found.
''' % (self.toolong, self.toolong80, self.trailwhite, self.leadtabs,
       self.badcontrol, self.cret)

    def __nonzero__(self):
        return self.toolong or self.toolong80 or self.leadtabs or \
               self.trailwhite or self.badcontrol or self.cret

def validate(filename, stats, verbose, exit_code):
    lang = lang_type(filename)
    if lang not in format_types:
        return

    def msg(lineno, line, message):
        print '%s:%d>' % (filename, lineno + 1), message
        if verbose > 2:
            print line

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    try:
        f = file(filename, 'r')
    except OSError:
        if verbose > 0:
            print 'could not open file %s' % filename
        bad()
        return

    for i,line in enumerate(f):
        line = line.rstrip('\n')

        # no carriage returns
        if line.find('\r') != -1:
            self.cret += 1
            if verbose > 1:
                msg(i, line, 'carriage return found')
            bad()

        # lines max out at 79 chars
        llen = linelen(line)
        if llen > 79:
            stats.toolong += 1
            if llen == 80:
                stats.toolong80 += 1
            if verbose > 1:
                msg(i, line, 'line too long (%d chars)' % llen)
            bad()

        # no tabs used to indent
        match = lead.search(line)
        if match and match.group(1).find('\t') != -1:
            stats.leadtabs += 1
            if verbose > 1:
                msg(i, line, 'using tabs to indent')
            bad()

        # no trailing whitespace
        if trail.search(line):
            stats.trailwhite +=1
            if verbose > 1:
                msg(i, line, 'trailing whitespace')
            bad()

        # for c++, exactly one space betwen if/while/for and (
        if lang == 'C++':
            match = any_control.search(line)
            if match and match.group(2) != " ":
                stats.badcontrol += 1
                if verbose > 1:
                    msg(i, line, 'improper spacing after %s' % match.group(1))
                bad()


def _modified_regions(repo, patterns, **kwargs):
    opt_all = kwargs.get('all', False)
    opt_no_ignore = kwargs.get('no_ignore', False)

    # Import the match (repository file name matching helper)
    # function. Different versions of Mercurial keep it in different
    # modules and implement them differently.
    try:
        from mercurial import scmutil
        m = scmutil.match(repo[None], patterns, kwargs)
    except ImportError:
        from mercurial import cmdutil
        m = cmdutil.match(repo, patterns, kwargs)

    modified, added, removed, deleted, unknown, ignore, clean = \
        repo.status(match=m, clean=opt_all)

    if not opt_all:
        try:
            wctx = repo.workingctx()
        except:
            from mercurial import context
            wctx = context.workingctx(repo)

        files = [ (fn, all_regions) for fn in added ] + \
            [ (fn,  modregions(wctx, fn)) for fn in modified ]
    else:
        files = [ (fn, all_regions) for fn in added + modified + clean ]

    for fname, mod_regions in files:
        if opt_no_ignore or not check_ignores(fname):
            yield fname, mod_regions


def do_check_style(hgui, repo, *pats, **opts):
    """check files for proper m5 style guidelines

    Without an argument, checks all modified and added files for gem5
    coding style violations. A list of files can be specified to limit
    the checker to a subset of the repository. The style rules are
    normally applied on a diff of the repository state (i.e., added
    files are checked in their entirety while only modifications of
    modified files are checked).

    The --all option can be specified to include clean files and check
    modified files in their entirety.

    The --fix-<check>, --ignore-<check>, and --skip-<check> options
    can be used to control individual style checks:

    --fix-<check> will perform the check and automatically attempt to
      fix sny style error (printing a warning if unsuccessful)

    --ignore-<check> will perform the check but ignore any errors
      found (other than printing a message for each)

    --skip-<check> will skip performing the check entirely

    If none of these options are given, all checks will be performed
    and the user will be prompted on how to handle each error.

    --fix-all, --ignore-all, and --skip-all are equivalent to specifying
    --fix-<check>, --ignore-<check>, or --skip-<check> for all checks,
    respectively.  However, option settings for specific checks take
    precedence.  Thus --skip-all --fix-white can be used to skip every
    check other than whitespace errors, which will be checked and
    automatically fixed.

    The -v/--verbose flag will display the offending line(s) as well
    as their location.
    """

    ui = MercurialUI(hgui, verbose=hgui.verbose)

    # instantiate varifier objects
    verifiers = [v(ui, repo, opts) for v in all_verifiers]

    for fname, mod_regions in _modified_regions(repo, pats, **opts):
        for verifier in verifiers:
            if verifier.apply(fname, mod_regions):
                return True

    return False

def do_check_format(hgui, repo, *pats, **opts):
    """check files for gem5 code formatting violations

    Without an argument, checks all modified and added files for gem5
    code formatting violations. A list of files can be specified to
    limit the checker to a subset of the repository. The style rules
    are normally applied on a diff of the repository state (i.e.,
    added files are checked in their entirety while only modifications
    of modified files are checked).

    The --all option can be specified to include clean files and check
    modified files in their entirety.
    """
    ui = MercurialUI(hgui, hgui.verbose)

    verbose = 0
    for fname, mod_regions in _modified_regions(repo, pats, **opts):
        stats = ValidationStats()
        validate(joinpath(repo.root, fname), stats, verbose, None)
        if stats:
            print "%s:" % fname
            stats.dump()
            result = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                               'ai', 'a')
            if result == 'a':
                return True

    return False

def check_hook(hooktype):
    if hooktype not in ('pretxncommit', 'pre-qrefresh'):
        raise AttributeError, \
              "This hook is not meant for %s" % hooktype

# This function provides a hook that is called before transaction
# commit and on qrefresh
def check_style(ui, repo, hooktype, **kwargs):
    check_hook(hooktype)
    args = {}

    try:
        return do_check_style(ui, repo, **args)
    except Exception, e:
        import traceback
        traceback.print_exc()
        return True

def check_format(ui, repo, hooktype, **kwargs):
    check_hook(hooktype)
    args = {}

    try:
        return do_check_format(ui, repo, **args)
    except Exception, e:
        import traceback
        traceback.print_exc()
        return True

try:
    from mercurial.i18n import _
except ImportError:
    def _(arg):
        return arg

_common_region_options = [
    ('a', 'all', False,
     _("include clean files and unmodified parts of modified files")),
    ('', 'no-ignore', False, _("ignore the style ignore list")),
    ]


fix_opts = [('f', 'fix-all', False, _("fix all style errors"))] + \
           [('', 'fix-' + v.opt_name, False,
             _('fix errors in ' + v.test_name)) for v in all_verifiers]
ignore_opts = [('', 'ignore-all', False, _("ignore all style errors"))] + \
              [('', 'ignore-' + v.opt_name, False,
                _('ignore errors in ' + v.test_name)) for v in all_verifiers]
skip_opts = [('', 'skip-all', False, _("skip all style error checks"))] + \
            [('', 'skip-' + v.opt_name, False,
              _('skip checking for ' + v.test_name)) for v in all_verifiers]
all_opts = fix_opts + ignore_opts + skip_opts


cmdtable = {
    '^m5style' : (
        do_check_style, all_opts + _common_region_options + commands.walkopts,
        _('hg m5style [-a] [FILE]...')),
    '^m5format' :
    ( do_check_format, [
            ] + _common_region_options + commands.walkopts,
      _('hg m5format [FILE]...')),
}

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description="Check a file for style violations")

    parser.add_argument("--verbose", "-v", action="count",
                        help="Produce verbose output")

    parser.add_argument("file", metavar="FILE", nargs="+",
                        type=str,
                        help="Source file to inspect")

    args = parser.parse_args()

    stats = ValidationStats()
    for filename in args.file:
        validate(filename, stats=stats, verbose=args.verbose, exit_code=1)

        if args.verbose > 0:
            stats.dump()