style.py revision 10692:ab81a0feab55
1#! /usr/bin/env python
2# Copyright (c) 2014 ARM Limited
3# All rights reserved
4#
5# The license below extends only to copyright in the software and shall
6# not be construed as granting a license to any other intellectual
7# property including but not limited to intellectual property relating
8# to a hardware implementation of the functionality of the software
9# licensed hereunder.  You may use the software subject to the license
10# terms below provided that you ensure that this notice is replicated
11# unmodified and in its entirety in all distributions of the software,
12# modified or unmodified, in source code or in binary form.
13#
14# Copyright (c) 2006 The Regents of The University of Michigan
15# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
16# All rights reserved.
17#
18# Redistribution and use in source and binary forms, with or without
19# modification, are permitted provided that the following conditions are
20# met: redistributions of source code must retain the above copyright
21# notice, this list of conditions and the following disclaimer;
22# redistributions in binary form must reproduce the above copyright
23# notice, this list of conditions and the following disclaimer in the
24# documentation and/or other materials provided with the distribution;
25# neither the name of the copyright holders nor the names of its
26# contributors may be used to endorse or promote products derived from
27# this software without specific prior written permission.
28#
29# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40#
41# Authors: Nathan Binkert
42
43import heapq
44import os
45import re
46import sys
47
48from os.path import dirname, join as joinpath
49from itertools import count
50from mercurial import bdiff, mdiff, commands
51
52current_dir = dirname(__file__)
53sys.path.insert(0, current_dir)
54sys.path.insert(1, joinpath(dirname(current_dir), 'src', 'python'))
55
56from m5.util import neg_inf, pos_inf, Region, Regions
57import sort_includes
58from file_types import lang_type
59
# Regions object built from a single Region running from neg_inf to
# pos_inf. Used below as the default `regions` argument, i.e. "do not
# restrict the check to particular lines".
all_regions = Regions(Region(neg_inf, pos_inf))

# Tab stop width used when expanding tabs and when measuring line length.
tabsize = 8
# Run of spaces/tabs at the very start of a line.
lead = re.compile(r'^([ \t]+)')
# Run of spaces/tabs at the very end of a line.
trail = re.compile(r'([ \t]+)$')
# if/while/for followed by any number of spaces/tabs and an opening paren.
any_control = re.compile(r'\b(if|while|for)[ \t]*[(]')
# if/while/for followed by exactly one space and an opening paren.
good_control = re.compile(r'\b(if|while|for) [(]')

# Languages for which validate() performs the formatting checks.
format_types = set(('C', 'C++'))
69
70
def re_ignore(expr):
    """Build an ignore-rule predicate from a regular expression.

    The returned callable takes a file name and returns the result of
    matching the compiled expression against the beginning of that name
    (a match object on success, None otherwise).
    """
    pattern = re.compile(expr)
    return lambda fname: pattern.match(fname)
79
# Functions that are called to determine whether a file should be
# excluded from the style matching rules. Each function is called
# with the file name relative to the repository root (without a
# leading slash) as its argument. A file is excluded if any function
# in the list returns true.
style_ignores = [
    # Ignore external projects as they are unlikely to follow the gem5
    # coding convention.
    re_ignore("^ext/"),
]
90
def check_ignores(fname):
    """Return True if any rule in style_ignores accepts fname, else False."""
    return any(rule(fname) for rule in style_ignores)
99
100
def modified_regions(old_data, new_data):
    """Collect the regions of new_data that bdiff does not pair with old_data.

    Walks the tuples produced by bdiff.blocks(old_data, new_data); each
    tuple carries a begin/end pair for the old side followed by a
    begin/end pair for the new side. Whenever the new-side start of a
    block differs from the new-side end of the previous block, that gap
    is appended to the result.

    Returns a Regions object.
    """
    changed = Regions()
    prev_end = None
    for old_beg, old_end, new_beg, new_end in bdiff.blocks(old_data, new_data):
        # Only the new-side coordinates are used; the first block has no
        # predecessor and therefore never produces a gap.
        if prev_end is not None and prev_end != new_beg:
            changed.append(prev_end, new_beg)
        prev_end = new_end
    return changed
109
def modregions(wctx, fname):
    """Return the regions of fname that differ from its parent revision(s).

    wctx  -- context object providing filectx(fname)
    fname -- file name handed to wctx.filectx()

    With one parent the result is the diff against that parent; with two
    parents it is the intersection of the diffs against both. With any
    other number of parents the whole file (line 0 up to the number of
    lines) is returned.
    """
    fctx = wctx.filectx(fname)
    parents = fctx.parents()

    contents = fctx.data()
    line_count = len(mdiff.splitnewlines(contents))

    if len(parents) not in (1, 2):
        whole_file = Regions()
        whole_file.append(0, line_count)
        return whole_file

    result = modified_regions(parents[0].data(), contents)
    if len(parents) == 2:
        # only the lines that are new in both
        result &= modified_regions(parents[1].data(), contents)
    return result
127
class UserInterface(object):
    """Base class for the front ends used to talk to the user.

    Subclasses supply do_prompt(prompt, results, default), which asks
    once and returns the answer.
    """

    def __init__(self, verbose=False):
        self.verbose = verbose

    def prompt(self, prompt, results, default):
        """Ask via do_prompt() until the answer is contained in results."""
        answer = self.do_prompt(prompt, results, default)
        while answer not in results:
            answer = self.do_prompt(prompt, results, default)
        return answer
137
class MercurialUI(UserInterface):
    """UserInterface that delegates prompting and output to a ui object."""

    def __init__(self, ui, *args, **kwargs):
        # Everything after `ui` is forwarded to UserInterface.__init__.
        UserInterface.__init__(self, *args, **kwargs)
        self.ui = ui

    def do_prompt(self, prompt, results, default):
        """Ask once through the wrapped ui; `results` is not forwarded."""
        answer = self.ui.prompt(prompt, default=default)
        return answer

    def write(self, string):
        """Write string through the wrapped ui."""
        self.ui.write(string)
148
class StdioUI(UserInterface):
    """UserInterface that prompts on stdin and writes to stdout."""

    def do_prompt(self, prompt, results, default):
        """Read one answer; an empty answer yields `default`."""
        answer = raw_input(prompt)
        if answer:
            return answer
        return default

    def write(self, string):
        """Write string to sys.stdout without adding a newline."""
        sys.stdout.write(string)
155
class Verifier(object):
    """Base class for line-oriented style checkers.

    Subclasses are expected to provide:
      languages        -- container of language names accepted by skip()
      test_name        -- short name used in the "invalid ..." message
      check_line(line) -- return a true value if the line is acceptable
      fix_line(line)   -- return the corrected line
    """

    def __init__(self, ui, repo):
        self.ui = ui
        self.repo = repo

    def __getattr__(self, attr):
        """Forward prompt/write to the ui and lazily create `wctx`.

        Only called when normal attribute lookup fails. Raises
        AttributeError (carrying the attribute name) for anything else.
        """
        if attr in ('prompt', 'write'):
            return getattr(self.ui, attr)

        if attr == 'wctx':
            # Try the repository method first and fall back to
            # constructing the working context directly.
            try:
                wctx = self.repo.workingctx()
            except Exception:
                from mercurial import context
                wctx = context.workingctx(self.repo)
            # Cache on the instance so __getattr__ is not hit again.
            self.wctx = wctx
            return wctx

        raise AttributeError(attr)

    def open(self, filename, mode):
        """Open a repository-relative file.

        Returns the file object, or None (after printing a message) if
        the file could not be opened.
        """
        filename = self.repo.wjoin(filename)

        try:
            # Inside this method `open` still refers to the builtin.
            f = open(filename, mode)
        except EnvironmentError:
            # EnvironmentError covers both IOError and OSError.
            msg = sys.exc_info()[1]
            print('could not open file %s: %s' % (filename, msg))
            return None

        return f

    def skip(self, filename):
        """Return True if the file should not be checked by this verifier."""
        filename = self.repo.wjoin(filename)

        # We never want to handle symlinks, so always skip them: If the location
        # pointed to is a directory, skip it. If the location is a file inside
        # the gem5 directory, it will be checked as a file, so symlink can be
        # skipped. If the location is a file outside gem5, we don't want to
        # check it anyway.
        if os.path.islink(filename):
            return True
        return lang_type(filename) not in self.languages

    def check(self, filename, regions=all_regions):
        """Count the lines within `regions` that fail check_line().

        Each failure is reported through write(); in verbose mode the
        offending line is shown as well. Returns the number of failures,
        or 0 if the file could not be opened (open() has already
        reported that).
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        errors = 0
        try:
            for num, line in enumerate(f):
                if num not in regions:
                    continue
                if not self.check_line(line):
                    self.write("invalid %s in %s:%d\n" %
                               (self.test_name, filename, num + 1))
                    if self.ui.verbose:
                        # Show the line itself, minus its newline.
                        self.write(">>%s<<\n" % line.rstrip('\n'))
                    errors += 1
        finally:
            f.close()
        return errors

    def fix(self, filename, regions=all_regions):
        """Rewrite the file in place, passing lines in `regions` to fix_line().

        Does nothing if the file could not be opened.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = list(f)

            f.seek(0)
            f.truncate()

            for i, line in enumerate(lines):
                if i in regions:
                    line = self.fix_line(line)

                f.write(line)
        finally:
            f.close()

    def apply(self, filename, prompt, regions=all_regions):
        """Check one file and hand it to `prompt` if errors were found.

        `prompt` is called as prompt(filename, self.fix, regions). Returns
        True only if that call returns a true value; otherwise False.
        """
        if not self.skip(filename):
            errors = self.check(filename, regions)
            if errors:
                if prompt(filename, self.fix, regions):
                    return True
        return False
236
237
class Whitespace(Verifier):
    """Verifier for tabs in the indentation and trailing whitespace."""

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'whitespace'

    def check_line(self, line):
        """Return False for a tab in the leading blanks or trailing blanks."""
        indent = lead.search(line)
        if indent and '\t' in indent.group(1):
            return False
        return not trail.search(line)

    def fix_line(self, line):
        """Expand tabs in the indentation and strip trailing whitespace.

        The result always ends with exactly one newline.
        """
        indent = lead.search(line)
        if indent:
            blanks = indent.group(1)
            # Column reached after the leading blanks, with every tab
            # advancing to the next multiple of tabsize.
            width = 0
            for char in blanks:
                if char == '\t':
                    width += tabsize - width % tabsize
                else:
                    width += 1
            line = ' ' * width + line[len(blanks):]

        return line.rstrip() + '\n'
267
class SortedIncludes(Verifier):
    """Verifier that checks and fixes include ordering via sort_includes."""

    languages = sort_includes.default_languages

    def __init__(self, *args, **kwargs):
        super(SortedIncludes, self).__init__(*args, **kwargs)
        self.sort_includes = sort_includes.SortIncludes()

    def _sorted_lines(self, filename, lines):
        """Return `lines` (newline-free, non-empty) after include sorting."""
        language = lang_type(filename, lines[0])
        return list(self.sort_includes(lines, filename, language))

    def check(self, filename, regions=all_regions):
        """Return 1 if sorting would change lines inside `regions`, else 0.

        An empty file, or one that could not be opened, counts as 0.
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        try:
            lines = [l.rstrip('\n') for l in f]
        finally:
            f.close()

        if not lines:
            return 0

        old = ''.join(line + '\n' for line in lines)
        sort_lines = self._sorted_lines(filename, lines)
        new = ''.join(line + '\n' for line in sort_lines)

        # Restrict the differences to the regions the caller asked about.
        mod = modified_regions(old, new)
        modified = mod & regions

        if modified:
            self.write("invalid sorting of includes in %s\n" % (filename))
            if self.ui.verbose:
                for start, end in modified.regions:
                    self.write("bad region [%d, %d)\n" % (start, end))
            return 1

        return 0

    def fix(self, filename, regions=all_regions):
        """Rewrite the file with its includes sorted.

        `regions` is accepted for interface compatibility but not used:
        the whole file is rewritten. Empty or unopenable files are left
        untouched.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = [l.rstrip('\n') for l in f.readlines()]
            if not lines:
                # Nothing to sort, and lines[0] below would fail.
                return

            sort_lines = self._sorted_lines(filename, lines)

            f.seek(0)
            f.truncate()

            for line in sort_lines:
                f.write(line)
                f.write('\n')
        finally:
            f.close()
316
def linelen(line):
    """Return the display width of line, expanding tabs to tabsize stops."""
    if '\t' not in line:
        return len(line)

    width = 0
    for char in line:
        # A tab advances to the next multiple of tabsize; anything else
        # occupies a single column.
        width += (tabsize - width % tabsize) if char == '\t' else 1
    return width
330
class ValidationStats(object):
    """Counters for the formatting violations found by validate()."""

    def __init__(self):
        self.toolong = 0      # lines longer than 79 columns
        self.toolong80 = 0    # ... of which exactly 80 columns
        self.leadtabs = 0     # lines indented with tabs
        self.trailwhite = 0   # lines with trailing whitespace
        self.badcontrol = 0   # bad spacing after if/while/for
        self.cret = 0         # lines containing a carriage return

    def dump(self):
        """Print a summary of all counters to stdout."""
        # Single parenthesised argument: prints identically whether
        # print is a statement or a function.
        print('''\
%d violations of lines over 79 chars. %d of which are 80 chars exactly.
%d cases of whitespace at the end of a line.
%d cases of tabs to indent.
%d bad parens after if/while/for.
%d carriage returns found.
''' % (self.toolong, self.toolong80, self.trailwhite, self.leadtabs,
       self.badcontrol, self.cret))

    def __nonzero__(self):
        """Return True if any violation has been recorded."""
        return bool(self.toolong or self.toolong80 or self.leadtabs or
                    self.trailwhite or self.badcontrol or self.cret)

    # Python 3 spelling of the truth-value hook.
    __bool__ = __nonzero__
353
def validate(filename, stats, verbose, exit_code):
    """Check one file against the formatting rules and update `stats`.

    filename  -- path of the file to check
    stats     -- ValidationStats-like object whose counters are incremented
    verbose   -- > 0 reports open failures, > 1 reports each violation,
                 > 2 additionally prints the offending line
    exit_code -- if not None, sys.exit(exit_code) is called on the first
                 problem found

    Files whose lang_type() is not in format_types are ignored.
    """
    lang = lang_type(filename)
    if lang not in format_types:
        return

    def msg(lineno, line, message):
        print('%s:%d> %s' % (filename, lineno + 1, message))
        if verbose > 2:
            print(line)

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    try:
        f = open(filename, 'r')
    except EnvironmentError:
        # EnvironmentError covers both IOError and OSError.
        if verbose > 0:
            print('could not open file %s' % filename)
        bad()
        return

    try:
        for i, line in enumerate(f):
            line = line.rstrip('\n')

            # no carriage returns
            if line.find('\r') != -1:
                stats.cret += 1
                if verbose > 1:
                    msg(i, line, 'carriage return found')
                bad()

            # lines max out at 79 chars
            llen = linelen(line)
            if llen > 79:
                stats.toolong += 1
                if llen == 80:
                    stats.toolong80 += 1
                if verbose > 1:
                    msg(i, line, 'line too long (%d chars)' % llen)
                bad()

            # no tabs used to indent
            match = lead.search(line)
            if match and match.group(1).find('\t') != -1:
                stats.leadtabs += 1
                if verbose > 1:
                    msg(i, line, 'using tabs to indent')
                bad()

            # no trailing whitespace
            if trail.search(line):
                stats.trailwhite += 1
                if verbose > 1:
                    msg(i, line, 'trailing whitespace')
                bad()

            # for c++, exactly one space between if/while/for and (
            if lang == 'C++':
                match = any_control.search(line)
                if match and not good_control.search(line):
                    stats.badcontrol += 1
                    if verbose > 1:
                        msg(i, line,
                            'improper spacing after %s' % match.group(1))
                    bad()
    finally:
        # Also runs when bad() exits via SystemExit.
        f.close()
419
420
def _modified_regions(repo, patterns, **kwargs):
    """Yield (file name, regions) pairs for the files that should be checked.

    repo     -- repository object
    patterns -- file patterns handed to the Mercurial match helper
    kwargs   -- command options; 'all' and 'no_ignore' are read here and
                the whole dict is passed on to the match helper

    Without 'all', added files are paired with all_regions and modified
    files with modregions(); with 'all', added, modified and clean files
    are all paired with all_regions. Files rejected by check_ignores()
    are dropped unless 'no_ignore' is set.
    """
    check_all = kwargs.get('all', False)
    skip_ignore_list = kwargs.get('no_ignore', False)

    # Import the match (repository file name matching helper)
    # function. Different versions of Mercurial keep it in different
    # modules and implement them differently.
    try:
        from mercurial import scmutil
        matcher = scmutil.match(repo[None], patterns, kwargs)
    except ImportError:
        from mercurial import cmdutil
        matcher = cmdutil.match(repo, patterns, kwargs)

    status = repo.status(match=matcher, clean=check_all)
    modified, added, removed, deleted, unknown, ignore, clean = status

    if check_all:
        candidates = [(fn, all_regions) for fn in added + modified + clean]
    else:
        # Try the repository method first and fall back to constructing
        # the working context directly.
        try:
            wctx = repo.workingctx()
        except:
            from mercurial import context
            wctx = context.workingctx(repo)

        candidates = [(fn, all_regions) for fn in added]
        candidates = candidates + \
            [(fn, modregions(wctx, fn)) for fn in modified]

    for fname, mod_regions in candidates:
        if not skip_ignore_list and check_ignores(fname):
            continue
        yield fname, mod_regions
453
454
def do_check_style(hgui, repo, *pats, **opts):
    """check files for proper m5 style guidelines

    Without an argument, checks all modified and added files for gem5
    coding style violations. A list of files can be specified to limit
    the checker to a subset of the repository. The style rules are
    normally applied on a diff of the repository state (i.e., added
    files are checked in their entirety while only modifications of
    modified files are checked).

    The --all option can be specified to include clean files and check
    modified files in their entirety.
    """
    # fix_all switches on both individual fix options.
    fix_everything = opts.get('fix_all', False)
    opt_fix_white = True if fix_everything else opts.get('fix_white', False)
    opt_fix_include = \
        True if fix_everything else opts.get('fix_include', False)

    ui = MercurialUI(hgui, verbose=hgui.verbose)

    # Both callbacks return True to abort the whole run.
    def prompt(name, func, regions=all_regions):
        answer = ui.prompt("(a)bort, (i)gnore, or (f)ix?", 'aif', 'a')
        if answer == 'f':
            func(name, regions)
        return answer == 'a'

    def no_prompt(name, func, regions=all_regions):
        func(name, regions)
        return False

    if opt_fix_white:
        prompt_white = no_prompt
    else:
        prompt_white = prompt

    if opt_fix_include:
        prompt_include = no_prompt
    else:
        prompt_include = prompt

    # Verifiers are applied to every file in this order.
    checks = (
        (Whitespace(ui, repo), prompt_white),
        (SortedIncludes(ui, repo), prompt_include),
    )
    for fname, mod_regions in _modified_regions(repo, pats, **opts):
        for verifier, ask in checks:
            if verifier.apply(fname, ask, mod_regions):
                return True

    return False
504
def do_check_format(hgui, repo, *pats, **opts):
    """check files for gem5 code formatting violations

    Without an argument, checks all modified and added files for gem5
    code formatting violations. A list of files can be specified to
    limit the checker to a subset of the repository. The style rules
    are normally applied on a diff of the repository state (i.e.,
    added files are checked in their entirety while only modifications
    of modified files are checked).

    The --all option can be specified to include clean files and check
    modified files in their entirety.
    """
    ui = MercurialUI(hgui, hgui.verbose)

    # validate() is always run silently here; only the summary is shown.
    verbose = 0
    for fname, mod_regions in _modified_regions(repo, pats, **opts):
        stats = ValidationStats()
        full_path = joinpath(repo.root, fname)
        validate(full_path, stats, verbose, None)
        if not stats:
            continue

        print("%s:" % fname)
        stats.dump()
        answer = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                           'ai', 'a')
        if answer == 'a':
            return True

    return False
533
def check_hook(hooktype):
    """Raise AttributeError unless hooktype is a supported hook.

    Supported hooks are 'pretxncommit' and 'pre-qrefresh'.
    """
    supported = ('pretxncommit', 'pre-qrefresh')
    if hooktype in supported:
        return
    raise AttributeError("This hook is not meant for %s" % hooktype)
538
def check_style(ui, repo, hooktype, **kwargs):
    """Hook entry point that runs do_check_style() with default options.

    Returns the result of do_check_style(); if that raises an Exception,
    the traceback is printed and True is returned.
    """
    import traceback

    check_hook(hooktype)

    try:
        return do_check_style(ui, repo)
    except Exception:
        traceback.print_exc()
        return True
549
def check_format(ui, repo, hooktype, **kwargs):
    """Hook entry point that runs do_check_format() with default options.

    Returns the result of do_check_format(); if that raises an Exception,
    the traceback is printed and True is returned.
    """
    import traceback

    check_hook(hooktype)

    try:
        return do_check_format(ui, repo)
    except Exception:
        traceback.print_exc()
        return True
560
561try:
562    from mercurial.i18n import _
563except ImportError:
564    def _(arg):
565        return arg
566
# Options shared by both commands below. Each entry is a tuple of
# (short flag, long name, default value, help text).
_common_region_options = [
    ('a', 'all', False,
     _("include clean files and unmodified parts of modified files")),
    ('', 'no-ignore', False, _("ignore the style ignore list")),
    ]

# Command table: maps each command name to a tuple of
# (implementing function, option list, usage synopsis).
# NOTE(review): the leading '^' on the names is presumably Mercurial's
# marker for commands shown in the short help list -- confirm against
# the Mercurial version in use.
cmdtable = {
    '^m5style' : (
        do_check_style, [
            ('f', 'fix-all', False, _("automatically fix style issues")),
            ('', 'fix-white', False, _("automatically fix white space issues")),
            ('', 'fix-include', False, _("automatically fix include ordering")),
            ] + _common_region_options +  commands.walkopts,
        _('hg m5style [-a] [FILE]...')),
    '^m5format' :
    ( do_check_format, [
            ] + _common_region_options + commands.walkopts,
      _('hg m5format [FILE]...')),
}
586
# Stand-alone command line interface:
#   fixwhite  [-t <tabsize>] <path> ...  rewrite files with whitespace fixed
#   chkwhite  <path> ...                 report lines with bad whitespace
#   chkformat <path> ...                 run validate() and print statistics
if __name__ == '__main__':
    import getopt

    progname = sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit('usage: %s <command> [<command args>]' % progname)

    fixwhite_usage = '%s fixwhite [-t <tabsize> ] <path> [...] \n' % progname
    chkformat_usage = '%s chkformat <path> [...] \n' % progname
    chkwhite_usage = '%s chkwhite <path> [...] \n' % progname

    command = sys.argv[1]
    if command == 'fixwhite':
        flags = 't:'
        usage = fixwhite_usage
    elif command == 'chkwhite':
        flags = 'nv'
        usage = chkwhite_usage
    elif command == 'chkformat':
        flags = 'nv'
        usage = chkformat_usage
    else:
        sys.exit(fixwhite_usage + chkwhite_usage + chkformat_usage)

    try:
        opts, args = getopt.getopt(sys.argv[2:], flags)
    except getopt.GetoptError:
        # Report the bad option together with the command's usage line.
        sys.exit('%s\nusage: %s' % (sys.exc_info()[1], usage))

    code = 1
    verbose = 1
    for opt,arg in opts:
        if opt == '-n':
            code = None
        if opt == '-t':
            # Rebinds the module-level tabsize read by Whitespace.fix_line().
            tabsize = int(arg)
        if opt == '-v':
            verbose += 1

    # Only check_line()/fix_line() are used below, and neither touches
    # the repository, so no repo object is needed here.
    white = Whitespace(StdioUI(verbose), None)

    if command == 'fixwhite':
        for filename in args:
            f = open(filename, 'r+')
            try:
                lines = list(f)
                f.seek(0)
                f.truncate()
                for line in lines:
                    f.write(white.fix_line(line))
            finally:
                f.close()
    elif command == 'chkwhite':
        for filename in args:
            f = open(filename, 'r')
            try:
                for num, line in enumerate(f):
                    if white.check_line(line):
                        continue
                    print('invalid whitespace: %s:%d' % (filename, num + 1))
                    if verbose:
                        print('>>%s<<' % line[:-1])
            finally:
                f.close()
    elif command == 'chkformat':
        stats = ValidationStats()
        for filename in args:
            validate(filename, stats=stats, verbose=verbose, exit_code=code)

        if verbose > 0:
            stats.dump()
    else:
        sys.exit("command '%s' not found" % command)
641