style.py revision 10691:65da28dee7cf
1#! /usr/bin/env python
2# Copyright (c) 2014 ARM Limited
3# All rights reserved
4#
5# The license below extends only to copyright in the software and shall
6# not be construed as granting a license to any other intellectual
7# property including but not limited to intellectual property relating
8# to a hardware implementation of the functionality of the software
9# licensed hereunder.  You may use the software subject to the license
10# terms below provided that you ensure that this notice is replicated
11# unmodified and in its entirety in all distributions of the software,
12# modified or unmodified, in source code or in binary form.
13#
14# Copyright (c) 2006 The Regents of The University of Michigan
15# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
16# All rights reserved.
17#
18# Redistribution and use in source and binary forms, with or without
19# modification, are permitted provided that the following conditions are
20# met: redistributions of source code must retain the above copyright
21# notice, this list of conditions and the following disclaimer;
22# redistributions in binary form must reproduce the above copyright
23# notice, this list of conditions and the following disclaimer in the
24# documentation and/or other materials provided with the distribution;
25# neither the name of the copyright holders nor the names of its
26# contributors may be used to endorse or promote products derived from
27# this software without specific prior written permission.
28#
29# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40#
41# Authors: Nathan Binkert
42
43import heapq
44import os
45import re
46import sys
47
48from os.path import dirname, join as joinpath
49from itertools import count
50from mercurial import bdiff, mdiff, commands
51
# Make the helper modules that live next to this script, and the ones
# under <parent of this directory>/src/python, importable before the
# project-local imports below are executed.
current_dir = dirname(__file__)
sys.path.insert(0, current_dir)
sys.path.insert(1, joinpath(dirname(current_dir), 'src', 'python'))
55
56from m5.util import neg_inf, pos_inf, Region, Regions
57import sort_includes
58from file_types import lang_type
59
# Default region set for the verifiers' check/fix/apply methods.
# NOTE(review): presumably spans every line number (neg_inf..pos_inf);
# confirm against m5.util.Region.
all_regions = Regions(Region(neg_inf, pos_inf))

# Column width of a tab stop, used when expanding or measuring tabs.
tabsize = 8
# Run of spaces/tabs at the very start of a line.
lead = re.compile(r'^([ \t]+)')
# Run of spaces/tabs at the very end of a line.
trail = re.compile(r'([ \t]+)$')
# if/while/for followed by any amount of blanks and an opening paren.
any_control = re.compile(r'\b(if|while|for)[ \t]*[(]')
# if/while/for followed by exactly one space and an opening paren.
good_control = re.compile(r'\b(if|while|for) [(]')

# Language names (as returned by lang_type) that validate() inspects.
format_types = set(('C', 'C++'))
69
70
def re_ignore(expr):
    """Build an ignore-rule predicate from a regular expression.

    The returned function takes a file name and returns the result of
    matching the compiled expression against the start of that name
    (a match object, or None when it does not match).
    """

    pattern = re.compile(expr)

    def matches(fname):
        result = pattern.match(fname)
        return result

    return matches
79
# Functions used to decide whether a file is exempt from the style
# matching rules. Each function is called with the file name relative
# to the repository root (without a leading slash) as its argument. A
# file is excluded if any function in the list returns true.
style_ignores = [
    # Ignore external projects as they are unlikely to follow the gem5
    # coding convention.
    re_ignore("^ext/"),
]
90
def check_ignores(fname):
    """Return True when at least one rule in style_ignores accepts fname."""

    # any() stops at the first rule that returns a true value, exactly
    # like an explicit loop with an early return.
    return any(rule(fname) for rule in style_ignores)
99
100
def modified_regions(old_data, new_data):
    """Collect the regions of new_data not covered by a bdiff block.

    Walks the blocks reported by bdiff.blocks(old_data, new_data) and
    records every gap between the end of one block and the start of
    the next (on the new_data side) as a region.
    """
    changed = Regions()
    prev_end = None
    for block in bdiff.blocks(old_data, new_data):
        old_start, old_end, new_start, new_end = block
        # Nothing precedes the first block; afterwards a gap exists
        # whenever this block does not start where the last one ended.
        has_gap = prev_end is not None and prev_end != new_start
        if has_gap:
            changed.append(prev_end, new_start)
        prev_end = new_end
    return changed
109
def modregions(wctx, fname):
    """Return the regions of fname that differ from its parent(s).

    With one parent the regions come from a diff against that parent;
    with two parents only regions that differ from both are kept. For
    any other parent count the whole file (0..number of lines) is
    returned as a single region.
    """
    fctx = wctx.filectx(fname)
    parents = fctx.parents()

    file_data = fctx.data()
    lines = mdiff.splitnewlines(file_data)
    parent_count = len(parents)

    if parent_count not in (1, 2):
        whole_file = Regions()
        whole_file.append(0, len(lines))
        return whole_file

    result = modified_regions(parents[0].data(), file_data)
    if parent_count == 2:
        against_second = modified_regions(parents[1].data(), file_data)
        # only the lines that are new in both
        result &= against_second

    return result
127
class UserInterface(object):
    """Base class for the front ends used to talk to the user.

    Subclasses supply do_prompt(prompt, results, default), which asks
    the question once and returns the raw answer.
    """

    def __init__(self, verbose=False):
        self.verbose = verbose

    def prompt(self, prompt, results, default):
        """Ask until the user gives one of the accepted answers.

        prompt  -- text shown to the user
        results -- accepted answers; either a string whose characters
                   are the choices (e.g. 'aif') or any other iterable
                   of answer strings
        default -- answer handed to do_prompt as the fallback

        Returns the accepted answer.
        """
        # Compare against the individual choices. Testing membership in
        # the string itself would be a substring test and would accept
        # '' or 'ai' for the choices 'aif'.
        accepted = tuple(results)
        while True:
            result = self.do_prompt(prompt, results, default)
            if result in accepted:
                return result
137
class MercurialUI(UserInterface):
    """UserInterface that forwards prompting and output to a ui object."""

    def __init__(self, ui, *args, **kwargs):
        # Remaining arguments (e.g. verbose) belong to the base class.
        self.ui = ui
        super(MercurialUI, self).__init__(*args, **kwargs)

    def do_prompt(self, prompt, results, default):
        """Ask once through the wrapped ui; results is not forwarded."""
        answer = self.ui.prompt(prompt, default=default)
        return answer

    def write(self, string):
        """Send string to the wrapped ui."""
        backend = self.ui
        backend.write(string)
148
class StdioUI(UserInterface):
    """UserInterface that reads from stdin and writes to stdout."""

    def do_prompt(self, prompt, results, default):
        """Ask once on the console; an empty answer yields default."""
        answer = raw_input(prompt)
        if answer:
            return answer
        return default

    def write(self, string):
        """Write string to standard output without adding a newline."""
        out = sys.stdout
        out.write(string)
155
class Verifier(object):
    """Base class for checkers that test and repair files line by line.

    The methods below rely on attributes that this class does not
    define itself and that subclasses are expected to provide:
    ``languages`` (used by skip), ``test_name`` and ``check_line``
    (used by check) and ``fix_line`` (used by fix).
    """

    def __init__(self, ui, repo):
        self.ui = ui
        self.repo = repo

    def __getattr__(self, attr):
        """Delegate prompt/write to the ui and lazily create wctx.

        Only called when normal attribute lookup fails. Raises
        AttributeError for every other name.
        """
        if attr in ('prompt', 'write'):
            return getattr(self.ui, attr)

        if attr == 'wctx':
            try:
                wctx = self.repo.workingctx()
            except Exception:
                # Fall back to building the working context through
                # the context module when the repository call fails.
                from mercurial import context
                wctx = context.workingctx(self.repo)
            # Cache it so that __getattr__ is not consulted again.
            self.wctx = wctx
            return wctx

        raise AttributeError("'%s' object has no attribute '%s'" %
                             (type(self).__name__, attr))

    def open(self, filename, mode):
        """Open a repository-relative file.

        Returns the file object, or None (after reporting the problem
        through write) when the file cannot be opened.
        """
        filename = self.repo.wjoin(filename)

        try:
            # 'open' here is the builtin, not this method. Opening a
            # file fails with IOError on Python 2, so catch both.
            f = open(filename, mode)
        except (IOError, OSError) as msg:
            self.write('could not open file %s: %s\n' % (filename, msg))
            return None

        return f

    def skip(self, filename):
        """Return True when this verifier should not look at filename."""
        filename = self.repo.wjoin(filename)

        # We never want to handle symlinks, so always skip them: If the
        # location pointed to is a directory, skip it. If the location is
        # a file inside the gem5 directory, it will be checked as a file,
        # so symlink can be skipped. If the location is a file outside
        # gem5, we don't want to check it anyway.
        if os.path.islink(filename):
            return True
        return lang_type(filename) not in self.languages

    def check(self, filename, regions=all_regions):
        """Count the lines inside regions that fail check_line.

        Every failing line is reported through write. A file that
        cannot be opened was already reported by open and counts as
        having no errors. Returns the number of failing lines.
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        errors = 0
        try:
            for num, line in enumerate(f):
                if num not in regions:
                    continue
                if not self.check_line(line):
                    self.write("invalid %s in %s:%d\n" %
                               (self.test_name, filename, num + 1))
                    if self.ui.verbose:
                        # Show the offending line without its newline.
                        self.write(">>%s<<\n" % line.rstrip('\n'))
                    errors += 1
        finally:
            f.close()
        return errors

    def fix(self, filename, regions=all_regions):
        """Rewrite filename, passing lines inside regions to fix_line.

        Lines outside regions are written back unchanged. Nothing is
        done when the file cannot be opened.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = list(f)

            f.seek(0)
            f.truncate()

            for i, line in enumerate(lines):
                if i in regions:
                    line = self.fix_line(line)

                f.write(line)
        finally:
            f.close()

    def apply(self, filename, prompt, regions=all_regions):
        """Check filename and hand detected problems to prompt.

        prompt is called as prompt(filename, self.fix, regions) when
        check found errors. Returns True when prompt returned a true
        value, False otherwise (including for skipped files).
        """
        if not self.skip(filename):
            errors = self.check(filename, regions)
            if errors:
                if prompt(filename, self.fix, regions):
                    return True
        return False
236
237
class Whitespace(Verifier):
    """Verifier for tab indentation and trailing blanks."""

    languages = set(['C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'])
    test_name = 'whitespace'

    def check_line(self, line):
        """Return False for a tab in the indent or trailing blanks."""
        indent = lead.search(line)
        tab_in_indent = indent is not None and '\t' in indent.group(1)
        if tab_in_indent:
            return False

        return trail.search(line) is None

    def fix_line(self, line):
        """Return line with its indent de-tabbed and its tail stripped.

        Tabs in the leading blanks are expanded to the next multiple
        of tabsize columns; the result always ends in one newline.
        """
        indent = lead.search(line)
        if indent:
            expanded = ''
            for char in indent.group(1):
                if char == '\t':
                    # Pad up to the next tab stop.
                    expanded += ' ' * (tabsize - len(expanded) % tabsize)
                else:
                    expanded += ' '

            line = expanded + line[indent.end():]

        return line.rstrip() + '\n'
267
class SortedIncludes(Verifier):
    """Verifier that compares a file with its include-sorted version."""

    languages = sort_includes.default_languages

    def __init__(self, *args, **kwargs):
        super(SortedIncludes, self).__init__(*args, **kwargs)
        self.sort_includes = sort_includes.SortIncludes()

    def _sorted_lines(self, filename, lines):
        """Run the include sorter over lines (which must be non-empty)."""
        language = lang_type(filename, lines[0])
        return list(self.sort_includes(lines, filename, language))

    def check(self, filename, regions=all_regions):
        """Return 1 when sorting would change filename inside regions.

        Returns 0 when the file is empty, cannot be opened, or the
        changes made by sorting do not intersect regions.
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        try:
            lines = [l.rstrip('\n') for l in f]
        finally:
            f.close()

        if not lines:
            return 0

        old = ''.join(line + '\n' for line in lines)
        sort_lines = self._sorted_lines(filename, lines)
        new = ''.join(line + '\n' for line in sort_lines)

        modified = modified_regions(old, new) & regions

        if modified:
            self.write("invalid sorting of includes in %s\n" % (filename))
            if self.ui.verbose:
                for start, end in modified.regions:
                    self.write("bad region [%d, %d)\n" % (start, end))
            return 1

        return 0

    def fix(self, filename, regions=all_regions):
        """Rewrite filename with the output of the include sorter.

        regions is accepted for interface compatibility but the whole
        file is rewritten. Empty or unopenable files are left alone.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = [l.rstrip('\n') for l in f.readlines()]
            if not lines:
                # Nothing to sort, and lines[0] would not exist.
                return

            sort_lines = self._sorted_lines(filename, lines)

            f.seek(0)
            f.truncate()

            for line in sort_lines:
                f.write(line)
                f.write('\n')
        finally:
            f.close()
316
def linelen(line, tab_width=None):
    """Return the display width of line with tabs expanded.

    line      -- text to measure (every character other than a tab
                 counts as one column)
    tab_width -- distance between tab stops; defaults to the
                 module-level tabsize, read at call time
    """
    if '\t' not in line:
        return len(line)

    if tab_width is None:
        tab_width = tabsize

    column = 0
    for char in line:
        if char == '\t':
            # Advance to the next tab stop.
            column += tab_width - column % tab_width
        else:
            column += 1

    return column
330
class ValidationStats(object):
    """Counters for the kinds of formatting violations that were seen."""

    def __init__(self):
        # Lines longer than 79 columns, and how many were exactly 80.
        self.toolong = 0
        self.toolong80 = 0
        # Tabs in the indentation / blanks at the end of a line.
        self.leadtabs = 0
        self.trailwhite = 0
        # Wrong spacing between if/while/for and the parenthesis.
        self.badcontrol = 0
        # Carriage returns.
        self.cret = 0

    def dump(self):
        """Print a summary of all counters to standard output."""
        values = (self.toolong, self.toolong80, self.trailwhite,
                  self.leadtabs, self.badcontrol, self.cret)
        report = '''\
%d violations of lines over 79 chars. %d of which are 80 chars exactly.
%d cases of whitespace at the end of a line.
%d cases of tabs to indent.
%d bad parens after if/while/for.
%d carriage returns found.
''' % values
        print(report)

    def __nonzero__(self):
        """Truth value: the first non-zero counter, or the last one."""
        return (self.toolong or self.toolong80 or self.leadtabs or
                self.trailwhite or self.badcontrol or self.cret)
353
def validate(filename, stats, verbose, exit_code):
    """Scan one file for formatting violations and count them in stats.

    filename  -- path of the file to scan; files whose language is not
                 in format_types are ignored
    stats     -- object whose cret, toolong, toolong80, leadtabs,
                 trailwhite and badcontrol counters are incremented
    verbose   -- integer verbosity; above 0 reports unopenable files,
                 above 1 reports each violation, above 2 also prints
                 the offending line
    exit_code -- when not None, the process exits with this status at
                 the first violation (or when the file can't be opened)
    """
    language = lang_type(filename)
    if language not in format_types:
        return

    # The control-statement spacing rule is documented as a C++ rule.
    cpp = language == 'C++'

    def msg(lineno, line, message):
        print('%s:%d> %s' % (filename, lineno + 1, message))
        if verbose > 2:
            print(line)

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    try:
        # Opening a file fails with IOError on Python 2.
        f = open(filename, 'r')
    except (IOError, OSError):
        if verbose > 0:
            print('could not open file %s' % filename)
        bad()
        return

    try:
        for i, line in enumerate(f):
            line = line.rstrip('\n')

            # no carriage returns
            if '\r' in line:
                stats.cret += 1
                if verbose > 1:
                    msg(i, line, 'carriage return found')
                bad()

            # lines max out at 79 chars
            llen = linelen(line)
            if llen > 79:
                stats.toolong += 1
                if llen == 80:
                    stats.toolong80 += 1
                if verbose > 1:
                    msg(i, line, 'line too long (%d chars)' % llen)
                bad()

            # no tabs used to indent
            match = lead.search(line)
            if match and '\t' in match.group(1):
                stats.leadtabs += 1
                if verbose > 1:
                    msg(i, line, 'using tabs to indent')
                bad()

            # no trailing whitespace
            if trail.search(line):
                stats.trailwhite += 1
                if verbose > 1:
                    msg(i, line, 'trailing whitespace')
                bad()

            # for c++, exactly one space between if/while/for and (
            if cpp:
                match = any_control.search(line)
                if match and not good_control.search(line):
                    stats.badcontrol += 1
                    if verbose > 1:
                        msg(i, line,
                            'improper spacing after %s' % match.group(1))
                    bad()
    finally:
        # Also runs when bad() leaves through sys.exit().
        f.close()
418
419
def do_check_style(hgui, repo, *pats, **opts):
    """check files for proper m5 style guidelines

    Without an argument, checks all modified and added files for gem5
    coding style violations. A list of files can be specified to limit
    the checker to a subset of the repository. The style rules are
    normally applied on a diff of the repository state (i.e., added
    files are checked in their entirety while only modifications of
    modified files are checked).

    The --all option can be specified to include clean files and check
    modified files in their entirety.
    """
    # Returns True when the user chose to abort, False otherwise.

    # --fix-all implies both of the specific fix options.
    opt_fix_all = opts.get('fix_all', False)
    opt_fix_white = opt_fix_all or opts.get('fix_white', False)
    opt_fix_include = opt_fix_all or opts.get('fix_include', False)

    opt_all = opts.get('all', False)
    opt_no_ignore = opts.get('no_ignore', False)
    ui = MercurialUI(hgui, verbose=hgui.verbose)

    def prompt(name, func, regions=all_regions):
        # Ask the user what to do; True means "abort".
        result = ui.prompt("(a)bort, (i)gnore, or (f)ix?", 'aif', 'a')
        if result == 'a':
            return True
        elif result == 'f':
            func(name, regions)

        return False

    def no_prompt(name, func, regions=all_regions):
        # Fix without asking and never abort.
        func(name, regions)
        return False

    prompt_white = no_prompt if opt_fix_white else prompt
    prompt_include = no_prompt if opt_fix_include else prompt

    # Import the match (repository file name matching helper)
    # function. Different versions of Mercurial keep it in different
    # modules and implement them differently.
    try:
        from mercurial import scmutil
    except ImportError:
        from mercurial import cmdutil
        m = cmdutil.match(repo, pats, opts)
    else:
        m = scmutil.match(repo[None], pats, opts)

    modified, added, removed, deleted, unknown, ignore, clean = \
        repo.status(match=m, clean=opt_all)
    if not opt_all:
        try:
            wctx = repo.workingctx()
        except Exception:
            # Fall back to building the working context through the
            # context module when the repository call fails.
            from mercurial import context
            wctx = context.workingctx(repo)

        # Added files are checked completely, modified files only
        # where they differ from their parent(s).
        files = [ (fn, all_regions) for fn in added ] + \
            [ (fn, modregions(wctx, fn)) for fn in modified ]
    else:
        files = [ (fn, all_regions) for fn in added + modified + clean ]

    whitespace = Whitespace(ui, repo)
    sorted_includes = SortedIncludes(ui, repo)
    for fname, mod_regions in files:
        if not opt_no_ignore and check_ignores(fname):
            continue

        if whitespace.apply(fname, prompt_white, mod_regions):
            return True

        if sorted_includes.apply(fname, prompt_include, mod_regions):
            return True

    return False
500
def do_check_format(hgui, repo, **args):
    # Validate the formatting of all modified and added files and,
    # when violations were counted, ask whether to ignore them.
    # Returns True when the user chose to abort, False otherwise.
    ui = MercurialUI(hgui, hgui.verbose)

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    verbose = 0
    stats = ValidationStats()
    for relative_name in modified + added:
        full_name = joinpath(repo.root, relative_name)
        validate(full_name, stats, verbose, None)

    if not stats:
        return False

    stats.dump()
    answer = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                       'ai', 'a')
    if answer == 'a':
        return True

    return False
519
def check_hook(hooktype):
    """Raise AttributeError unless hooktype is a supported hook.

    Supported hook types are 'pretxncommit' and 'pre-qrefresh'.
    """
    if hooktype not in ('pretxncommit', 'pre-qrefresh'):
        raise AttributeError("This hook is not meant for %s" % hooktype)
524
def check_style(ui, repo, hooktype, **kwargs):
    """Hook entry point that runs the style check.

    Raises AttributeError (from check_hook) for unsupported hook
    types. Returns the result of do_check_style, or True after
    printing a traceback when the check itself raised an exception.
    Extra keyword arguments are accepted and ignored.
    """
    check_hook(hooktype)

    try:
        return do_check_style(ui, repo)
    except Exception:
        import traceback
        traceback.print_exc()
        return True
535
def check_format(ui, repo, hooktype, **kwargs):
    """Hook entry point that runs the format check.

    Raises AttributeError (from check_hook) for unsupported hook
    types. Returns the result of do_check_format, or True after
    printing a traceback when the check itself raised an exception.
    Extra keyword arguments are accepted and ignored.
    """
    check_hook(hooktype)

    try:
        return do_check_format(ui, repo)
    except Exception:
        import traceback
        traceback.print_exc()
        return True
546
# Use Mercurial's i18n marker for the option help strings below when
# it can be imported; otherwise fall back to returning the text as is.
try:
    from mercurial.i18n import _
except ImportError:
    def _(arg):
        return arg
552
# Command table: maps each command name to a tuple of
# (function, list of option tuples, synopsis string).
cmdtable = {
    '^m5style' : (
        do_check_style, [
            ('f', 'fix-all', False, _("automatically fix style issues")),
            ('', 'fix-white', False, _("automatically fix white space issues")),
            ('', 'fix-include', False, _("automatically fix include ordering")),
            ('a', 'all', False,
             _("include clean files and unmodified parts of modified files")),
            ('', 'no-ignore', False, _("ignore the style ignore list")),
            ] +  commands.walkopts,
        _('hg m5style [-a] [FILE]...')),
    # m5format takes no options of its own.
    '^m5format' :
    ( do_check_format,
      [ ],
      _('hg m5format [FILE]...')),
}
569
if __name__ == '__main__':
    # Stand-alone command line front end:
    #   fixwhite  -- repair whitespace problems in the given files
    #   chkwhite  -- report whitespace problems in the given files
    #   chkformat -- report formatting problems in the given files
    import getopt

    progname = sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit('usage: %s <command> [<command args>]' % progname)

    fixwhite_usage = '%s fixwhite [-t <tabsize> ] <path> [...] \n' % progname
    chkformat_usage = '%s chkformat <path> [...] \n' % progname
    chkwhite_usage = '%s chkwhite <path> [...] \n' % progname

    # command name -> (getopt flag string, usage text)
    cli_commands = {
        'fixwhite': ('t:', fixwhite_usage),
        'chkwhite': ('nv', chkwhite_usage),
        'chkformat': ('nv', chkformat_usage),
    }

    command = sys.argv[1]
    if command not in cli_commands:
        sys.exit(fixwhite_usage + chkwhite_usage + chkformat_usage)
    flags, usage = cli_commands[command]

    try:
        opts, args = getopt.getopt(sys.argv[2:], flags)
    except getopt.GetoptError as err:
        sys.exit('%s\n%s' % (err, usage))

    code = 1
    verbose = 1
    for opt, arg in opts:
        if opt == '-n':
            code = None
        elif opt == '-t':
            # Rebinds the module-level tabsize that fix_line reads.
            tabsize = int(arg)
        elif opt == '-v':
            verbose += 1

    # Only check_line/fix_line are used here and neither touches the
    # repository, so no repository object is needed.
    whitespace = Whitespace(StdioUI(), None)

    def open_or_report(filename, mode):
        """Open filename; print a message and return None on failure."""
        try:
            return open(filename, mode)
        except (IOError, OSError) as msg:
            print('could not open file %s: %s' % (filename, msg))
            return None

    def checkwhite(filename):
        """Yield (line, 1-based line number) for each bad line."""
        if lang_type(filename) not in whitespace.languages:
            return
        f = open_or_report(filename, 'r')
        if f is None:
            return
        try:
            for index, line in enumerate(f):
                if not whitespace.check_line(line):
                    yield line, index + 1
        finally:
            f.close()

    def fixwhite(filename):
        """Rewrite filename with every line passed through fix_line."""
        if lang_type(filename) not in whitespace.languages:
            return
        f = open_or_report(filename, 'r+')
        if f is None:
            return
        try:
            lines = list(f)
            f.seek(0)
            f.truncate()
            for line in lines:
                f.write(whitespace.fix_line(line))
        finally:
            f.close()

    if command == 'fixwhite':
        for filename in args:
            fixwhite(filename)
    elif command == 'chkwhite':
        for filename in args:
            for line, num in checkwhite(filename):
                print('invalid whitespace: %s:%d' % (filename, num))
                if verbose:
                    print('>>%s<<' % line.rstrip('\n'))
    elif command == 'chkformat':
        stats = ValidationStats()
        for filename in args:
            validate(filename, stats=stats, verbose=verbose, exit_code=code)

        if verbose > 0:
            stats.dump()
624