style.py revision 10294:b58f6afe14c5
1#! /usr/bin/env python
2# Copyright (c) 2014 ARM Limited
3# All rights reserved
4#
5# The license below extends only to copyright in the software and shall
6# not be construed as granting a license to any other intellectual
7# property including but not limited to intellectual property relating
8# to a hardware implementation of the functionality of the software
9# licensed hereunder.  You may use the software subject to the license
10# terms below provided that you ensure that this notice is replicated
11# unmodified and in its entirety in all distributions of the software,
12# modified or unmodified, in source code or in binary form.
13#
14# Copyright (c) 2006 The Regents of The University of Michigan
15# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
16# All rights reserved.
17#
18# Redistribution and use in source and binary forms, with or without
19# modification, are permitted provided that the following conditions are
20# met: redistributions of source code must retain the above copyright
21# notice, this list of conditions and the following disclaimer;
22# redistributions in binary form must reproduce the above copyright
23# notice, this list of conditions and the following disclaimer in the
24# documentation and/or other materials provided with the distribution;
25# neither the name of the copyright holders nor the names of its
26# contributors may be used to endorse or promote products derived from
27# this software without specific prior written permission.
28#
29# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40#
41# Authors: Nathan Binkert
42
43import heapq
44import os
45import re
46import sys
47
48from os.path import dirname, join as joinpath
49from itertools import count
50from mercurial import bdiff, mdiff, commands
51
52current_dir = dirname(__file__)
53sys.path.insert(0, current_dir)
54sys.path.insert(1, joinpath(dirname(current_dir), 'src', 'python'))
55
56from m5.util import neg_inf, pos_inf, Region, Regions
57import sort_includes
58from file_types import lang_type
59
# Region set spanning every position; used as the default "whole file"
# selection by the verifiers below.
all_regions = Regions(Region(neg_inf, pos_inf))

# Distance, in columns, between tab stops when expanding tabs.
tabsize = 8

# Runs of blanks (spaces and tabs) at the start and at the end of a line.
lead = re.compile(r'^([ \t]+)')
trail = re.compile(r'([ \t]+)$')

# A control keyword followed by an opening parenthesis: with any amount
# of spacing in between, and with exactly one space in between.
any_control = re.compile(r'\b(if|while|for)[ \t]*[(]')
good_control = re.compile(r'\b(if|while|for) [(]')

# Languages that validate() is willing to look at.
format_types = set(['C', 'C++'])
69
70
def re_ignore(expr):
    """Build an ignore predicate from a regular expression.

    The returned callable takes a file name and gives back the result
    of matching the compiled expression against the start of that
    name: a match object when the name matches, None otherwise.
    """

    pattern = re.compile(expr)

    def matches(fname):
        return pattern.match(fname)

    return matches
79
# Predicates that exempt files from the style rules. Each entry is
# called with a file name relative to the repository root (without a
# leading slash); a file is excluded as soon as one of them returns a
# true value.
style_ignores = [
    # External projects are unlikely to follow the gem5 coding
    # convention, so they are left alone.
    re_ignore("^ext/"),
]

def check_ignores(fname):
    """Tell whether any rule in style_ignores applies to fname."""

    return any(rule(fname) for rule in style_ignores)
99
100
def modified_regions(old_data, new_data):
    """Return the regions of new_data that differ from old_data.

    bdiff.blocks() yields the blocks both texts have in common as
    4-tuples; the last two entries are the begin and end of the block
    on the new_data side. Everything between the end of one common
    block and the start of the next is collected into a Regions object.
    """
    regions = Regions()
    # End of the previous common block on the new side. Starting from 0
    # makes sure that a change located before the first common block
    # (i.e. at the very top of the file) is reported too.
    beg = 0
    for _pbeg, _pend, fbeg, fend in bdiff.blocks(old_data, new_data):
        if beg != fbeg:
            regions.append(beg, fbeg)
        beg = fend
    return regions
109
def modregions(wctx, fname):
    """Compute the regions of fname that differ from its parents.

    With one parent the result is the set of regions modified with
    respect to it; with two parents only regions modified with respect
    to both are kept. For any other number of parents the whole file,
    [0, number of lines), is returned.
    """
    fctx = wctx.filectx(fname)
    parents = fctx.parents()
    current = fctx.data()
    line_count = len(mdiff.splitnewlines(current))

    if len(parents) not in (1, 2):
        whole_file = Regions()
        whole_file.append(0, line_count)
        return whole_file

    changed = modified_regions(parents[0].data(), current)
    if len(parents) == 2:
        # Keep only what is new relative to both parents.
        changed &= modified_regions(parents[1].data(), current)
    return changed
127
class UserInterface(object):
    """Base class for the front ends used to talk to the user.

    Subclasses supply do_prompt(prompt, results, default), which asks
    the question once and returns the answer.
    """

    def __init__(self, verbose=False, auto=False):
        self.verbose = verbose
        self.auto = auto

    def prompt(self, prompt, results, default):
        """Ask until an acceptable answer is obtained.

        A true self.auto is returned as-is without asking anything.
        Otherwise do_prompt() is called again and again until its
        answer is contained in results.
        """
        if self.auto:
            return self.auto

        answer = self.do_prompt(prompt, results, default)
        while answer not in results:
            answer = self.do_prompt(prompt, results, default)
        return answer
140                return result
141
class MercurialUI(UserInterface):
    """User interface that delegates prompting and output to a
    Mercurial ui object."""

    def __init__(self, ui, *args, **kwargs):
        # Everything except the ui object is handed to UserInterface.
        super(MercurialUI, self).__init__(*args, **kwargs)
        self.ui = ui

    def write(self, string):
        """Send string to the wrapped ui object."""
        self.ui.write(string)

    def do_prompt(self, prompt, results, default):
        """Ask once through the wrapped ui object.

        Only the prompt text and the default are forwarded; results is
        not passed on.
        """
        return self.ui.prompt(prompt, default=default)
152
class StdioUI(UserInterface):
    """User interface that reads answers with raw_input() and writes to
    sys.stdout."""

    def write(self, string):
        sys.stdout.write(string)

    def do_prompt(self, prompt, results, default):
        # An empty reply selects the default answer.
        answer = raw_input(prompt)
        if answer:
            return answer
        return default
159
class Verifier(object):
    """Base class for line-oriented style verifiers.

    Subclasses are expected to provide:
      languages        -- collection of language names, as returned by
                          lang_type(), that the verifier applies to
      test_name        -- short name of the check, used in messages
      check_line(line) -- return a true value when the line is fine
      fix_line(line)   -- return the corrected version of the line
    """

    def __init__(self, ui, repo=None):
        self.ui = ui
        self.repo = repo
        if repo is None:
            # Without a repository there is no working context to look
            # up lazily in __getattr__.
            self.wctx = None

    def __getattr__(self, attr):
        """Resolve attributes that are not set on the instance.

        'prompt' and 'write' are forwarded to the ui object; 'wctx' is
        created from the repository on first use and then cached.
        Anything else raises AttributeError.
        """
        if attr in ('prompt', 'write'):
            return getattr(self.ui, attr)

        if attr == 'wctx':
            try:
                wctx = self.repo.workingctx()
            except AttributeError:
                # The repository object has no workingctx() method;
                # build the working context explicitly instead.
                from mercurial import context
                wctx = context.workingctx(self.repo)
            self.wctx = wctx
            return wctx

        raise AttributeError(attr)

    def open(self, filename, mode):
        """Open filename with the given mode.

        When a repository is set, the name is first passed through
        repo.wjoin(). Returns the file object, or None (after printing
        a message) when the file cannot be opened.
        """
        if self.repo:
            filename = self.repo.wjoin(filename)

        try:
            # Inside a method body the bare name refers to the builtin
            # open(), not to this method.
            return open(filename, mode)
        except (IOError, OSError) as msg:
            print('could not open file %s: %s' % (filename, msg))
            return None

    def skip(self, filename):
        """Return True when filename must not be handled by this
        verifier."""
        # We never want to handle symlinks, so always skip them: If the
        # location pointed to is a directory, skip it. If the location is
        # a file inside the gem5 directory, it will be checked as a file,
        # so symlink can be skipped. If the location is a file outside
        # gem5, we don't want to check it anyway.
        if os.path.islink(filename):
            return True
        return lang_type(filename) not in self.languages

    def check(self, filename, regions=all_regions):
        """Count the lines of filename, within regions, that fail
        check_line().

        Every offending line is reported through write(); in verbose
        mode the line itself is shown as well. A file that cannot be
        opened counts as having no violations.
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        errors = 0
        try:
            for num, line in enumerate(f):
                if num not in regions:
                    continue
                if not self.check_line(line):
                    self.write("invalid %s in %s:%d\n" %
                               (self.test_name, filename, num + 1))
                    if self.ui.verbose:
                        # Show the line content without its newline.
                        self.write(">>%s<<\n" % line.rstrip('\n'))
                    errors += 1
        finally:
            f.close()
        return errors

    def fix(self, filename, regions=all_regions):
        """Rewrite filename in place, passing every line that lies
        within regions through fix_line().

        Nothing is done when the file cannot be opened.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = list(f)

            f.seek(0)
            f.truncate()

            for i, line in enumerate(lines):
                if i in regions:
                    line = self.fix_line(line)
                f.write(line)
        finally:
            f.close()

    def apply(self, filename, prompt, regions=all_regions):
        """Check filename and let prompt decide what to do on errors.

        prompt is called as prompt(filename, self.fix, regions) when
        the check reports errors; a true result from it is passed on as
        True. In every other case False is returned.
        """
        if self.skip(filename):
            return False

        if self.check(filename, regions):
            if prompt(filename, self.fix, regions):
                return True
        return False
241
242
class Whitespace(Verifier):
    """Verifier for tabs in the indentation and trailing whitespace."""

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'whitespace'

    def check_line(self, line):
        """Return False when the leading blanks contain a tab or the
        line ends in blanks, True otherwise."""
        indent = lead.search(line)
        if indent and '\t' in indent.group(1):
            return False

        return not trail.search(line)

    def fix_line(self, line):
        """Return line with the tabs of its indentation expanded to
        spaces and all trailing whitespace replaced by one newline."""
        indent = lead.search(line)
        if indent:
            # Work out how many columns the leading blanks occupy once
            # every tab has advanced to its next tab stop.
            width = 0
            for blank in indent.group(1):
                if blank == '\t':
                    width += tabsize - width % tabsize
                else:
                    width += 1

            line = ' ' * width + line[indent.end():]

        return line.rstrip() + '\n'
272
class SortedIncludes(Verifier):
    """Verifier that compares a file with the output of
    sort_includes.SortIncludes and can rewrite it accordingly."""

    languages = sort_includes.default_languages

    def __init__(self, *args, **kwargs):
        super(SortedIncludes, self).__init__(*args, **kwargs)
        self.sort_includes = sort_includes.SortIncludes()

    def _sorted_lines(self, filename, lines):
        """Run the include sorter over lines (newlines stripped).

        lines must not be empty: its first entry is handed to
        lang_type() together with the file name.
        """
        language = lang_type(filename, lines[0])
        return list(self.sort_includes(lines, filename, language))

    def check(self, filename, regions=all_regions):
        """Return 1 when sorting the includes would change a part of
        filename that lies within regions, 0 otherwise.

        Empty files and files that cannot be opened yield 0.
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        try:
            lines = [l.rstrip('\n') for l in f]
        finally:
            f.close()

        if not lines:
            return 0

        sort_lines = self._sorted_lines(filename, lines)
        old = ''.join(line + '\n' for line in lines)
        new = ''.join(line + '\n' for line in sort_lines)

        modified = modified_regions(old, new) & regions

        if modified:
            self.write("invalid sorting of includes in %s\n" % (filename))
            if self.ui.verbose:
                for start, end in modified.regions:
                    self.write("bad region [%d, %d)\n" % (start, end))
            return 1

        return 0

    def fix(self, filename, regions=all_regions):
        """Rewrite filename with its includes sorted.

        regions is accepted for interface compatibility with
        Verifier.fix() only; the whole file is always rewritten. Empty
        files and files that cannot be opened are left untouched.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = [l.rstrip('\n') for l in f.readlines()]
            if not lines:
                # Nothing to sort, and there is no first line to
                # classify the file with.
                return

            sort_lines = self._sorted_lines(filename, lines)

            f.seek(0)
            f.truncate()
            f.writelines(line + '\n' for line in sort_lines)
        finally:
            f.close()
321
def linelen(line):
    """Return the width of line in columns with tabs expanded.

    A tab advances to the next multiple of tabsize; any other
    character takes up one column.
    """
    if '\t' not in line:
        return len(line)

    width = 0
    for char in line:
        width += (tabsize - width % tabsize) if char == '\t' else 1
    return width
335
class ValidationStats(object):
    """Counters for the formatting violations found by validate()."""

    def __init__(self):
        self.toolong = 0      # lines longer than 79 columns
        self.toolong80 = 0    # subset of toolong: exactly 80 columns
        self.leadtabs = 0     # tabs used for indentation
        self.trailwhite = 0   # whitespace at the end of a line
        self.badcontrol = 0   # bad spacing after if/while/for
        self.cret = 0         # carriage returns

    def dump(self):
        """Print a summary of all counters to standard output."""
        print('''\
%d violations of lines over 79 chars. %d of which are 80 chars exactly.
%d cases of whitespace at the end of a line.
%d cases of tabs to indent.
%d bad parens after if/while/for.
%d carriage returns found.
''' % (self.toolong, self.toolong80, self.trailwhite, self.leadtabs,
       self.badcontrol, self.cret))

    def __nonzero__(self):
        """Return True when at least one violation has been counted."""
        return bool(self.toolong or self.toolong80 or self.leadtabs or
                    self.trailwhite or self.badcontrol or self.cret)

    # Python 3 consults __bool__ rather than __nonzero__ for truth
    # testing; without the alias an instance would always be true there.
    __bool__ = __nonzero__
358
def validate(filename, stats, verbose, exit_code):
    """Check the formatting of one file and record violations in stats.

    Files whose language, according to lang_type(), is not listed in
    format_types are ignored.

    filename  -- path of the file to check
    stats     -- ValidationStats-like object whose counters are bumped
    verbose   -- messages are printed per violation when > 1; the
                 offending line is echoed too when > 2; a failure to
                 open the file is reported when > 0
    exit_code -- when not None, the process exits with this code at
                 the first violation (or if the file cannot be opened)
    """
    language = lang_type(filename)
    if language not in format_types:
        return

    # NOTE(review): the control-statement rule below is documented as
    # "for c++", so it is limited to files classified as 'C++'; confirm
    # whether files classified as 'C' should be covered as well.
    cpp = language == 'C++'

    def msg(lineno, line, message):
        print('%s:%d> %s' % (filename, lineno + 1, message))
        if verbose > 2:
            print(line)

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    try:
        f = open(filename, 'r')
    except (IOError, OSError):
        if verbose > 0:
            print('could not open file %s' % filename)
        bad()
        return

    try:
        for i, line in enumerate(f):
            line = line.rstrip('\n')

            # no carriage returns
            if line.find('\r') != -1:
                stats.cret += 1
                if verbose > 1:
                    msg(i, line, 'carriage return found')
                bad()

            # lines max out at 79 chars
            llen = linelen(line)
            if llen > 79:
                stats.toolong += 1
                if llen == 80:
                    stats.toolong80 += 1
                if verbose > 1:
                    msg(i, line, 'line too long (%d chars)' % llen)
                bad()

            # no tabs used to indent
            match = lead.search(line)
            if match and match.group(1).find('\t') != -1:
                stats.leadtabs += 1
                if verbose > 1:
                    msg(i, line, 'using tabs to indent')
                bad()

            # no trailing whitespace
            if trail.search(line):
                stats.trailwhite += 1
                if verbose > 1:
                    msg(i, line, 'trailing whitespace')
                bad()

            # for c++, exactly one space between if/while/for and (
            if cpp:
                match = any_control.search(line)
                if match and not good_control.search(line):
                    stats.badcontrol += 1
                    if verbose > 1:
                        msg(i, line,
                            'improper spacing after %s' % match.group(1))
                    bad()
    finally:
        # Also reached when bad() leaves through sys.exit().
        f.close()
423
424
def do_check_style(hgui, repo, *pats, **opts):
    """check files for proper m5 style guidelines

    Without an argument, checks all modified and added files for gem5
    coding style violations. A list of files can be specified to limit
    the checker to a subset of the repository. The style rules are
    normally applied on a diff of the repository state (i.e., added
    files are checked in their entirety while only modifications of
    modified files are checked).

    The --all option can be specified to include clean files and check
    modified files in their entirety.
    """
    # Returns True when the user chose to abort, False otherwise.
    opt_fix_white = opts.get('fix_white', False)
    opt_all = opts.get('all', False)
    opt_no_ignore = opts.get('no_ignore', False)
    ui = MercurialUI(hgui, hgui.verbose)

    def prompt(name, func, regions=all_regions):
        # Ask the user what to do about the violations found in name.
        result = ui.prompt("(a)bort, (i)gnore, or (f)ix?", 'aif', 'a')
        if result == 'a':
            return True
        elif result == 'f':
            func(repo.wjoin(name), regions)

        return False

    def no_prompt(name, func, regions=all_regions):
        # Fix the violations right away without asking.
        func(repo.wjoin(name), regions)
        return False

    # --fix-white only concerns the whitespace verifier; include
    # sorting keeps asking.
    if opt_fix_white:
        prompt_white = no_prompt
    else:
        prompt_white = prompt

    # Import the match (repository file name matching helper)
    # function. Different versions of Mercurial keep it in different
    # modules and implement them differently.
    try:
        from mercurial import scmutil
        m = scmutil.match(repo[None], pats, opts)
    except ImportError:
        from mercurial import cmdutil
        m = cmdutil.match(repo, pats, opts)

    modified, added, removed, deleted, unknown, ignore, clean = \
        repo.status(match=m, clean=opt_all)
    if not opt_all:
        try:
            wctx = repo.workingctx()
        except AttributeError:
            # The repository object has no workingctx() method; build
            # the working context explicitly instead.
            from mercurial import context
            wctx = context.workingctx(repo)

        # Added files are checked completely, modified files only
        # where they differ from their parents.
        files = [ (fn, all_regions) for fn in added ] + \
            [ (fn, modregions(wctx, fn)) for fn in modified ]
    else:
        files = [ (fn, all_regions) for fn in added + modified + clean ]

    whitespace = Whitespace(ui)
    sorted_includes = SortedIncludes(ui)
    for fname, mod_regions in files:
        if not opt_no_ignore and check_ignores(fname):
            continue

        fpath = joinpath(repo.root, fname)

        if whitespace.apply(fpath, prompt_white, mod_regions):
            return True

        if sorted_includes.apply(fpath, prompt, mod_regions):
            return True

    return False
494
def do_check_format(hgui, repo, **args):
    """check modified and added files for formatting violations

    Runs validate() over every modified and added file. When anything
    was found, the statistics are printed and the user is asked whether
    to ignore the problems or abort. Returns True for abort and False
    otherwise.

    An optional 'auto' keyword argument is handed to the user
    interface as its automatic answer.
    """
    auto = args.get('auto', False)
    ui = MercurialUI(hgui, hgui.verbose, auto)

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    verbose = 0
    stats = ValidationStats()
    for f in modified + added:
        # exit_code None: keep going after a violation, just count it.
        validate(joinpath(repo.root, f), stats, verbose, None)

    if stats:
        stats.dump()
        result = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                           'ai', 'a')
        if result == 'a':
            return True

    return False
513
def check_hook(hooktype):
    """Reject hook types the style hooks are not meant for.

    Raises AttributeError unless hooktype is 'pretxncommit' or
    'pre-qrefresh'.
    """
    if hooktype in ('pretxncommit', 'pre-qrefresh'):
        return
    raise AttributeError("This hook is not meant for %s" % hooktype)
518
def check_style(ui, repo, hooktype, **kwargs):
    """Hook entry point for the style check.

    check_hook() validates hooktype first; its AttributeError is not
    caught here. The result of do_check_style() is returned. If that
    call raises, the traceback is printed and True is returned, the
    same value do_check_style() uses for "abort". Extra keyword
    arguments are ignored.
    """
    check_hook(hooktype)

    try:
        return do_check_style(ui, repo)
    except Exception:
        import traceback
        traceback.print_exc()
        return True
529
def check_format(ui, repo, hooktype, **kwargs):
    """Hook entry point for the format check.

    check_hook() validates hooktype first; its AttributeError is not
    caught here. The result of do_check_format() is returned. If that
    call raises, the traceback is printed and True is returned, the
    same value do_check_format() uses for "abort". Extra keyword
    arguments are ignored.
    """
    check_hook(hooktype)

    try:
        return do_check_format(ui, repo)
    except Exception:
        import traceback
        traceback.print_exc()
        return True
540
541try:
542    from mercurial.i18n import _
543except ImportError:
544    def _(arg):
545        return arg
546
# Command table: maps a command name to a tuple of (function, list of
# options, synopsis).
cmdtable = {
    '^m5style': (
        do_check_style,
        [
            ('w', 'fix-white', False, _("automatically fix whitespace")),
            ('a', 'all', False,
             _("include clean files and unmodified parts of modified files")),
            ('', 'no-ignore', False, _("ignore the style ignore list")),
        ] + commands.walkopts,
        _('hg m5style [-a] [FILE]...'),
    ),
    '^m5format': (
        do_check_format,
        [],
        _('hg m5format [FILE]...'),
    ),
}
561
if __name__ == '__main__':
    # Stand-alone use:
    #   fixwhite  [-t <tabsize>] <path>...  fix whitespace in place
    #   chkwhite  <path>...                 report whitespace problems
    #   chkformat [-n] [-v] <path>...       report formatting problems
    import getopt

    progname = sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit('usage: %s <command> [<command args>]' % progname)

    fixwhite_usage = '%s fixwhite [-t <tabsize> ] <path> [...] \n' % progname
    chkformat_usage = '%s chkformat <path> [...] \n' % progname
    chkwhite_usage = '%s chkwhite <path> [...] \n' % progname

    command = sys.argv[1]
    if command == 'fixwhite':
        flags = 't:'
        usage = fixwhite_usage
    elif command == 'chkwhite':
        flags = 'nv'
        usage = chkwhite_usage
    elif command == 'chkformat':
        flags = 'nv'
        usage = chkformat_usage
    else:
        sys.exit(fixwhite_usage + chkwhite_usage + chkformat_usage)

    try:
        opts, args = getopt.getopt(sys.argv[2:], flags)
    except getopt.GetoptError:
        # Unknown flag or missing flag argument.
        sys.exit(usage)

    code = 1
    verbose = 1
    for opt, arg in opts:
        if opt == '-n':
            code = None
        elif opt == '-t':
            # This runs at module level, so it rebinds the global
            # tabsize used when expanding tabs.
            tabsize = int(arg)
        elif opt == '-v':
            verbose += 1

    if command == 'chkformat':
        stats = ValidationStats()
        for filename in args:
            validate(filename, stats=stats, verbose=verbose, exit_code=code)

        if verbose > 0:
            stats.dump()
    else:
        # fixwhite and chkwhite both go through the Whitespace
        # verifier, talking to the user over stdio.
        whitespace = Whitespace(StdioUI(verbose=bool(verbose)))
        for filename in args:
            if whitespace.skip(filename):
                continue
            if command == 'fixwhite':
                whitespace.fix(filename)
            else:
                whitespace.check(filename)
616