style.py revision 10293:62c95c428a3d
1#! /usr/bin/env python
2# Copyright (c) 2014 ARM Limited
3# All rights reserved
4#
5# The license below extends only to copyright in the software and shall
6# not be construed as granting a license to any other intellectual
7# property including but not limited to intellectual property relating
8# to a hardware implementation of the functionality of the software
9# licensed hereunder.  You may use the software subject to the license
10# terms below provided that you ensure that this notice is replicated
11# unmodified and in its entirety in all distributions of the software,
12# modified or unmodified, in source code or in binary form.
13#
14# Copyright (c) 2006 The Regents of The University of Michigan
15# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
16# All rights reserved.
17#
18# Redistribution and use in source and binary forms, with or without
19# modification, are permitted provided that the following conditions are
20# met: redistributions of source code must retain the above copyright
21# notice, this list of conditions and the following disclaimer;
22# redistributions in binary form must reproduce the above copyright
23# notice, this list of conditions and the following disclaimer in the
24# documentation and/or other materials provided with the distribution;
25# neither the name of the copyright holders nor the names of its
26# contributors may be used to endorse or promote products derived from
27# this software without specific prior written permission.
28#
29# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40#
41# Authors: Nathan Binkert
42
43import heapq
44import os
45import re
46import sys
47
48from os.path import dirname, join as joinpath
49from itertools import count
50from mercurial import bdiff, mdiff, commands
51
52current_dir = dirname(__file__)
53sys.path.insert(0, current_dir)
54sys.path.insert(1, joinpath(dirname(current_dir), 'src', 'python'))
55
56from m5.util import neg_inf, pos_inf, Region, Regions
57import sort_includes
58from file_types import lang_type
59
# Region set built from a single Region spanning neg_inf..pos_inf; used as
# the default "regions" argument by the verifiers below.
all_regions = Regions(Region(neg_inf, pos_inf))

# Tab stop width used when expanding tabs; the script entry point at the
# bottom of this file rebinds it when -t is given.
tabsize = 8
# Run of spaces/tabs at the very start of a line.
lead = re.compile(r'^([ \t]+)')
# Run of spaces/tabs at the very end of a line.
trail = re.compile(r'([ \t]+)$')
# if/while/for followed by any amount of blanks and an opening paren.
any_control = re.compile(r'\b(if|while|for)[ \t]*[(]')
# if/while/for followed by exactly one space and an opening paren.
good_control = re.compile(r'\b(if|while|for) [(]')

# Languages for which validate() runs its format checks.
format_types = set(('C', 'C++'))
69
def modified_regions(old_data, new_data):
    """Return the Regions lying between consecutive bdiff blocks.

    Each tuple produced by bdiff.blocks(old_data, new_data) is unpacked
    into four values; only the last two (begin/end on the new_data side)
    are used.  Whenever a block begins somewhere other than where the
    previous block ended, the gap [previous end, this begin) is recorded.
    NOTE(review): presumably the blocks are the *matching* runs, so the
    gaps are the changed lines -- confirm against mercurial's bdiff.
    """
    changed = Regions()
    prev_end = None
    blocks = bdiff.blocks(old_data, new_data)
    for _old_beg, _old_end, new_beg, new_end in blocks:
        have_prev = prev_end is not None
        if have_prev and prev_end != new_beg:
            changed.append(prev_end, new_beg)
        prev_end = new_end
    return changed
78
def modregions(wctx, fname):
    """Return the regions of fname modified relative to its parent(s).

    wctx  -- context object providing filectx(fname)
    fname -- name of the file to inspect

    With one parent the result is modified_regions() against that parent;
    with two parents it is the intersection of the results against both.
    With any other number of parents the whole file (0..number of lines)
    is reported.
    """
    fctx = wctx.filectx(fname)
    parents = fctx.parents()

    contents = fctx.data()
    lines = mdiff.splitnewlines(contents)

    if len(parents) not in (1, 2):
        # No usable parent to diff against: treat every line as modified.
        whole_file = Regions()
        whole_file.append(0, len(lines))
        return whole_file

    result = modified_regions(parents[0].data(), contents)
    if len(parents) == 2:
        # only the lines that are new in both
        result &= modified_regions(parents[1].data(), contents)
    return result
96
class UserInterface(object):
    """Base class for prompting the user.

    Subclasses are expected to provide do_prompt(prompt, results, default),
    which prompt() calls to obtain each answer.
    """

    def __init__(self, verbose=False, auto=False):
        self.verbose = verbose
        self.auto = auto

    def prompt(self, prompt, results, default):
        """Return an answer that is contained in results.

        If self.auto is truthy it is returned immediately without asking.
        Otherwise do_prompt() is called repeatedly until its answer is
        found in results.
        """
        canned = self.auto
        if canned:
            return canned

        answer = self.do_prompt(prompt, results, default)
        while answer not in results:
            answer = self.do_prompt(prompt, results, default)
        return answer
110
class MercurialUI(UserInterface):
    """UserInterface that delegates prompting and output to a ui object."""

    def __init__(self, ui, *args, **kwargs):
        # Remaining arguments are forwarded to UserInterface.__init__.
        parent = super(MercurialUI, self)
        parent.__init__(*args, **kwargs)
        self.ui = ui

    def do_prompt(self, prompt, results, default):
        """Ask through the wrapped ui; results is not passed along."""
        backend = self.ui
        return backend.prompt(prompt, default=default)

    def write(self, string):
        """Write string through the wrapped ui."""
        backend = self.ui
        backend.write(string)
121
class StdioUI(UserInterface):
    """UserInterface that talks to the user over stdin/stdout."""

    def do_prompt(self, prompt, results, default):
        """Read one answer with raw_input; an empty answer yields default."""
        answer = raw_input(prompt)
        if answer:
            return answer
        return default

    def write(self, string):
        """Write string to standard output as-is."""
        out = sys.stdout
        out.write(string)
128
class Verifier(object):
    """Base class for per-file style checks.

    The generic check()/fix() implementations here work line by line and
    rely on the subclass providing:
      languages  -- container of language names the check applies to
      test_name  -- short name used in diagnostics
      check_line -- returns a true value when a line is acceptable
      fix_line   -- returns the corrected version of a line
    """

    def __init__(self, ui, repo=None):
        self.ui = ui
        self.repo = repo
        if repo is None:
            # Without a repository there is no working context to look up,
            # so pin the attribute and keep __getattr__ from trying.
            self.wctx = None

    def __getattr__(self, attr):
        """Forward prompt/write to the ui and lazily create self.wctx."""
        if attr in ('prompt', 'write'):
            return getattr(self.ui, attr)

        if attr == 'wctx':
            repo = self.repo
            try:
                wctx = repo.workingctx()
            except Exception:
                # Fall back to building the working context through
                # mercurial.context when repo.workingctx() is unusable.
                from mercurial import context
                wctx = context.workingctx(repo)
            # Cache it so __getattr__ is not consulted again.
            self.wctx = wctx
            return wctx

        raise AttributeError(attr)

    def open(self, filename, mode):
        """Open filename (joined to the repo working dir when a repo is set).

        Returns the file object, or None (after printing a message) when
        the file cannot be opened.
        """
        if self.repo:
            filename = self.repo.wjoin(filename)

        try:
            # Inside a method body this is the builtin open(), not this
            # method.  Opening can raise IOError as well as OSError.
            f = open(filename, mode)
        except (IOError, OSError):
            msg = sys.exc_info()[1]
            print('could not open file %s: %s' % (filename, msg))
            return None

        return f

    def skip(self, filename):
        """Return True when filename should not be checked at all."""
        # We never want to handle symlinks, so always skip them: If the
        # location pointed to is a directory, skip it. If the location is a
        # file inside the gem5 directory, it will be checked as a file, so
        # symlink can be skipped. If the location is a file outside gem5, we
        # don't want to check it anyway.
        if os.path.islink(filename):
            return True
        return lang_type(filename) not in self.languages

    def check(self, filename, regions=all_regions):
        """Count lines of filename within regions that fail check_line().

        Each failure is reported through write(); with a verbose ui the
        offending line is echoed as well.  Returns the number of failures
        (0 when the file could not be opened).
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        errors = 0
        try:
            for num, line in enumerate(f):
                if num not in regions:
                    continue
                if not self.check_line(line):
                    self.write("invalid %s in %s:%d\n" %
                               (self.test_name, filename, num + 1))
                    if self.ui.verbose:
                        # Show the line's text without its newline.
                        self.write(">>%s<<\n" % line.rstrip('\n'))
                    errors += 1
        finally:
            f.close()
        return errors

    def fix(self, filename, regions=all_regions):
        """Rewrite filename in place, passing lines in regions to fix_line().

        Lines outside regions are written back unchanged.  Nothing is done
        when the file could not be opened.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = list(f)

            f.seek(0)
            f.truncate()

            for i, line in enumerate(lines):
                if i in regions:
                    line = self.fix_line(line)

                f.write(line)
        finally:
            f.close()

    def apply(self, filename, prompt, regions=all_regions):
        """Check filename and hand it to prompt() when errors are found.

        prompt is called as prompt(filename, self.fix, regions); a true
        result from it makes apply() return True.  Otherwise (skipped file,
        no errors, or a false prompt result) False is returned.
        """
        if not self.skip(filename):
            errors = self.check(filename, regions)
            if errors:
                if prompt(filename, self.fix, regions):
                    return True
        return False
210
211
class Whitespace(Verifier):
    """Verifier for tabs in the indentation and trailing whitespace."""

    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
    test_name = 'whitespace'

    def check_line(self, line):
        """Return False for tab-indented lines or trailing blanks."""
        indent = lead.search(line)
        if indent and '\t' in indent.group(1):
            return False

        if trail.search(line):
            return False

        return True

    def fix_line(self, line):
        """Expand tabs in the leading whitespace and strip the line's end.

        Tabs in the indentation advance to the next multiple of tabsize.
        The result always ends in exactly one newline.
        """
        indent = lead.search(line)
        if indent:
            column = 0
            for ch in indent.group(1):
                if ch == '\t':
                    column += tabsize - column % tabsize
                else:
                    column += 1
            # Everything after the indentation is kept untouched here.
            line = ' ' * column + line[indent.end():]

        return line.rstrip() + '\n'
241
class SortedIncludes(Verifier):
    """Verifier that compares a file against its include-sorted form."""

    languages = sort_includes.default_languages

    def __init__(self, *args, **kwargs):
        super(SortedIncludes, self).__init__(*args, **kwargs)
        self.sort_includes = sort_includes.SortIncludes()

    def check(self, filename, regions=all_regions):
        """Return 1 if sorting the includes would change lines in regions.

        Returns 0 when nothing inside regions would change, when the file
        is empty, or when it could not be opened.
        """
        f = self.open(filename, 'r')
        if f is None:
            return 0

        try:
            lines = [l.rstrip('\n') for l in f]
        finally:
            f.close()

        if not lines:
            return 0

        old = ''.join(line + '\n' for line in lines)

        language = lang_type(filename, lines[0])
        sort_lines = list(self.sort_includes(lines, filename, language))
        new = ''.join(line + '\n' for line in sort_lines)

        # Only differences that overlap the requested regions count.
        mod = modified_regions(old, new)
        modified = mod & regions

        if modified:
            self.write("invalid sorting of includes in %s\n" % (filename))
            if self.ui.verbose:
                for start, end in modified.regions:
                    self.write("bad region [%d, %d)\n" % (start, end))
            return 1

        return 0

    def fix(self, filename, regions=all_regions):
        """Rewrite filename with its includes sorted.

        The whole file is rewritten; the regions argument is accepted for
        interface compatibility but not consulted.  Empty or unopenable
        files are left alone.
        """
        f = self.open(filename, 'r+')
        if f is None:
            return

        try:
            lines = [l.rstrip('\n') for l in f.readlines()]
            if not lines:
                # Nothing to sort, and lines[0] below would not exist.
                return

            language = lang_type(filename, lines[0])
            sort_lines = list(self.sort_includes(lines, filename, language))

            f.seek(0)
            f.truncate()

            for line in sort_lines:
                f.write(line)
                f.write('\n')
        finally:
            f.close()
290
def linelen(line):
    """Return the display width of line with tabs expanded.

    Every tab advances the column to the next multiple of tabsize; every
    other character counts as one column.
    """
    if '\t' not in line:
        return len(line)

    column = 0
    for ch in line:
        step = 1
        if ch == '\t':
            step = tabsize - column % tabsize
        column += step

    return column
304
class ValidationStats(object):
    """Counters for the kinds of format violations found by validate()."""

    def __init__(self):
        # Every counter starts at zero.
        self.toolong = self.toolong80 = 0
        self.leadtabs = self.trailwhite = 0
        self.badcontrol = self.cret = 0

    def dump(self):
        """Print a summary of all counters to standard output."""
        report = (
            '%d violations of lines over 79 chars. '
            '%d of which are 80 chars exactly.\n'
            '%d cases of whitespace at the end of a line.\n'
            '%d cases of tabs to indent.\n'
            '%d bad parens after if/while/for.\n'
            '%d carriage returns found.\n'
        )
        counts = (self.toolong, self.toolong80, self.trailwhite,
                  self.leadtabs, self.badcontrol, self.cret)
        print(report % counts)

    def __nonzero__(self):
        """Truth value: the first non-zero counter, else the last one."""
        return (self.toolong or self.toolong80 or self.leadtabs or
                self.trailwhite or self.badcontrol or self.cret)
327
def validate(filename, stats, verbose, exit_code):
    """Run the format checks on filename and accumulate counts in stats.

    filename  -- path of the file to check; files whose language is not in
                 format_types are ignored
    stats     -- ValidationStats-like object whose counters are incremented
    verbose   -- integer verbosity; >0 reports unopenable files, >1 reports
                 each violation, >2 also echoes the offending line
    exit_code -- when not None, the process exits with this code at the
                 first violation (or when the file cannot be opened)
    """
    lang = lang_type(filename)
    if lang not in format_types:
        return

    def msg(lineno, line, message):
        print('%s:%d> %s' % (filename, lineno + 1, message))
        if verbose > 2:
            print(line)

    def bad():
        if exit_code is not None:
            sys.exit(exit_code)

    try:
        # Opening a file raises IOError in Python 2, so catch both.
        f = open(filename, 'r')
    except (IOError, OSError):
        if verbose > 0:
            print('could not open file %s' % filename)
        bad()
        return

    # The control-statement spacing rule is only applied to C++ sources.
    cpp = lang == 'C++'

    try:
        for i, line in enumerate(f):
            line = line.rstrip('\n')

            # no carriage returns
            if line.find('\r') != -1:
                stats.cret += 1
                if verbose > 1:
                    msg(i, line, 'carriage return found')
                bad()

            # lines max out at 79 chars
            llen = linelen(line)
            if llen > 79:
                stats.toolong += 1
                if llen == 80:
                    stats.toolong80 += 1
                if verbose > 1:
                    msg(i, line, 'line too long (%d chars)' % llen)
                bad()

            # no tabs used to indent
            match = lead.search(line)
            if match and match.group(1).find('\t') != -1:
                stats.leadtabs += 1
                if verbose > 1:
                    msg(i, line, 'using tabs to indent')
                bad()

            # no trailing whitespace
            if trail.search(line):
                stats.trailwhite += 1
                if verbose > 1:
                    msg(i, line, 'trailing whitespace')
                bad()

            # for c++, exactly one space between if/while/for and (
            if cpp:
                match = any_control.search(line)
                if match and not good_control.search(line):
                    stats.badcontrol += 1
                    if verbose > 1:
                        msg(i, line,
                            'improper spacing after %s' % match.group(1))
                    bad()
    finally:
        f.close()
392
393
def do_check_style(hgui, repo, *pats, **opts):
    """check files for proper m5 style guidelines

    Without an argument, checks all modified and added files for gem5
    coding style violations. A list of files can be specified to limit
    the checker to a subset of the repository. The style rules are
    normally applied on a diff of the repository state (i.e., added
    files are checked in their entirety while only modifications of
    modified files are checked).

    The --all option can be specified to include clean files and check
    modified files in their entirety.
    """
    # Returns True when the user chose to abort, False otherwise.
    opt_fix_white = opts.get('fix_white', False)
    opt_all = opts.get('all', False)
    ui = MercurialUI(hgui, hgui.verbose)

    def prompt(name, func, regions=all_regions):
        # Ask the user what to do about a file with violations.
        result = ui.prompt("(a)bort, (i)gnore, or (f)ix?", 'aif', 'a')
        if result == 'a':
            return True
        elif result == 'f':
            func(repo.wjoin(name), regions)

        return False

    def no_prompt(name, func, regions=all_regions):
        # Apply the fix straight away without asking.
        func(repo.wjoin(name), regions)
        return False

    # --fix-white fixes whitespace automatically.  Passing the boolean
    # through as the ui's canned answer does not work: True is neither
    # 'a' nor 'f', so every violation would be silently ignored.
    if opt_fix_white:
        prompt_white = no_prompt
    else:
        prompt_white = prompt

    # Import the match (repository file name matching helper)
    # function. Different versions of Mercurial keep it in different
    # modules and implement them differently.
    try:
        from mercurial import scmutil
        m = scmutil.match(repo[None], pats, opts)
    except ImportError:
        from mercurial import cmdutil
        m = cmdutil.match(repo, pats, opts)

    modified, added, removed, deleted, unknown, ignore, clean = \
        repo.status(match=m, clean=opt_all)
    if not opt_all:
        try:
            wctx = repo.workingctx()
        except Exception:
            # Fall back to constructing the working context directly.
            from mercurial import context
            wctx = context.workingctx(repo)

        # Added files are checked entirely, modified files only where
        # they differ from their parent(s).
        files = [ (fn, all_regions) for fn in added ] + \
            [ (fn,  modregions(wctx, fn)) for fn in modified ]
    else:
        files = [ (fn, all_regions) for fn in added + modified + clean ]

    whitespace = Whitespace(ui)
    sorted_includes = SortedIncludes(ui)
    for fname, mod_regions in files:
        fpath = joinpath(repo.root, fname)

        if whitespace.apply(fpath, prompt_white, mod_regions):
            return True

        if sorted_includes.apply(fpath, prompt, mod_regions):
            return True

    return False
459
def do_check_format(hgui, repo, **args):
    """Run validate() on all modified and added files of repo.

    When any violation is counted, the statistics are printed and the
    user is asked whether to ignore them or abort.  Returns True when the
    user chose to abort, False otherwise.
    """
    # No canned answer is configured for this command, so the ui keeps
    # its default of always asking.
    ui = MercurialUI(hgui, hgui.verbose)

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    verbose = 0
    stats = ValidationStats()
    for f in modified + added:
        # exit_code=None: keep going after a violation instead of exiting.
        validate(joinpath(repo.root, f), stats, verbose, None)

    if stats:
        stats.dump()
        result = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                           'ai', 'a')
        if result == 'a':
            return True

    return False
478
def check_hook(hooktype):
    """Raise AttributeError unless hooktype is one of the supported hooks."""
    supported = ('pretxncommit', 'pre-qrefresh')
    if hooktype in supported:
        return
    raise AttributeError("This hook is not meant for %s" % hooktype)
483
def check_style(ui, repo, hooktype, **kwargs):
    """Hook entry point wrapping do_check_style().

    The hook type is validated first.  Extra keyword arguments are not
    forwarded.  Any exception from the check is printed as a traceback
    and reported as True.
    """
    check_hook(hooktype)
    forwarded = {}

    try:
        outcome = do_check_style(ui, repo, **forwarded)
    except Exception:
        import traceback
        traceback.print_exc()
        outcome = True
    return outcome
494
def check_format(ui, repo, hooktype, **kwargs):
    """Hook entry point wrapping do_check_format().

    The hook type is validated first.  Extra keyword arguments are not
    forwarded.  Any exception from the check is printed as a traceback
    and reported as True.
    """
    check_hook(hooktype)
    forwarded = {}

    try:
        outcome = do_check_format(ui, repo, **forwarded)
    except Exception:
        import traceback
        traceback.print_exc()
        outcome = True
    return outcome
505
# Use mercurial's translation marker for the help strings when it can be
# imported; otherwise fall back to a function returning its argument.
try:
    from mercurial.i18n import _
except ImportError:
    def _(arg):
        return arg

# Command table: each entry maps a command name to a tuple of
# (function, option list, synopsis string).
cmdtable = {
    '^m5style': (
        do_check_style,
        [
            ('w', 'fix-white', False, _("automatically fix whitespace")),
            ('a', 'all', False,
             _("include clean files and unmodified parts of modified files")),
        ] + commands.walkopts,
        _('hg m5style [-a] [FILE]...'),
    ),
    '^m5format': (
        do_check_format,
        [],
        _('hg m5format [FILE]...'),
    ),
}
525
if __name__ == '__main__':
    # Command line entry point:
    #   fixwhite [-t <tabsize>] <path>...  rewrite files with fixed whitespace
    #   chkwhite <path>...                 report whitespace violations
    #   chkformat <path>...                report format violations
    import getopt

    progname = sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit('usage: %s <command> [<command args>]' % progname)

    fixwhite_usage = '%s fixwhite [-t <tabsize> ] <path> [...] \n' % progname
    chkformat_usage = '%s chkformat <path> [...] \n' % progname
    chkwhite_usage = '%s chkwhite <path> [...] \n' % progname

    command = sys.argv[1]
    if command == 'fixwhite':
        flags = 't:'
        usage = fixwhite_usage
    elif command == 'chkwhite':
        flags = 'nv'
        usage = chkwhite_usage
    elif command == 'chkformat':
        flags = 'nv'
        usage = chkformat_usage
    else:
        sys.exit(fixwhite_usage + chkwhite_usage + chkformat_usage)

    try:
        opts, args = getopt.getopt(sys.argv[2:], flags)
    except getopt.GetoptError:
        # Unknown option or missing argument: show the error followed by
        # the usage line of the selected command.
        sys.exit('%s\n%s' % (sys.exc_info()[1], usage))

    code = 1
    verbose = 1
    for opt, arg in opts:
        if opt == '-n':
            code = None
        if opt == '-t':
            # Rebinds the module-level tabsize read by the whitespace fixer.
            tabsize = int(arg)
        if opt == '-v':
            verbose += 1

    if command == 'fixwhite':
        # The Whitespace verifier does the work; files in languages it
        # does not handle (and symlinks) are skipped.
        checker = Whitespace(StdioUI(verbose=verbose))
        for filename in args:
            if not checker.skip(filename):
                checker.fix(filename)
    elif command == 'chkwhite':
        # check() reports each offending line through the ui itself.
        checker = Whitespace(StdioUI(verbose=verbose))
        for filename in args:
            if not checker.skip(filename):
                checker.check(filename)
    elif command == 'chkformat':
        stats = ValidationStats()
        for filename in args:
            validate(filename, stats=stats, verbose=verbose, exit_code=code)

        if verbose > 0:
            stats.dump()
    else:
        sys.exit("command '%s' not found" % command)
580