gem5/util/find_copyrights.py

13540Sandrea.mondelli@ucf.edu#!/usr/bin/env python2.7
8333Snate@binkert.org
8333Snate@binkert.orgimport os
8333Snate@binkert.orgimport re
8333Snate@binkert.orgimport sys
8333Snate@binkert.org
8333Snate@binkert.orgfrom file_types import lang_type, find_files
8333Snate@binkert.org
# Editor mode marker (e.g. "-*- mode: python -*-") that may occupy line one.
mode_line = re.compile(r'(-\*- *mode:.* *-\*-)')
# A line whose first non-blank character is '#'.
shell_comment = re.compile(r'^\s*#')
# Any ';' on the line counts as a lisp comment.
lisp_comment = re.compile(r';')
cpp_comment = re.compile(r'//')
c_comment_start = re.compile(r'/\*')
c_comment_end   = re.compile(r'\*/')


def _find_line_comment_blocks(lines, comment_re, allow_shebang):
    """Yield (start, end) for leading runs of single-line comments.

    A run ends at the first line that does not match comment_re.  Scanning
    stops at the first non-blank, non-comment line found outside a run.
    """
    start = None
    last = None
    for i, line in enumerate(lines):
        last = i
        # The first line may be a '#!' interpreter line or an editor mode
        # line; neither belongs to a copyright block.
        if i == 0 and ((allow_shebang and line.startswith('#!')) or
                       mode_line.search(line)):
            continue

        if comment_re.search(line):
            if start is None:
                start = i
        elif start is not None:
            yield start, i - 1
            start = None
        elif line.strip():
            # Real content before/after the comment header: stop looking.
            return

    # A comment run that reaches end-of-file is still a block.
    if start is not None:
        yield start, last


def _find_c_family_blocks(lines):
    """Yield (start, end) for leading '/* */' and '//' comment blocks."""
    start = None
    mode = None     # None, 'C' (inside /* */) or 'CPP' (inside a // run)
    last = None
    for i, line in enumerate(lines):
        last = i
        if i == 0 and mode_line.search(line):
            continue

        if mode == 'C':
            assert start is not None, 'on line %d' % (i + 1)
            if c_comment_end.search(line):
                yield start, i
                mode = None
            continue

        cpp_match = cpp_comment.search(line)
        c_match = c_comment_start.search(line)

        if cpp_match:
            assert not c_match, 'on line %d' % (i + 1)
            if line[:cpp_match.start()].strip():
                # Code precedes the '//': the header region is over.  Emit
                # the block collected so far instead of dropping it.
                if mode == 'CPP':
                    yield start, i - 1
                return
            if mode is None:
                mode = 'CPP'
                start = i
            else:
                # A new "Copyright" line inside a // run starts a new block.
                text = line[cpp_match.end():].lstrip()
                if text.startswith("Copyright"):
                    yield start, i - 1
                    start = i
            continue
        elif mode == 'CPP':
            assert start is not None, 'on line %d' % (i + 1)
            if not line.strip():
                # Blank lines do not terminate a // run.
                continue
            yield start, i - 1
            mode = None
            if not c_match:
                return

        if c_match:
            assert mode is None, 'on line %d' % (i + 1)
            start = i
            if c_comment_end.search(line, c_match.end()):
                # Comment opens and closes on the same line.
                yield start, i
                continue
            mode = 'C'

        if mode is None and line.strip():
            return

    # A // run that reaches end-of-file is still a block.  An unterminated
    # /* comment is not reported.
    if mode == 'CPP':
        yield start, last


def find_copyright_block(lines, lang_type):
    """Generate the comment blocks found at the top of a source file.

    lines     -- list of the file's lines
    lang_type -- language name; one of 'python', 'make', 'shell', 'perl',
                 'scons', 'lisp', 'C', 'C++', 'swig', 'isa', 'asm',
                 'slicc', 'lex' or 'yacc'

    Yields (start, end) pairs of 0-based, inclusive line indices, one per
    comment block.  Raises AttributeError (on first iteration) when
    lang_type is not one of the names above.
    """
    if lang_type in ('python', 'make', 'shell', 'perl', 'scons'):
        blocks = _find_line_comment_blocks(lines, shell_comment, True)
    elif lang_type in ('lisp', ):
        blocks = _find_line_comment_blocks(lines, lisp_comment, False)
    elif lang_type in ('C', 'C++', 'swig', 'isa', 'asm', 'slicc',
                       'lex', 'yacc'):
        blocks = _find_c_family_blocks(lines)
    else:
        raise AttributeError("Could not handle language %s" % lang_type)

    for block in blocks:
        yield block
8333Snate@binkert.org
# "YYYY-YYYY" with optional whitespace around the dash.
date_range_re = re.compile(r'([0-9]{4})\s*-\s*([0-9]{4})')


def process_dates(dates):
    """Expand a comma-separated list of years and year ranges into a set.

    dates -- string such as "2001-2003, 2005"

    Returns a set of ints holding every year named, with ranges expanded
    inclusively.  Items that are neither a range nor an integer are
    skipped.
    """
    years = set()
    for item in dates.split(','):
        item = item.strip()
        match = date_range_re.match(item)
        if match:
            first, last = [int(year) for year in match.groups()]
            years.update(range(first, last + 1))
            continue

        try:
            years.add(int(item))
        except ValueError:
            # Deliberately best-effort: unparsable fragments are ignored.
            pass

    return years
8333Snate@binkert.org
# Groups: 1 = "(c)"/"(C)", 2 = the raw date list, 3 = the owner text.
# Comment characters and whitespace between the dates and the owner are
# skipped.  The owner class uses A-Za-z rather than A-z, because A-z also
# covers the punctuation that sits between 'Z' and 'a' in ASCII.
copyright_re = re.compile(
    r'Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-Za-z,. -]+)',
    re.DOTALL)

# "Authors: <name>" line, optionally preceded by comment characters.
authors_re = re.compile(r'^[\s*#/]*Authors:\s*([A-Za-z .]+)\s*$')
# Continuation line holding just a name, optionally preceded by comment
# characters.
more_authors_re = re.compile(r'^[\s*#/]*([A-Za-z .]+)\s*$')
8333Snate@binkert.org
# Every owner string seen by get_data(), across all files.
all_owners = set()


def get_data(lang_type, lines):
    """Extract copyright information from the comment blocks of a file.

    lang_type -- language name passed through to find_copyright_block()
    lines     -- list of the file's lines

    Returns a list of (owner, dates, authors, start, end) tuples, one per
    comment block that contains a copyright notice: owner is the stripped
    owner text, dates the set produced by process_dates(), authors the
    list of names found after an "Authors:" line, and start/end the block's
    line indices (end may be trimmed, see below).  Each owner is also added
    to the module-level all_owners set.
    """
    data = []
    for start, end in find_copyright_block(lines, lang_type):
        joined = ''.join(lines[start:end + 1])
        match = copyright_re.search(joined)
        if not match:
            continue

        _, dates, owner = match.groups()
        dates = dates.strip()
        owner = owner.strip()

        all_owners.add(owner)
        try:
            dates = process_dates(dates)
        except Exception:
            # Show what was being parsed before propagating the error.
            print(dates)
            print(owner)
            raise

        authors = []
        for i in range(start, end + 1):
            line = lines[i]
            if not authors:
                # Still looking for the "Authors:" line.
                match = authors_re.search(line)
                if match:
                    authors.append(match.group(1).strip())
                continue

            match = more_authors_re.search(line)
            if match:
                authors.append(match.group(1).strip())
                continue

            # First line after the author list that is not a name: trim
            # the reported end of the block.  A blank line becomes the new
            # end; a '//' line that carries text ends the block on the line
            # before it.
            for j in range(i, end + 1):
                text = lines[j].strip()
                if not text:
                    end = j
                    break
                if text.startswith('//') and text[2:].lstrip():
                    end = j - 1
                    break
            break

        data.append((owner, dates, authors, start, end))

    return data
8333Snate@binkert.org
def datestr(dates):
    """Format a collection of years as a compact comma-separated string.

    dates -- iterable of ints (years)

    Consecutive years are collapsed into "first-last" ranges, so
    {2001, 2002, 2003, 2005} becomes "2001-2003,2005".  An empty
    collection gives the empty string.
    """
    years = sorted(dates)
    if not years:
        return ''

    ranges = []
    first = last = years[0]
    for year in years[1:]:
        if year == last + 1:
            # Extends the current run of consecutive years.
            last = year
            continue
        ranges.append((first, last))
        first = last = year
    ranges.append((first, last))

    return ','.join('%d' % lo if lo == hi else '%d-%d' % (lo, hi)
                    for lo, hi in ranges)
8333Snate@binkert.org
# -c appends a per-owner file count to each output line; -v reports files
# that could not be parsed.
usage_str = """usage:
%s [-c] [-v] <file-or-directory>..."""


def usage(exitcode):
    """Print the usage message and optionally exit.

    exitcode -- process exit status, or None to return to the caller
                after printing
    """
    print(usage_str % sys.argv[0])
    if exitcode is not None:
        sys.exit(exitcode)
8333Snate@binkert.org
if __name__ == '__main__':
    # Scan the files/directories named on the command line and print one
    # merged "Copyright (c) <years> <owner>" line per distinct owner.
    import getopt

    show_counts = False
    # NOTE(review): values given with -i are collected here but are not
    # consulted anywhere below.
    ignore = set()
    verbose = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ci:v")
    except getopt.GetoptError:
        usage(1)

    for o, a in opts:
        if o == '-c':
            show_counts = True
        elif o == '-i':
            ignore.add(a)
        elif o == '-v':
            verbose = True

    # (filename, language) pairs to examine.
    files = []
    for base in args:
        if os.path.isfile(base):
            files.append((base, lang_type(base)))
        elif os.path.isdir(base):
            files += find_files(base)
        else:
            raise AttributeError("can't access '%s'" % base)

    copyrights = {}     # owner -> set of years
    counts = {}         # owner -> number of copyright blocks seen

    for filename, _lang in files:
        # Close the file promptly rather than leaking the handle.
        with open(filename, 'r') as f:
            lines = f.readlines()
        if not lines:
            continue

        lines = [line.rstrip('\r\n') for line in lines]

        # The language is re-detected with the first line available.
        lt = lang_type(filename, lines[0])
        try:
            data = get_data(lt, lines)
        except Exception as e:
            # Unparsable files are skipped; -v reports them.
            if verbose:
                if len(e.args) == 1:
                    e.args = ('%s (%s)' % (e, filename), )
                print("could not parse %s: %s" % (filename, e))
            continue

        for owner, dates, authors, start, end in data:
            copyrights.setdefault(owner, set()).update(dates)
            counts[owner] = counts.get(owner, 0) + 1

    # Owner sits before the date set in the tuple so that ties on count
    # are broken by owner name and two sets are never compared.
    info = [(counts[o], o, d) for o, d in copyrights.items()]

    for count, owner, dates in sorted(info, reverse=True):
        if show_counts:
            owner = '%s (%s files)' % (owner, count)
        print('Copyright (c) %s %s' % (datestr(dates), owner))