#!/usr/bin/env python
# find_copyrights.py revision 8333

import os
import re
import sys

from file_types import lang_type, find_files

# Editor mode line such as "-*- mode:python -*-".  Written as a raw string so
# the backslashes reach the regex engine instead of being treated as (invalid)
# string escapes.
mode_line = re.compile(r'(-\*- *mode:.* *-\*-)')
# A line whose first non-blank character is '#'.
shell_comment = re.compile(r'^\s*#')
# Any line containing ';' (unanchored).
lisp_comment = re.compile(r';')
cpp_comment = re.compile(r'//')
c_comment_start = re.compile(r'/\*')
c_comment_end = re.compile(r'\*/')


def _find_line_comment_blocks(lines, comment_re, skip_shebang):
    """Yield (start, end) index pairs for leading runs of one-line comments.

    lines        -- iterable of source lines
    comment_re   -- compiled regex; a line is a comment if it .search()es
    skip_shebang -- when true, a '#!' first line is ignored

    Scanning stops at the first non-blank, non-comment line that is not
    directly closing a comment run.
    """
    start = None
    i = -1
    for i, line in enumerate(lines):
        if i == 0:
            # The first line may be an interpreter line or an editor mode
            # line; neither belongs to a copyright block.
            if skip_shebang and line.startswith('#!'):
                continue
            if mode_line.search(line):
                continue

        if comment_re.search(line):
            if start is None:
                start = i
        elif start is None:
            # Outside a comment run: blank lines are tolerated, anything
            # else means the header region is over.
            if line.strip():
                return
        else:
            yield start, i - 1
            start = None

    # A comment run that reaches the end of the input has no following line
    # to close it, so report it here.
    if start is not None:
        yield start, i


def _find_c_comment_blocks(lines):
    """Yield (start, end) index pairs for leading /* */ and // comment blocks.

    A run of '//' lines is split whenever a later '//' line begins with
    "Copyright".  Raises AssertionError (message 'on line N') when a line
    holds both '//' and '/*' outside of a C comment.
    """
    start = None
    mode = None   # None, 'C' (inside /* */) or 'CPP' (inside a // run)
    i = -1
    for i, line in enumerate(lines):
        if i == 0 and mode_line.search(line):
            continue

        if mode == 'C':
            assert start is not None, 'on line %d' % (i + 1)
            if c_comment_end.search(line):
                yield start, i
                mode = None
            continue

        cpp_match = cpp_comment.search(line)
        c_match = c_comment_start.search(line)

        if cpp_match:
            assert not c_match, 'on line %d' % (i + 1)
            # Code in front of the '//' ends the header region.
            if line[:cpp_match.start()].strip():
                return
            if mode is None:
                mode = 'CPP'
                start = i
            else:
                text = line[cpp_match.end():].lstrip()
                if text.startswith("Copyright"):
                    # A new notice begins inside the same // run.
                    yield start, i - 1
                    start = i
            continue
        elif mode == 'CPP':
            assert start is not None, 'on line %d' % (i + 1)
            # Blank lines do not terminate a // run.
            if not line.strip():
                continue
            yield start, i - 1
            mode = None
            if not c_match:
                return

        if c_match:
            assert mode is None, 'on line %d' % (i + 1)
            start = i
            if c_comment_end.search(line, c_match.end()):
                # The comment opens and closes on this very line; without
                # this check the scanner would stay in C mode and swallow
                # the following lines.
                yield start, i
                continue
            mode = 'C'

        if mode is None and line.strip():
            return

    # A // run that reaches the end of the input is still a block.
    if mode == 'CPP':
        yield start, i


def find_copyright_block(lines, lang_type):
    """Generate (start, end) line-index pairs of leading comment blocks.

    lines     -- sequence of source lines
    lang_type -- language name selecting the comment syntax:
                 'python', 'make', 'shell', 'perl', 'scons' use '#';
                 'lisp' uses ';';
                 'C', 'C++', 'swig', 'isa', 'asm', 'slicc', 'lex', 'yacc'
                 use '/* */' and '//'.

    Indices are zero-based and inclusive.  Because this is a generator,
    the AttributeError for an unsupported language is raised when the
    result is first iterated, not when the function is called.
    """
    if lang_type in ('python', 'make', 'shell', 'perl', 'scons'):
        for block in _find_line_comment_blocks(lines, shell_comment, True):
            yield block

    elif lang_type in ('lisp', ):
        for block in _find_line_comment_blocks(lines, lisp_comment, False):
            yield block

    elif lang_type in ('C', 'C++', 'swig', 'isa', 'asm', 'slicc',
                       'lex', 'yacc'):
        for block in _find_c_comment_blocks(lines):
            yield block

    else:
        raise AttributeError("Could not handle language %s" % lang_type)

# Two four-digit years separated by a hyphen, e.g. "2004-2006".
date_range_re = re.compile(r'([0-9]{4})\s*-\s*([0-9]{4})')


def process_dates(dates):
    """Expand a comma-separated year list into a set of integer years.

    dates -- string such as "2004-2006, 2010"

    Each field is either a "first-last" range, which contributes every
    year from first to last inclusive, or a single integer year.  Fields
    that are neither are skipped.  Returns a set of ints (possibly empty).
    """
    years = set()
    for field in dates.split(','):
        field = field.strip()
        match = date_range_re.match(field)
        if match:
            first, last = [int(year) for year in match.groups()]
            # range() exists on both Python 2 and 3; a reversed range
            # (first > last) simply contributes nothing.
            years.update(range(first, last + 1))
            continue

        try:
            years.add(int(field))
        except ValueError:
            # Not a year; ignoring it is the intended best-effort
            # behaviour for free-form copyright lines.
            pass

    return years

# Matches "Copyright (c) <dates> <owner>".  Groups: the "(c)"/"(C)" marker,
# the raw date text (digits, commas, hyphens, spaces) and the owner text.
# Comment punctuation (whitespace, '*', '#', '/') between dates and owner is
# skipped.  The letter class is spelled A-Za-z: the range A-z would also
# admit '[', '\', ']', '^', '_' and '`'.  The literal hyphen is placed last
# in the class so it cannot be read as a range operator.
copyright_re = re.compile(
    r'Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-Za-z,. -]+)',
    re.DOTALL)

# A comment line of the form "Authors: <name>"; group 1 is the name.
authors_re = re.compile(r'^[\s*#/]*Authors:\s*([A-Za-z .]+)\s*$')
# A comment line holding nothing but a name (letters, spaces, dots).
more_authors_re = re.compile(r'^[\s*#/]*([A-Za-z .]+)\s*$')

# Every owner string recorded by get_data().
all_owners = set()
def get_data(lang_type, lines):
    """Extract copyright records from the leading comment blocks of a file.

    lang_type -- language name passed through to find_copyright_block()
    lines     -- list of source lines

    Returns a list of (owner, dates, authors, start, end) tuples, one per
    comment block in which copyright_re matches:
      owner   -- stripped owner text from the notice
      dates   -- set of years produced by process_dates()
      authors -- names collected from an "Authors:" line and the name-only
                 lines that follow it
      start   -- index of the block's first line
      end     -- index of the block's last line, possibly pulled in when
                 the author list is followed by other text (see below)

    Every owner is also added to the module-level all_owners set.
    """
    data = []
    for start, end in find_copyright_block(lines, lang_type):
        # The block's lines are concatenated with no separator before the
        # notice is searched for.
        joined = ''.join(lines[start:end + 1])
        match = copyright_re.search(joined)
        if not match:
            continue

        _, dates, owner = match.groups()
        dates = dates.strip()
        owner = owner.strip()

        all_owners.add(owner)
        try:
            dates = process_dates(dates)
        except Exception:
            # Show what was being parsed, then let the error propagate.
            print(dates)
            print(owner)
            raise

        authors = []
        for i in range(start, end + 1):
            line = lines[i]
            if not authors:
                # Still looking for the "Authors:" line.
                match = authors_re.search(line)
                if match:
                    authors.append(match.group(1).strip())
                continue

            match = more_authors_re.search(line)
            if match:
                authors.append(match.group(1).strip())
                continue

            # First line after the author list that is not a bare name.
            # Scan forward from it: a line that is empty once stripped
            # becomes the new end; a '//' line that still has text after
            # the marker moves the end to the line just before it.
            for j in range(i, end + 1):
                text = lines[j].strip()
                if not text:
                    end = j
                    break
                if text.startswith('//'):
                    text = text[2:].lstrip()
                    if text:
                        end = j - 1
                        break
            break

        data.append((owner, dates, authors, start, end))

    return data

def datestr(dates):
    """Format a collection of years as a compact comma-separated string.

    dates -- iterable of integer years

    Consecutive years are collapsed into "first-last" ranges, so
    {2001, 2002, 2003, 2005} becomes "2001-2003,2005".  An empty
    collection yields the empty string.
    """
    years = sorted(dates)
    if not years:
        return ''

    def fmt(first, last):
        if first == last:
            return '%d' % first
        return '%d-%d' % (first, last)

    output = []
    first = last = years[0]
    for year in years[1:]:
        if year == last + 1:
            # Extends the current run.
            last = year
            continue
        output.append(fmt(first, last))
        first = last = year

    output.append(fmt(first, last))

    return ','.join(output)

# NOTE(review): the option parser in the __main__ section also accepts
# -c and -i; only -v is advertised here.
usage_str = """usage:
%s [-v] <directory>"""


def usage(exitcode):
    """Print the usage message and optionally exit.

    exitcode -- status passed to sys.exit(); when None the function
                returns normally after printing.
    """
    print(usage_str % sys.argv[0])
    if exitcode is not None:
        sys.exit(exitcode)

if __name__ == '__main__':
    # Command-line driver: collect copyright notices from the given files
    # and directories and print one merged "Copyright (c) <years> <owner>"
    # line per owner, most frequently seen owner first.
    import getopt

    show_counts = False
    ignore = set()   # filled from -i; nothing in this script reads it
    verbose = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ci:v")
    except getopt.GetoptError:
        usage(1)

    for o, a in opts:
        if o == '-c':
            show_counts = True
        if o == '-i':
            ignore.add(a)
        if o == '-v':
            verbose = True

    files = []

    for base in args:
        if os.path.isfile(base):
            files += [(base, lang_type(base))]
        elif os.path.isdir(base):
            files += find_files(base)
        else:
            raise AttributeError("can't access '%s'" % base)

    copyrights = {}   # owner -> set of years
    counts = {}       # owner -> number of notices found

    for filename, lang in files:
        # Close the handle as soon as the contents are read.
        with open(filename, 'r') as f:
            lines = f.readlines()
        if not lines:
            continue

        lines = [line.rstrip('\r\n') for line in lines]

        # Re-detect the language now that the first line is available.
        lt = lang_type(filename, lines[0])
        try:
            data = get_data(lt, lines)
        except Exception as e:
            # Best effort: a file that cannot be parsed is skipped, and
            # only reported when -v was given.
            if verbose:
                if len(e.args) == 1:
                    e.args = ('%s (%s)' % (e, filename), )
                print("could not parse %s: %s" % (filename, e))
            continue

        for owner, dates, authors, start, end in data:
            copyrights.setdefault(owner, set()).update(dates)
            counts[owner] = counts.get(owner, 0) + 1

    info = [(counts[o], d, o) for o, d in copyrights.items()]

    for count, dates, owner in sorted(info, reverse=True):
        if show_counts:
            owner = '%s (%s files)' % (owner, count)
        print('Copyright (c) %s %s' % (datestr(dates), owner))
27411308Santhony.gutierrez@amd.com