113540Sandrea.mondelli@ucf.edu#!/usr/bin/env python2.7
28333Snate@binkert.org
38333Snate@binkert.orgimport os
48333Snate@binkert.orgimport re
58333Snate@binkert.orgimport sys
68333Snate@binkert.org
78333Snate@binkert.orgfrom file_types import lang_type, find_files
88333Snate@binkert.org
# Editor mode line such as "-*- mode:python -*-".  Raw string so that the
# "\*" escapes reach the regex engine untouched instead of relying on
# Python passing unknown string escapes through.
mode_line = re.compile(r'(-\*- *mode:.* *-\*-)')

# Line-comment introducers.  The shell and lisp patterns are anchored so that
# only lines that *start* with a comment count as comment lines; a code line
# with a trailing comment (or a ';' inside a string) is still code.
shell_comment = re.compile(r'^\s*#')
lisp_comment = re.compile(r'^\s*;')

# "//" may appear anywhere in the line; the caller checks for code before it.
cpp_comment = re.compile(r'//')

# Delimiters of a C block comment.
c_comment_start = re.compile(r'/\*')
c_comment_end = re.compile(r'\*/')
def _line_comment_blocks(lines, comment_re, allow_shebang):
    """Yield (start, end) for runs of line comments at the top of a file.

    lines         -- sequence of source lines
    comment_re    -- compiled regex that matches a comment line
    allow_shebang -- when true, a first line starting with '#!' is skipped

    Indices are 0-based and inclusive.  Scanning stops at the first
    non-blank, non-comment line seen while no comment run is open.
    """
    start = None
    for i, line in enumerate(lines):
        # The first line may be an interpreter line or an editor mode line;
        # neither belongs to a copyright block.
        if i == 0 and ((allow_shebang and line.startswith('#!')) or
                       mode_line.search(line)):
            continue

        if comment_re.search(line):
            if start is None:
                start = i
        elif start is None:
            if line.strip():
                return
        else:
            yield start, i - 1
            start = None

    # A file that ends while still inside a comment run (e.g. a file that
    # contains nothing but its header) must still report that run.
    if start is not None:
        yield start, len(lines) - 1


def _c_family_blocks(lines):
    """Yield (start, end) for leading '//' runs and '/* */' comments.

    Indices are 0-based and inclusive.  A run of '//' lines is split in two
    when a later line of the run begins with the word "Copyright".
    Raises AssertionError (message 'on line N') when a line contains both
    '//' and '/*' outside a block comment.
    """
    start = None
    # None: between comments, 'C': inside /* ... */, 'CPP': inside a // run.
    mode = None
    for i, line in enumerate(lines):
        if i == 0 and mode_line.search(line):
            continue

        if mode == 'C':
            assert start is not None, 'on line %d' % (i + 1)
            if c_comment_end.search(line):
                yield start, i
                mode = None
            continue

        cpp_match = cpp_comment.search(line)
        c_match = c_comment_start.search(line)

        if cpp_match:
            assert not c_match, 'on line %d' % (i + 1)
            if line[:cpp_match.start()].strip():
                # Code precedes the comment, so the header is over.  Report
                # the '//' run collected so far instead of dropping it.
                if mode == 'CPP':
                    yield start, i - 1
                return
            if mode is None:
                mode = 'CPP'
                start = i
            else:
                text = line[cpp_match.end():].lstrip()
                if text.startswith("Copyright"):
                    # A new copyright notice starts a new block.
                    yield start, i - 1
                    start = i
            continue
        elif mode == 'CPP':
            assert start is not None, 'on line %d' % (i + 1)
            if not line.strip():
                # Blank lines do not end a '//' run.
                continue
            yield start, i - 1
            mode = None
            if not c_match:
                return

        if c_match:
            assert mode is None, 'on line %d' % (i + 1)
            start = i
            if c_comment_end.search(line, c_match.end()):
                # The comment opens and closes on this one line.
                yield start, i
            else:
                mode = 'C'
            continue

        if mode is None and line.strip():
            return

    # The file ended in the middle of a '//' run; report it.  An unterminated
    # '/*' comment is not reported.
    if mode == 'CPP':
        yield start, len(lines) - 1


def find_copyright_block(lines, lang_type):
    """Generate the comment blocks found at the top of a source file.

    lines     -- sequence of the file's lines
    lang_type -- language name string selecting the comment syntax (note
                 that this parameter shadows the imported lang_type function
                 inside this body)

    Yields (start, end) pairs of 0-based, inclusive line indices.
    Raises AttributeError, on first iteration, for an unsupported language.
    """
    if lang_type in ('python', 'make', 'shell', 'perl', 'scons'):
        blocks = _line_comment_blocks(lines, shell_comment, True)
    elif lang_type in ('lisp', ):
        blocks = _line_comment_blocks(lines, lisp_comment, False)
    elif lang_type in ('C', 'C++', 'swig', 'isa', 'asm', 'slicc',
                       'lex', 'yacc'):
        blocks = _c_family_blocks(lines)
    else:
        raise AttributeError("Could not handle language %s" % lang_type)

    for block in blocks:
        yield block
978333Snate@binkert.org
# A year range such as "2004-2006" (whitespace around the dash is allowed).
date_range_re = re.compile(r'([0-9]{4})\s*-\s*([0-9]{4})')


def process_dates(dates):
    """Expand a comma-separated list of years and year ranges into a set.

    dates -- string such as "2001, 2003-2005"

    Returns a set of ints holding every year named, with ranges expanded
    inclusively.  Entries that are neither a range nor an integer are
    ignored, and a range whose end precedes its start contributes nothing.
    """
    years = set()
    for entry in dates.split(','):
        entry = entry.strip()

        match = date_range_re.match(entry)
        if match:
            first, last = [int(year) for year in match.groups()]
            years.update(range(first, last + 1))
            continue

        try:
            years.add(int(entry))
        except ValueError:
            # Best effort: skip empty or malformed entries (for example the
            # empty string left behind by a trailing comma).
            pass

    return years
1178333Snate@binkert.org
# "Copyright (c) <years> <owner>".  Groups: the "(c)" marker, the raw year
# list, and the owner.  Comment characters and whitespace between the years
# and the owner are skipped, so the owner may sit on the following comment
# line.  Letters are spelled A-Za-z: the range A-z would also admit the
# punctuation characters that lie between 'Z' and 'a' in ASCII.
copyright_re = \
    re.compile(r'Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-Za-z,. -]+)',
               re.DOTALL)

# First line of an author list: "Authors: <name>" after any comment prefix.
authors_re = re.compile(r'^[\s*#/]*Authors:\s*([A-Za-z .]+)\s*$')
# Continuation line of an author list: just a name after the comment prefix.
more_authors_re = re.compile(r'^[\s*#/]*([A-Za-z .]+)\s*$')
1248333Snate@binkert.org
# Every owner string seen by get_data() so far.
all_owners = set()


def get_data(lang_type, lines):
    """Extract copyright information from the comment blocks of a file.

    lang_type -- language name passed through to find_copyright_block()
    lines     -- sequence of the file's lines

    Returns a list of (owner, dates, authors, start, end) tuples, one per
    comment block that contains a copyright notice: owner is a string,
    dates the set returned by process_dates(), authors a list of names, and
    start/end the 0-based inclusive line indices of the block.  Each owner
    is also recorded in the module-level all_owners set.

    Exceptions raised by find_copyright_block() or process_dates()
    propagate to the caller.
    """
    data = []
    for start, end in find_copyright_block(lines, lang_type):
        joined = ''.join(lines[start:end + 1])
        match = copyright_re.search(joined)
        if not match:
            continue

        # The first group is the "(c)" marker, which is not needed.
        _, dates, owner = match.groups()
        dates = dates.strip()
        owner = owner.strip()

        all_owners.add(owner)
        try:
            dates = process_dates(dates)
        except Exception:
            # Show what was being parsed before letting the error propagate.
            print(dates)
            print(owner)
            raise

        authors = []
        for i in range(start, end + 1):
            line = lines[i]
            if not authors:
                # Still looking for the "Authors:" line.
                match = authors_re.search(line)
                if match:
                    authors.append(match.group(1).strip())
                continue

            match = more_authors_re.search(line)
            if match:
                authors.append(match.group(1).strip())
                continue

            # The author list is over.  Scan the rest of the block and pull
            # `end` in: to the first blank line, or to the line before the
            # first '//' line that carries text.
            for j in range(i, end + 1):
                line = lines[j].strip()
                if not line:
                    end = j
                    break
                if line.startswith('//'):
                    line = line[2:].lstrip()
                    if line:
                        end = j - 1
                        break
            break

        data.append((owner, dates, authors, start, end))

    return data
1748333Snate@binkert.org
def datestr(dates):
    """Format a collection of years as a compact comma-separated string.

    dates -- iterable of ints, in any order

    Consecutive years are collapsed into "first-last" ranges, so
    {2001, 2002, 2003, 2005} becomes "2001-2003,2005".  Returns the empty
    string when no years are given.
    """
    years = sorted(set(dates))
    if not years:
        return ''

    def span(first, last):
        if first == last:
            return '%d' % first
        return '%d-%d' % (first, last)

    output = []
    first = last = years[0]
    for year in years[1:]:
        if year == last + 1:
            # Extends the current run of consecutive years.
            last = year
        else:
            output.append(span(first, last))
            first = last = year
    output.append(span(first, last))

    return ','.join(output)
2008333Snate@binkert.org
# Lists every option the command-line parser accepts ("ci:v"); both files
# and directories are accepted as arguments.
usage_str = """usage:
%s [-c] [-i <ignore>] [-v] <file-or-directory>..."""


def usage(exitcode):
    """Print the usage message, then exit unless exitcode is None.

    exitcode -- process exit status, or None to return to the caller
    """
    print(usage_str % sys.argv[0])
    if exitcode is not None:
        sys.exit(exitcode)
2088333Snate@binkert.org
if __name__ == '__main__':
    # Scan the files and directories named on the command line and print one
    # merged "Copyright (c) <years> <owner>" line per copyright owner.
    import getopt

    show_counts = False
    # NOTE(review): -i values are collected here but nothing below reads
    # them -- confirm whether filtering by this set was intended.
    ignore = set()
    verbose = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ci:v")
    except getopt.GetoptError:
        usage(1)

    for o, a in opts:
        if o == '-c':
            show_counts = True
        elif o == '-i':
            ignore.add(a)
        elif o == '-v':
            verbose = True

    files = []

    for base in args:
        if os.path.isfile(base):
            files += [(base, lang_type(base))]
        elif os.path.isdir(base):
            files += find_files(base)
        else:
            raise AttributeError("can't access '%s'" % base)

    copyrights = {}     # owner -> set of years
    counts = {}         # owner -> number of copyright blocks seen

    for filename, _lang in files:
        # The context manager closes the file even for empty files and when
        # reading fails.
        with open(filename, 'r') as f:
            lines = f.readlines()
        if not lines:
            continue

        lines = [line.rstrip('\r\n') for line in lines]

        # The language is determined again here, this time with the first
        # line of the file available.
        lt = lang_type(filename, lines[0])
        try:
            data = get_data(lt, lines)
        except Exception as e:
            # Best effort: a file that cannot be parsed is skipped, and only
            # reported when -v was given.
            if verbose:
                if len(e.args) == 1:
                    e.args = ('%s (%s)' % (e, filename), )
                print("could not parse %s: %s" % (filename, e))
            continue

        for owner, dates, authors, start, end in data:
            copyrights.setdefault(owner, set()).update(dates)
            counts[owner] = counts.get(owner, 0) + 1

    info = [(counts[o], d, o) for o, d in copyrights.items()]

    # Most frequent owner first.  Ties are broken by owner name so that the
    # year sets, which have no total order, are never compared.
    info.sort(key=lambda item: (-item[0], item[2]))

    for count, dates, owner in info:
        if show_counts:
            owner = '%s (%s files)' % (owner, count)
        print('Copyright (c) %s %s' % (datestr(dates), owner))
274