#!/usr/bin/env python2.7
# find_copyrights.py revision 13540
#
# Scan source files for their leading comment blocks, extract
# "Copyright (c) <years> <owner>" notices from them, and print one merged
# copyright line per owner.
#
# The code below sticks to constructs that are valid on both Python 2.7 and
# Python 3 (single-argument print(...), "except ... as", range, open).

import os
import re
import sys

# NOTE: file_types (lang_type, find_files) is only needed by the command-line
# driver, so it is imported inside the __main__ section at the bottom.  The
# parsing helpers in this module depend on the standard library only.

# Emacs-style "-*- mode: ... -*-" marker; only ever checked on the first line.
mode_line = re.compile(r'(-\*- *mode:.* *-\*-)')
shell_comment = re.compile(r'^\s*#')
lisp_comment = re.compile(r';')
cpp_comment = re.compile(r'//')
c_comment_start = re.compile(r'/\*')
c_comment_end = re.compile(r'\*/')


def _line_comment_blocks(lines, comment_re, allow_shebang):
    """Yield (start, end) index pairs for runs of single-line comments.

    A line counts as a comment when comment_re.search() matches it.  The
    first line is skipped if it carries a mode line, or (when allow_shebang
    is true) starts with '#!'.  Scanning stops at the first non-blank,
    non-comment line that is met while no comment run is open.
    """
    start = None
    for i, line in enumerate(lines):
        if i == 0 and ((allow_shebang and line.startswith('#!')) or
                       mode_line.search(line)):
            continue

        if comment_re.search(line):
            if start is None:
                start = i
        elif start is None:
            if line.strip():
                return
        else:
            # First non-comment line after a run closes that run.
            yield start, i - 1
            start = None

    # A run that is still open when the input ends is a block too;
    # previously it was silently dropped.
    if start is not None:
        yield start, len(lines) - 1


def _c_comment_blocks(lines):
    """Yield (start, end) index pairs for leading // runs and /* */ comments.

    mode is None outside a comment, 'CPP' inside a run of // lines and 'C'
    between a /* and the next line containing */.  A // line whose text
    starts with "Copyright" splits the current // run in two.
    """
    start = None
    mode = None
    for i, line in enumerate(lines):
        if i == 0 and mode_line.search(line):
            continue

        if mode == 'C':
            assert start is not None, 'on line %d' % (i + 1)
            if c_comment_end.search(line):
                yield start, i
                mode = None
            continue

        cpp_match = cpp_comment.search(line)
        c_match = c_comment_start.search(line)

        if cpp_match:
            assert not c_match, 'on line %d' % (i + 1)
            if line[:cpp_match.start()].strip():
                # Code precedes the //: the header region is over.  Emit
                # the pending // run first instead of losing it.
                if mode == 'CPP':
                    yield start, i - 1
                return
            if mode is None:
                mode = 'CPP'
                start = i
            else:
                text = line[cpp_match.end():].lstrip()
                if text.startswith("Copyright"):
                    yield start, i - 1
                    start = i
            continue
        elif mode == 'CPP':
            assert start is not None, 'on line %d' % (i + 1)
            if not line.strip():
                # Blank lines do not terminate a // run.
                continue
            yield start, i - 1
            mode = None
            if not c_match:
                return

        if c_match:
            assert mode is None, 'on line %d' % (i + 1)
            mode = 'C'
            start = i

        if mode is None and line.strip():
            return

    # A // run that reaches the end of the input is still a block.  An
    # unterminated /* comment is not reported.
    if mode == 'CPP':
        yield start, len(lines) - 1


def find_copyright_block(lines, lang_type):
    """Generate (start, end) line-index pairs of leading comment blocks.

    lines     -- list of the file's lines
    lang_type -- language name selecting the comment syntax

    Indices are 0-based and inclusive.  Raises AttributeError (on first
    iteration, since this is a generator) for an unsupported lang_type.
    """
    if lang_type in ('python', 'make', 'shell', 'perl', 'scons'):
        blocks = _line_comment_blocks(lines, shell_comment, True)
    elif lang_type in ('lisp', ):
        blocks = _line_comment_blocks(lines, lisp_comment, False)
    elif lang_type in ('C', 'C++', 'swig', 'isa', 'asm', 'slicc',
                       'lex', 'yacc'):
        blocks = _c_comment_blocks(lines)
    else:
        raise AttributeError("Could not handle language %s" % lang_type)

    for block in blocks:
        yield block


date_range_re = re.compile(r'([0-9]{4})\s*-\s*([0-9]{4})')


def process_dates(dates):
    """Expand a comma-separated year list such as "2005-2007, 2010".

    Returns a set of ints.  Ranges are inclusive; entries that are neither
    a range nor an integer are ignored.
    """
    output = set()
    for date in (d.strip() for d in dates.split(',')):
        match = date_range_re.match(date)
        if match:
            first, last = [int(d) for d in match.groups()]
            output.update(range(first, last + 1))
            continue
        try:
            output.add(int(date))
        except ValueError:
            # Not a year; skip it.
            pass

    return output


# NOTE: the "A-z" ranges in the patterns below also admit the ASCII
# punctuation that sits between 'Z' and 'a' ([ \ ] ^ _ `).  They are kept
# as they were so that existing matches do not change.
copyright_re = \
    re.compile(r'Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-z-,. ]+)',
               re.DOTALL)

authors_re = re.compile(r'^[\s*#/]*Authors:\s*([A-z .]+)\s*$')
more_authors_re = re.compile(r'^[\s*#/]*([A-z .]+)\s*$')

# Every owner string seen by get_data() since the module was loaded.
all_owners = set()


def get_data(lang_type, lines):
    """Extract copyright records from the comment blocks of a file.

    lang_type -- language name, passed on to find_copyright_block()
    lines     -- list of the file's lines

    Returns a list of (owner, dates, authors, start, end) tuples, one per
    comment block in which copyright_re matches.  dates is a set of ints,
    authors a list of names taken from an "Authors:" line and the lines
    following it, and start/end are 0-based line indices.  Each owner is
    also added to the module-level all_owners set.
    """
    data = []
    for start, end in find_copyright_block(lines, lang_type):
        joined = ''.join(lines[start:end + 1])
        match = copyright_re.search(joined)
        if not match:
            continue

        dates = match.group(2).strip()
        owner = match.group(3).strip()

        all_owners.add(owner)
        try:
            dates = process_dates(dates)
        except Exception:
            # Show what was being parsed, then let the error propagate.
            print(dates)
            print(owner)
            raise

        authors = []
        for i in range(start, end + 1):
            line = lines[i]
            if not authors:
                # Still looking for the "Authors:" line.
                match = authors_re.search(line)
                if match:
                    authors.append(match.group(1).strip())
            else:
                match = more_authors_re.search(line)
                if not match:
                    # The author list is over; pull `end` back to the first
                    # blank line, or to just before the first // line that
                    # carries text.
                    for j in range(i, end + 1):
                        line = lines[j].strip()
                        if not line:
                            end = j
                            break
                        if line.startswith('//'):
                            line = line[2:].lstrip()
                            if line:
                                end = j - 1
                                break
                    break
                authors.append(match.group(1).strip())

        data.append((owner, dates, authors, start, end))

    return data


def datestr(dates):
    """Format years compactly, e.g. {2005, 2006, 2007, 2010} -> "2005-2007,2010".

    Consecutive years collapse into "first-last"; pieces are joined with
    commas.  An empty collection gives an empty string.
    """
    years = sorted(dates)
    if not years:
        return ''

    output = []

    def add_output(first, second):
        if first == second:
            output.append('%d' % (first))
        else:
            output.append('%d-%d' % (first, second))

    first = second = years[0]
    for year in years[1:]:
        if year == second + 1:
            second = year
        else:
            add_output(first, second)
            first = second = year

    add_output(first, second)

    return ','.join(output)


usage_str = """usage:
%s [-v] <directory>"""


def usage(exitcode):
    """Print the usage text; exit with exitcode unless it is None."""
    print(usage_str % sys.argv[0])
    if exitcode is not None:
        sys.exit(exitcode)


if __name__ == '__main__':
    import getopt

    from file_types import lang_type, find_files

    show_counts = False
    # Filled from -i, but not consulted anywhere in this file.
    ignore = set()
    verbose = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ci:v")
    except getopt.GetoptError:
        usage(1)

    for o, a in opts:
        if o == '-c':
            show_counts = True
        if o == '-i':
            ignore.add(a)
        if o == '-v':
            verbose = True

    files = []

    for base in args:
        if os.path.isfile(base):
            files += [(base, lang_type(base))]
        elif os.path.isdir(base):
            files += find_files(base)
        else:
            raise AttributeError("can't access '%s'" % base)

    copyrights = {}
    counts = {}

    for filename, lang in files:
        # The context manager closes the handle even if reading fails.
        with open(filename, 'r') as f:
            lines = f.readlines()
        if not lines:
            continue

        lines = [line.rstrip('\r\n') for line in lines]

        # The language is re-detected with the first line as a hint.
        lt = lang_type(filename, lines[0])
        try:
            data = get_data(lt, lines)
        except Exception as e:
            # Files that cannot be parsed are skipped; reported only with -v.
            if verbose:
                if len(e.args) == 1:
                    e.args = ('%s (%s))' % (e, filename), )
                print("could not parse %s: %s" % (filename, e))
            continue

        for owner, dates, authors, start, end in data:
            if owner not in copyrights:
                copyrights[owner] = set()
            if owner not in counts:
                counts[owner] = 0

            copyrights[owner] |= dates
            counts[owner] += 1

    info = [(counts[o], d, o) for o, d in copyrights.items()]

    # Highest count first.
    for count, dates, owner in sorted(info, reverse=True):
        if show_counts:
            owner = '%s (%s files)' % (owner, count)
        print('Copyright (c) %s %s' % (datestr(dates), owner))