#!/usr/bin/env python
# find_copyrights.py revision 8333
"""Collect and summarize the copyright notices found in source files.

Run as a script, this walks the files/directories given on the command
line, extracts the leading copyright comment blocks of every file, and
prints one ``Copyright (c) <years> <owner>`` line per distinct owner.

The code is written in the subset of Python that is valid on both
Python 2.6+ and Python 3.
"""

import os
import re
import sys

# NOTE: ``lang_type`` and ``find_files`` (from the project-local
# ``file_types`` module) are imported inside the ``__main__`` block
# below, next to ``getopt``: they are only used by the script entry
# point, and the functions in this module take a *parameter* that is
# also called ``lang_type``.

# Emacs-style mode line, e.g. "-*- mode:python -*-".
mode_line = re.compile(r'(-\*- *mode:.* *-\*-)')
shell_comment = re.compile(r'^\s*#')
lisp_comment = re.compile(r';')
cpp_comment = re.compile(r'//')
c_comment_start = re.compile(r'/\*')
c_comment_end = re.compile(r'\*/')


def _line_comment_blocks(lines, comment_re, allow_shebang):
    """Yield (start, end) for runs of lines matching ``comment_re``.

    Only the comment runs at the top of the file are reported: scanning
    stops at the first non-blank, non-comment line that is not part of a
    pending run.  A first line that is a mode line (or, when
    ``allow_shebang`` is true, starts with ``#!``) is skipped.
    """
    start = None
    for i, line in enumerate(lines):
        if i == 0 and ((allow_shebang and line.startswith('#!')) or
                       mode_line.search(line)):
            continue

        if comment_re.search(line):
            if start is None:
                start = i
        elif start is None:
            if line.strip():
                # Real code before any further comment: header is over.
                return
        else:
            yield start, i - 1
            start = None

    # A comment run that extends to the end of the file is a block too.
    if start is not None:
        yield start, len(lines) - 1


def _c_family_blocks(lines):
    """Yield (start, end) for the leading ``/* */`` and ``//`` blocks.

    ``mode`` tracks what kind of comment is currently open: ``'C'`` for a
    ``/* ... */`` comment, ``'CPP'`` for a run of ``//`` lines, ``None``
    for neither.  Raises AssertionError on layouts the scanner does not
    understand (e.g. ``//`` and ``/*`` on the same line).
    """
    mode = None
    start = None
    for i, line in enumerate(lines):
        if i == 0 and mode_line.search(line):
            continue

        if mode == 'C':
            assert start is not None, 'on line %d' % (i + 1)
            if c_comment_end.search(line):
                yield start, i
                mode = None
            continue

        cpp_match = cpp_comment.search(line)
        c_match = c_comment_start.search(line)

        if cpp_match:
            assert not c_match, 'on line %d' % (i + 1)
            if line[:cpp_match.start()].strip():
                # Code precedes the '//': the header is over.  Report a
                # pending '//' run instead of silently dropping it.
                if mode == 'CPP':
                    yield start, i - 1
                return
            if mode is None:
                mode = 'CPP'
                start = i
            else:
                # A new "Copyright" line inside a '//' run starts a new
                # block so that each notice is reported separately.
                text = line[cpp_match.end():].lstrip()
                if text.startswith("Copyright"):
                    yield start, i - 1
                    start = i
            continue
        elif mode == 'CPP':
            assert start is not None, 'on line %d' % (i + 1)
            if not line.strip():
                # Blank lines do not terminate a '//' run.
                continue
            yield start, i - 1
            mode = None
            if not c_match:
                return

        if c_match:
            assert mode is None, 'on line %d' % (i + 1)
            if c_comment_end.search(line, c_match.end()):
                # The comment opens and closes on this very line.
                yield i, i
                continue
            mode = 'C'
            start = i

        if mode is None and line.strip():
            return

    # A '//' run that extends to the end of the file is a block too.  An
    # unterminated '/*' comment is malformed and is not reported.
    if mode == 'CPP':
        yield start, len(lines) - 1


def find_copyright_block(lines, lang_type):
    """Generate the leading comment blocks of a file.

    Arguments:
      lines     -- the file contents as a list of lines
      lang_type -- language name selecting the comment syntax:
                   'python', 'make', 'shell', 'perl', 'scons' ('#'),
                   'lisp' (';'), or 'C', 'C++', 'swig', 'isa', 'asm',
                   'slicc', 'lex', 'yacc' ('/* */' and '//')

    Yields (start, end) pairs of 0-based, inclusive line indices.

    Raises AttributeError (on first iteration) for any other language.
    """
    if lang_type in ('python', 'make', 'shell', 'perl', 'scons'):
        blocks = _line_comment_blocks(lines, shell_comment, True)
    elif lang_type in ('lisp', ):
        blocks = _line_comment_blocks(lines, lisp_comment, False)
    elif lang_type in ('C', 'C++', 'swig', 'isa', 'asm', 'slicc',
                       'lex', 'yacc'):
        blocks = _c_family_blocks(lines)
    else:
        raise AttributeError("Could not handle language %s" % lang_type)

    for block in blocks:
        yield block


date_range_re = re.compile(r'([0-9]{4})\s*-\s*([0-9]{4})')


def process_dates(dates):
    """Expand a comma separated list of years/year ranges into a set.

    ``"2001-2003, 2005"`` becomes ``set([2001, 2002, 2003, 2005])``.
    Entries that are neither a year range nor an integer are ignored.
    """
    output = set()
    for date in dates.split(','):
        date = date.strip()
        match = date_range_re.match(date)
        if match:
            first, last = [int(d) for d in match.groups()]
            output.update(range(first, last + 1))
        else:
            try:
                output.add(int(date))
            except ValueError:
                # Deliberately lenient: the date text comes from a loose
                # regex and may contain empty or stray entries.
                pass

    return output


# NOTE: the character range ``A-z`` also covers the punctuation between
# 'Z' and 'a' in ASCII ('[', '\\', ']', '^', '_', '`'); kept as is so the
# set of recognized notices does not change.
copyright_re = \
    re.compile(r'Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-z-,. ]+)',
               re.DOTALL)

authors_re = re.compile(r'^[\s*#/]*Authors:\s*([A-z .]+)\s*$')
more_authors_re = re.compile(r'^[\s*#/]*([A-z .]+)\s*$')

# Every owner string seen by get_data() so far.
all_owners = set()


def _find_authors(lines, start, end):
    """Collect the "Authors:" list of the block lines[start..end].

    Returns (authors, end).  ``end`` is moved when the author list is
    followed by a blank line or by a non-empty '//' line inside the
    block; otherwise it is returned unchanged.
    """
    authors = []
    for i in range(start, end + 1):
        line = lines[i]
        if not authors:
            # Still looking for the "Authors:" line itself.
            match = authors_re.search(line)
            if match:
                authors.append(match.group(1).strip())
            continue

        match = more_authors_re.search(line)
        if match:
            authors.append(match.group(1).strip())
            continue

        # First line after the author list that is not a plain name:
        # find where the block really ends, then stop scanning.
        for j in range(i, end + 1):
            text = lines[j].strip()
            if not text:
                end = j
                break
            if text.startswith('//'):
                text = text[2:].lstrip()
                if text:
                    end = j - 1
                    break
        break

    return authors, end


def get_data(lang_type, lines):
    """Extract the copyright notices from the header of a file.

    Arguments:
      lang_type -- language name, see find_copyright_block()
      lines     -- the file contents as a list of lines (without line
                   terminators)

    Returns a list of (owner, dates, authors, start, end) tuples, one per
    comment block that contains a copyright notice; ``dates`` is a set
    of years and ``authors`` a list of names.  Every owner is also
    recorded in the module level ``all_owners`` set.
    """
    data = []
    for start, end in find_copyright_block(lines, lang_type):
        joined = ''.join(lines[start:end + 1])
        match = copyright_re.search(joined)
        if not match:
            continue

        dates = match.group(2).strip()
        owner = match.group(3).strip()

        all_owners.add(owner)
        try:
            dates = process_dates(dates)
        except Exception:
            # Show what was being parsed, then let the error propagate.
            print(dates)
            print(owner)
            raise

        authors, end = _find_authors(lines, start, end)
        data.append((owner, dates, authors, start, end))

    return data


def datestr(dates):
    """Format a collection of years compactly, e.g. '2001-2003,2005'.

    Consecutive years are collapsed into 'first-last' ranges.  An empty
    collection yields the empty string.
    """
    years = sorted(dates)
    if not years:
        return ''

    output = []

    def add_output(first, second):
        if first == second:
            output.append('%d' % (first))
        else:
            output.append('%d-%d' % (first, second))

    first = second = years[0]
    for year in years[1:]:
        if year == second + 1:
            second = year
        else:
            add_output(first, second)
            first = second = year

    add_output(first, second)

    return ','.join(output)


usage_str = """usage:
%s [-v] <directory>"""


def usage(exitcode):
    """Print the usage message; exit with ``exitcode`` unless it is None."""
    print(usage_str % sys.argv[0])
    if exitcode is not None:
        sys.exit(exitcode)


if __name__ == '__main__':
    import getopt

    from file_types import lang_type, find_files

    show_counts = False
    # NOTE(review): values given with -i are collected here but nothing
    # below consults them -- confirm the intended meaning of -i.
    ignore = set()
    verbose = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ci:v")
    except getopt.GetoptError:
        usage(1)

    for o, a in opts:
        if o == '-c':
            show_counts = True
        if o == '-i':
            ignore.add(a)
        if o == '-v':
            verbose = True

    files = []

    for base in args:
        if os.path.isfile(base):
            files += [(base, lang_type(base))]
        elif os.path.isdir(base):
            files += find_files(base)
        else:
            raise AttributeError("can't access '%s'" % base)

    copyrights = {}
    counts = {}

    for filename, lang in files:
        # Close the file promptly instead of leaking the handle.
        with open(filename, 'r') as f:
            lines = f.readlines()
        if not lines:
            continue

        lines = [line.rstrip('\r\n') for line in lines]

        lt = lang_type(filename, lines[0])
        try:
            data = get_data(lt, lines)
        except Exception as e:
            # Best effort: a file that cannot be parsed is skipped (and
            # reported only with -v) rather than aborting the whole run.
            if verbose:
                if len(e.args) == 1:
                    e.args = ('%s (%s)' % (e, filename), )
                print("could not parse %s: %s" % (filename, e))
            continue

        for owner, dates, authors, start, end in data:
            if owner not in copyrights:
                copyrights[owner] = set()
            if owner not in counts:
                counts[owner] = 0

            copyrights[owner] |= dates
            counts[owner] += 1

    info = [(counts[o], d, o) for o, d in copyrights.items()]

    # Sort on (count, owner) only: comparing the date sets themselves is
    # a subset test, which does not give a consistent ordering.
    ordered = sorted(info, key=lambda entry: (entry[0], entry[2]),
                     reverse=True)
    for count, dates, owner in ordered:
        if show_counts:
            owner = '%s (%s files)' % (owner, count)
        print('Copyright (c) %s %s' % (datestr(dates), owner))