#!/usr/bin/env python
# find_copyrights.py (revision 8333)
"""Collect copyright owners and years from the header comments of source files.

Run as a script, every file or directory given on the command line is
scanned and one ``Copyright (c) <years> <owner>`` line is printed per owner.
"""

import os
import re
import sys

from file_types import lang_type, find_files

# Editor mode line, e.g. "-*- mode:python -*-"; skipped when on the first line.
mode_line = re.compile(r'(-\*- *mode:.* *-\*-)')
shell_comment = re.compile(r'^\s*#')
lisp_comment = re.compile(r';')
cpp_comment = re.compile(r'//')
c_comment_start = re.compile(r'/\*')
c_comment_end = re.compile(r'\*/')


def _find_line_comment_blocks(lines, comment_re, allow_shebang):
    """Yield (start, end) for leading blocks of single-line comments.

    lines         -- list of source lines
    comment_re    -- compiled pattern; a line it matches is a comment line
    allow_shebang -- also skip a first line that starts with '#!'

    Indices are 0-based and inclusive.  Scanning stops at the first
    non-blank, non-comment line seen while no block is open.
    """
    start = None
    for i, line in enumerate(lines):
        if i == 0 and ((allow_shebang and line.startswith('#!')) or
                       mode_line.search(line)):
            continue

        if comment_re.search(line):
            if start is None:
                start = i
        elif start is None:
            if line.strip():
                return
        else:
            yield start, i - 1
            start = None

    # The file ended while a comment block was still open; without this the
    # last block of a comment-only file was silently dropped.
    if start is not None:
        yield start, len(lines) - 1


def find_copyright_block(lines, lang_type):
    """Yield (start, end) index pairs for the comment blocks heading a file.

    lines     -- list of source lines
    lang_type -- language name selecting the comment syntax to look for

    Indices are 0-based and inclusive.  Because this is a generator, the
    AttributeError for an unhandled language is raised on first iteration,
    not at call time.  The C-family branch raises AssertionError on input
    it cannot classify (e.g. '//' and '/*' on the same line).
    """
    if lang_type in ('python', 'make', 'shell', 'perl', 'scons'):
        for block in _find_line_comment_blocks(lines, shell_comment, True):
            yield block

    elif lang_type in ('lisp', ):
        for block in _find_line_comment_blocks(lines, lisp_comment, False):
            yield block

    elif lang_type in ('C', 'C++', 'swig', 'isa', 'asm', 'slicc',
                       'lex', 'yacc'):
        start = None
        # None: outside any comment; 'C': inside /* ... */;
        # 'CPP': inside a run of // lines.
        mode = None
        for i, line in enumerate(lines):
            if i == 0 and mode_line.search(line):
                continue

            if mode == 'C':
                assert start is not None, 'on line %d' % (i + 1)
                if c_comment_end.search(line):
                    yield start, i
                    mode = None
                continue

            cpp_match = cpp_comment.search(line)
            c_match = c_comment_start.search(line)

            if cpp_match:
                assert not c_match, 'on line %d' % (i + 1)
                # Code before the '//' means the header comments are over.
                if line[:cpp_match.start()].strip():
                    return
                if mode is None:
                    mode = 'CPP'
                    start = i
                else:
                    # A new "Copyright" line inside a // run starts a new
                    # block.
                    text = line[cpp_match.end():].lstrip()
                    if text.startswith("Copyright"):
                        yield start, i - 1
                        start = i
                continue
            elif mode == 'CPP':
                assert start is not None, 'on line %d' % (i + 1)
                if not line.strip():
                    continue
                yield start, i - 1
                mode = None
                if not c_match:
                    return

            if c_match:
                assert mode is None, 'on line %d' % (i + 1)
                mode = 'C'
                start = i

            if mode is None and line.strip():
                return

        # The file ended inside a run of // lines: emit it.  An unterminated
        # /* comment is still not reported.
        if mode == 'CPP':
            yield start, len(lines) - 1

    else:
        raise AttributeError("Could not handle language %s" % lang_type)


date_range_re = re.compile(r'([0-9]{4})\s*-\s*([0-9]{4})')


def process_dates(dates):
    """Expand a comma-separated string of years and ranges into a set of ints.

    dates -- e.g. "2001, 2003-2005"

    Entries that are neither a four-digit range nor an integer are ignored.
    """
    output = set()
    for entry in dates.split(','):
        entry = entry.strip()
        match = date_range_re.match(entry)
        if match:
            first, last = [int(year) for year in match.groups()]
            output.update(range(first, last + 1))
            continue

        try:
            output.add(int(entry))
        except ValueError:
            pass

    return output


# NOTE(review): the class 'A-z' also spans the ASCII punctuation between 'Z'
# and 'a' ([ \ ] ^ _ `).  Left unchanged so existing matches stay the same.
copyright_re = \
    re.compile(r'Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-z-,. ]+)',
               re.DOTALL)

authors_re = re.compile(r'^[\s*#/]*Authors:\s*([A-z .]+)\s*$')
more_authors_re = re.compile(r'^[\s*#/]*([A-z .]+)\s*$')

# Every owner string seen by get_data() during this run.
all_owners = set()


def get_data(lang_type, lines):
    """Extract copyright records from the header comments of one file.

    lang_type -- language name passed on to find_copyright_block()
    lines     -- list of source lines

    Returns a list of (owner, dates, authors, start, end) tuples, where
    dates is a set of ints and start/end are 0-based line indices.  Each
    owner is also added to the module-level all_owners set.
    """
    data = []
    for start, end in find_copyright_block(lines, lang_type):
        joined = ''.join(lines[start:end + 1])
        match = copyright_re.search(joined)
        if not match:
            continue

        _, raw_dates, owner = match.groups()
        raw_dates = raw_dates.strip()
        owner = owner.strip()

        all_owners.add(owner)
        try:
            dates = process_dates(raw_dates)
        except Exception:
            # Show what was being parsed before propagating the error.
            print(raw_dates)
            print(owner)
            raise

        authors = []
        for i in range(start, end + 1):
            line = lines[i]
            if not authors:
                match = authors_re.search(line)
                if match:
                    authors.append(match.group(1).strip())
            else:
                match = more_authors_re.search(line)
                if not match:
                    # First line after "Authors:" that is not a name: adjust
                    # the reported end of the block, then stop collecting.
                    # The range above was already computed, so this only
                    # affects the value stored in the result tuple.
                    for j in range(i, end + 1):
                        line = lines[j].strip()
                        if not line:
                            end = j
                            break
                        if line.startswith('//'):
                            line = line[2:].lstrip()
                            if line:
                                end = j - 1
                                break
                    break
                authors.append(match.group(1).strip())

        data.append((owner, dates, authors, start, end))

    return data


def datestr(dates):
    """Format years as a compact string, e.g. {2001, 2002, 2004} -> '2001-2002,2004'.

    dates -- iterable of ints

    Runs of consecutive years are collapsed into 'first-last'.  An empty
    input yields '' (it used to raise IndexError).
    """
    years = sorted(dates)
    if not years:
        return ''

    output = []

    def add_output(first, second):
        if first == second:
            output.append('%d' % (first))
        else:
            output.append('%d-%d' % (first, second))

    first = second = years[0]
    for year in years[1:]:
        if year == second + 1:
            second = year
        else:
            add_output(first, second)
            first = second = year

    add_output(first, second)

    return ','.join(output)


usage_str = """usage:
%s [-v] <directory>"""


def usage(exitcode):
    """Print the usage message; exit with exitcode unless it is None."""
    print(usage_str % sys.argv[0])
    if exitcode is not None:
        sys.exit(exitcode)


if __name__ == '__main__':
    import getopt

    show_counts = False
    # NOTE(review): filled from -i but not consulted anywhere in this file.
    ignore = set()
    verbose = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ci:v")
    except getopt.GetoptError:
        usage(1)

    for o, a in opts:
        if o == '-c':
            show_counts = True
        if o == '-i':
            ignore.add(a)
        if o == '-v':
            verbose = True

    files = []

    for base in args:
        if os.path.isfile(base):
            files += [(base, lang_type(base))]
        elif os.path.isdir(base):
            files += find_files(base)
        else:
            raise AttributeError("can't access '%s'" % base)

    copyrights = {}
    counts = {}

    for filename, lang in files:
        # Close each file as soon as it is read; a large tree could
        # otherwise run out of file descriptors.
        with open(filename, 'r') as f:
            lines = f.readlines()
        if not lines:
            continue

        lines = [line.rstrip('\r\n') for line in lines]

        # Re-detect the language now that the first line is available.
        lt = lang_type(filename, lines[0])
        try:
            data = get_data(lt, lines)
        except Exception as e:
            if verbose:
                if len(e.args) == 1:
                    e.args = ('%s (%s))' % (e, filename), )
                print("could not parse %s: %s" % (filename, e))
            continue

        for owner, dates, authors, start, end in data:
            if owner not in copyrights:
                copyrights[owner] = set()
            if owner not in counts:
                counts[owner] = 0

            copyrights[owner] |= dates
            counts[owner] += 1

    info = [(counts[o], d, o) for o, d in copyrights.items()]

    for count, dates, owner in sorted(info, reverse=True):
        if show_counts:
            owner = '%s (%s files)' % (owner, count)
        print('Copyright (c) %s %s' % (datestr(dates), owner))