2017-02-10 16:00:18 +01:00
|
|
|
#!/usr/bin/env python2
|
2011-06-03 02:36:07 +02:00
|
|
|
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import sys
|
|
|
|
|
|
|
|
from file_types import lang_type, find_files
|
|
|
|
|
|
|
|
# Emacs-style "-*- mode: ... -*-" marker; only honoured on a file's first line.
mode_line = re.compile(r'(-\*- *mode:.* *-\*-)')
# A line whose first non-blank character is '#'.
shell_comment = re.compile(r'^\s*#')
# NOTE: matches a ';' anywhere in the line, not only at its start.
lisp_comment = re.compile(r';')
# '//' anywhere in the line; the scanner checks what precedes it.
cpp_comment = re.compile(r'//')
c_comment_start = re.compile(r'/\*')
c_comment_end = re.compile(r'\*/')


def _find_line_comment_blocks(lines, comment_re, allow_shebang):
    """Yield (start, end) index pairs of leading line-comment blocks.

    lines         -- sequence of source lines
    comment_re    -- compiled regex that identifies a comment line
    allow_shebang -- when true, a first line starting with '#!' is skipped

    Scanning stops at the first non-blank, non-comment line that is not
    directly terminating a comment block.  Indices are 0-based and inclusive.
    """
    start = None
    for i, line in enumerate(lines):
        # The interpreter line / editor mode line is not part of any block.
        if i == 0 and ((allow_shebang and line.startswith('#!')) or
                       mode_line.search(line)):
            continue

        if comment_re.search(line):
            if start is None:
                start = i
        elif start is None:
            # Outside a block: blank lines are skipped, code ends the scan.
            if line.strip():
                return
        else:
            # First non-comment line after a block closes that block.
            yield start, i - 1
            start = None

    # A block that runs up to the end of the file has no terminating line;
    # report it instead of silently dropping it.
    if start is not None:
        yield start, i


def _find_c_family_blocks(lines):
    """Yield (start, end) index pairs of leading '/* */' and '//' comments.

    A run of '//' lines is split whenever a later line's comment text starts
    with "Copyright", so stacked notices are reported separately.  Indices
    are 0-based and inclusive.  Unexpected comment nesting trips an assert
    whose message carries the 1-based line number.
    """
    start = None
    mode = None  # None, 'C' (inside /* ... */) or 'CPP' (inside a // run)
    for i, line in enumerate(lines):
        if i == 0 and mode_line.search(line):
            continue

        if mode == 'C':
            assert start is not None, 'on line %d' % (i + 1)
            if c_comment_end.search(line):
                yield start, i
                mode = None
            continue

        cpp_match = cpp_comment.search(line)
        c_match = c_comment_start.search(line)

        if cpp_match:
            assert not c_match, 'on line %d' % (i + 1)
            # Code before the '//' means the header comments are over.
            if line[:cpp_match.start()].strip():
                return
            if mode is None:
                mode = 'CPP'
                start = i
            else:
                text = line[cpp_match.end():].lstrip()
                if text.startswith("Copyright"):
                    # A new notice begins: close the previous one here.
                    yield start, i - 1
                    start = i
            continue

        if mode == 'CPP':
            assert start is not None, 'on line %d' % (i + 1)
            # Blank lines do not end a '//' run.
            if not line.strip():
                continue
            yield start, i - 1
            mode = None
            if not c_match:
                return

        if c_match:
            assert mode is None, 'on line %d' % (i + 1)
            mode = 'C'
            start = i

        if mode is None and line.strip():
            return

    # A '//' run that reaches the end of the file has no terminating line;
    # report it instead of silently dropping it.
    if mode == 'CPP':
        yield start, i


def find_copyright_block(lines, lang_type):
    """Yield (start, end) line-index pairs for each leading comment block.

    lines     -- sequence of source lines
    lang_type -- language name selecting the comment syntax:
                 '#' comments for python/make/shell/perl/scons,
                 ';' comments for lisp,
                 '/* */' and '//' comments for C/C++/swig/isa/asm/slicc/
                 lex/yacc

    Indices are 0-based and inclusive.  Because this is a generator, an
    unsupported language raises AttributeError on first iteration rather
    than at call time.
    """
    if lang_type in ('python', 'make', 'shell', 'perl', 'scons'):
        blocks = _find_line_comment_blocks(lines, shell_comment, True)
    elif lang_type in ('lisp', ):
        blocks = _find_line_comment_blocks(lines, lisp_comment, False)
    elif lang_type in ('C', 'C++', 'swig', 'isa', 'asm', 'slicc',
                       'lex', 'yacc'):
        blocks = _find_c_family_blocks(lines)
    else:
        raise AttributeError("Could not handle language %s" % lang_type)

    for block in blocks:
        yield block
|
|
|
|
|
|
|
|
# "YYYY-YYYY" with optional whitespace around the dash.
date_range_re = re.compile(r'([0-9]{4})\s*-\s*([0-9]{4})')


def process_dates(dates):
    """Expand a comma-separated year list into a set of integer years.

    dates -- string such as "2001-2003, 2005"

    Each comma-separated entry is either a "first-last" range (expanded
    inclusively; a range whose first year exceeds its last contributes
    nothing) or a single integer.  Entries that are neither are ignored.
    Returns a set of ints.
    """
    output = set()
    for date in dates.split(','):
        date = date.strip()
        match = date_range_re.match(date)
        if match:
            first, last = [int(year) for year in match.groups()]
            output.update(range(first, last + 1))
            continue

        try:
            output.add(int(date))
        except ValueError:
            # Deliberately best-effort: skip entries that are not a year.
            pass

    return output
|
|
|
|
|
|
|
|
# Matches "Copyright (c) <years> <owner>"; group 2 is the raw year list and
# group 3 the owner name.
# NOTE(review): the range "A-z" also spans the ASCII punctuation between 'Z'
# and 'a' ([ \ ] ^ _ `).  Left as-is because narrowing it would change which
# owner strings are reported.
copyright_re = \
    re.compile(r'Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-z-,. ]+)',
               re.DOTALL)

# First line of an author list: "Authors: <name>" behind comment leaders.
authors_re = re.compile(r'^[\s*#/]*Authors:\s*([A-z .]+)\s*$')
# Continuation line of an author list: a bare name behind comment leaders.
more_authors_re = re.compile(r'^[\s*#/]*([A-z .]+)\s*$')

# Every owner string get_data() has seen so far.
all_owners = set()


def _collect_authors(lines, start, end):
    """Return (authors, end) for the comment block lines[start..end].

    Author names are gathered from an "Authors:" line and the name-only
    lines that follow it.  At the first line after the list that is not a
    bare name, the block end is pulled back: to the first blank line found
    from there on, or to just before the first '//' line that carries text.
    If neither is found, `end` is returned unchanged.
    """
    authors = []
    for i in range(start, end + 1):
        line = lines[i]
        if not authors:
            # Still looking for the "Authors:" header line.
            match = authors_re.search(line)
            if match:
                authors.append(match.group(1).strip())
            continue

        match = more_authors_re.search(line)
        if match:
            authors.append(match.group(1).strip())
            continue

        # The author list is over; decide where the block really ends.
        for j in range(i, end + 1):
            text = lines[j].strip()
            if not text:
                end = j
                break
            if text.startswith('//'):
                text = text[2:].lstrip()
                if text:
                    end = j - 1
                    break
        break

    return authors, end


def get_data(lang_type, lines):
    """Extract copyright records from the leading comments of a file.

    lang_type -- language name understood by find_copyright_block()
    lines     -- list of the file's lines

    Returns a list of (owner, dates, authors, start, end) tuples, one per
    comment block that contains a copyright notice: owner is the owner
    string, dates a set of int years, authors a list of names, and
    start/end the 0-based inclusive line span of the block.

    Side effect: every owner found is added to the module-level all_owners.
    Exceptions from process_dates() propagate after the offending date and
    owner strings have been printed.
    """
    data = []
    for start, end in find_copyright_block(lines, lang_type):
        joined = ''.join(lines[start:end + 1])
        match = copyright_re.search(joined)
        if not match:
            continue

        dates = match.group(2).strip()
        owner = match.group(3).strip()

        all_owners.add(owner)
        try:
            dates = process_dates(dates)
        except Exception:
            # Show which notice could not be parsed, then propagate.
            print(dates)
            print(owner)
            raise

        authors, end = _collect_authors(lines, start, end)
        data.append((owner, dates, authors, start, end))

    return data
|
|
|
|
|
|
|
|
def datestr(dates):
    """Format a collection of years as a compact comma-separated string.

    dates -- iterable of int years (any order; duplicates are ignored)

    Consecutive years are collapsed into "first-last" ranges, e.g.
    {2001, 2002, 2003, 2005} -> "2001-2003,2005".  An empty collection
    yields the empty string.
    """
    years = sorted(set(dates))
    if not years:
        return ''

    def format_run(first, last):
        if first == last:
            return '%d' % first
        return '%d-%d' % (first, last)

    output = []
    first = last = years[0]
    for year in years[1:]:
        if year == last + 1:
            # Still inside the current consecutive run.
            last = year
            continue
        output.append(format_run(first, last))
        first = last = year

    output.append(format_run(first, last))

    return ','.join(output)
|
|
|
|
|
|
|
|
# Lists every option the command-line parser accepts ("ci:v") and reflects
# that several file or directory arguments may be given.
usage_str = """usage:
%s [-c] [-i <arg>] [-v] <file|directory>..."""


def usage(exitcode):
    """Print the usage message and optionally terminate the program.

    exitcode -- process exit status passed to sys.exit(); when None the
                function returns normally after printing.
    """
    print(usage_str % sys.argv[0])
    if exitcode is None:
        return
    sys.exit(exitcode)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line driver: scan the given files/directories and print one
    # "Copyright (c) <years> <owner>" summary line per owner.
    import getopt

    show_counts = False
    # NOTE(review): -i values are collected here but nothing below reads
    # them.
    ignore = set()
    verbose = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ci:v")
    except getopt.GetoptError:
        usage(1)

    for o, a in opts:
        if o == '-c':
            show_counts = True
        if o == '-i':
            ignore.add(a)
        if o == '-v':
            verbose = True

    # (filename, language) pairs; presumably find_files() yields the same
    # shape -- verify against file_types.
    files = []

    for base in args:
        if os.path.isfile(base):
            files.append((base, lang_type(base)))
        elif os.path.isdir(base):
            files += find_files(base)
        else:
            raise AttributeError("can't access '%s'" % base)

    copyrights = {}  # owner -> set of years
    counts = {}      # owner -> number of notices found

    for filename, lang in files:
        # Close the handle promptly instead of leaking one per file.
        with open(filename, 'r') as f:
            lines = f.readlines()
        if not lines:
            continue

        lines = [line.rstrip('\r\n') for line in lines]

        # The language is re-detected with the first line available; the
        # value stored in `files` is not used.
        lt = lang_type(filename, lines[0])
        try:
            data = get_data(lt, lines)
        except Exception as e:
            # Unparseable files are skipped; report them only with -v.
            if verbose:
                if len(e.args) == 1:
                    e.args = ('%s (%s)' % (e, filename), )
                print("could not parse %s: %s" % (filename, e))
            continue

        for owner, dates, authors, start, end in data:
            copyrights.setdefault(owner, set()).update(dates)
            counts[owner] = counts.get(owner, 0) + 1

    info = [(counts[o], d, o) for o, d in copyrights.items()]

    # Most frequent owner first.  Ties are broken by owner name: sorting the
    # raw tuples would fall back to comparing the year sets, which are only
    # partially ordered and give an unstable order.
    by_count_then_owner = lambda item: (item[0], item[2])
    for count, dates, owner in sorted(info, key=by_count_then_owner,
                                      reverse=True):
        if show_counts:
            owner = '%s (%s files)' % (owner, count)
        print('Copyright (c) %s %s' % (datestr(dates), owner))
|