copyright: Add code for finding all copyright blocks and create a COPYING file

The end of the COPYING file was generated with:
    % python ./util/find_copyrights.py configs src system tests util

Update -C command line option to spit out COPYING file
2011-06-02 17:36:07 -07:00 · 2011-06-02 17:36:07 -07:00 · 0c424344fa
commit 0c424344fa
parent f656787edb
6 changed files with 326 additions and 127 deletions
--- a/109
+++ b/109
@ -1,109 +0,0 @@
-
-Nathan L. Binkert
-----------------------
-* Alpha full system support
-* Statistics package
-* Event queue
-* Pseudo instructions
-* Remote GDB facilities
-* PC sampling
-* Trace facilities
-* Tru64 support
-* Ethernet (Link, NSGIGE, Sinic) device support
-* PCI device support
-* Checkpoint framework
-* Configuration system
-
-Steven K. Reinhardt
-----------------------
-* Alpha support
-* ISA parsing
-* SWIG intergration
-* New memory system
-* New Caches
-* Simple CPU
-* Instruction tracing
-* PC sampling
-* Deprecated detailed CPU
-* Binary Loading
-* Configuration system
-
-Ali G. Saidi
-----------------------
-* SPARC Full System Support
-* Alpha Linux support
-* Alpha (Tsunami) platform and devices
-* I/O <-> memory interface
-* PCI device interface
-* Multiple ISA support
-* Ethernet (Intel NIC) device model
-* Memory bridge, bus, packet, port interfaces
-
-Kevin T. Lim
-----------------------
-* New CPU model
-* CPU checker
-* CPU class restructuring
-* Quiecsing/Draining
-
-Ronald G. Dreslinski Jr
-----------------------
-* Caches/Cache coherence
-* Prefetching
-* New memory system (port, request, packet, cache porting)
-* Tru64 MP support
-
-Lisa R. Hsu
-----------------------
-* DP83820 NIC device model
-* Kernel stats
-* Linux Dist disk image building (current)
-
-Gabriel Black
-----------------------
-* Multiple ISA support
-* SPARC ISA support
-* X86 ISA support
-* Alpha support reorganization
-* SPARC SE support
-* X86 SE support
-* Remote GDB in SE support
-* TLB based translation in SE
-* Statetrace debugging tool
-* Microcode system
-
-Korey L. Sewell
-----------------------
-* O3CPU SMT support
-* MIPS ISA support
-* Multiple ISA support in O3CPU
-
-Andrew L. Schultz
-----------------------
-* IDE controller/disk model
-* PCI devices interface
-* Linux Dist disk image building (deprecated)
-
-Erik G. Hallnor
-----------------------
-* Caches
-* Trace reader support
-* Checkpoint framework
-
-Steve E. Raasch
-----------------------
-* Deprecated CPU model
-* Generic CPU structures
-
-David Green
-----------------------
-* Deprecated CPU model
-* Caches
-
-Benjamin S. Nash
-----------------------
-* Alpha FreeBSD support
-
-Miguel J. Serrano
-----------------------
-* Alpha FreeBSD support
--- a/47
+++ b/47
@ -0,0 +1,47 @@
+Please see individual files for details of the license on each file.
+The preferred license can be found in LICENSE.
+
+All files in this distribution (other than in the ext directory) have
+licenses based on the BSD or MIT licenses.  Some files in the ext
+directory are GNU LGPL.  No other licenses are found in this
+distribution.
+
+Beyond the BSD license, some files include the following clarification
+of the license as required by the copyright holder:
+
+    The license below extends only to copyright in the software and
+    shall not be construed as granting a license to any other
+    intellectual property including but not limited to intellectual
+    property relating to a hardware implementation of the
+    functionality of the software licensed hereunder.  You may use the
+    software subject to the license terms below provided that you
+    ensure that this notice is replicated unmodified and in its
+    entirety in all distributions of the software, modified or
+    unmodified, in source code or in binary form.
+
+The copyright holders include (not counting the ext directory):
+
+Copyright (c) 2000-2011 The Regents of The University of Michigan
+Copyright (c) 1990,1993-1995,2007-2010 The Hewlett-Packard Development Company
+Copyright (c) 1999-2009,2011 Mark D. Hill and David A. Wood
+Copyright (c) 2009-2011 ARM Limited
+Copyright (c) 2008-2009 Princeton University
+Copyright (c) 2007 MIPS Technologies, Inc.
+Copyright (c) 2009-2011 Advanced Micro Devices, Inc.
+Copyright (c) 2009 The University of Edinburgh
+Copyright (c) 2007-2008 The Florida State University
+Copyright (c) 2010 Massachusetts Institute of Technology
+Copyright (c) 1990-1993 The Regents of the University of California
+Copyright (c) 2006-2009 Nathan Binkert
+Copyright (c) 2001 The NetBSD Foundation, Inc.
+Copyright (c) 2010-2011 Gabe Black
+Copyright (c) 1994 Adam Glass
+Copyright (c) 1990-1992 MIPS Computer Systems, Inc.
+Copyright (c) 2004 Richard J. Wagner
+Copyright (c) 2000 Computer Engineering and Communication Networks Lab
+Copyright (c) 2001 Eric Jackson
+Copyright (c) 1990 Hewlett-Packard Development Company
+Copyright (c) 1994-1996 Carnegie-Mellon University.
+Copyright (c) 1993-1994 Christopher G. Demetriou
+Copyright (c) 1997-2002 Makoto Matsumoto and Takuji Nishimura
+Copyright (c) 1998,2001 Manuel Bouyer.
--- a/2
+++ b/2
@ -1,4 +1,4 @@
-Copyright (c) 2000-2011 The Regents of The University of Michigan
+Copyright (c) <date> <copyright holder>
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
--- a/src/SConscript
+++ b/src/SConscript
@ -516,7 +516,7 @@ def makeInfoPyFile(target, source, env):

 # Generate a file that wraps the basic top level files
 env.Command('python/m5/info.py',
-            [ '#/AUTHORS', '#/LICENSE', '#/README', ],
+            [ '#/COPYING', '#/LICENSE', '#/README', ],
            MakeAction(makeInfoPyFile, Transform("INFO")))
 PySource('m5', 'python/m5/info.py')

--- a/src/python/m5/main.py
+++ b/src/python/m5/main.py
@ -36,11 +36,8 @@ __all__ = [ 'options', 'arguments', 'main' ]

 usage="%prog [gem5 options] script.py [script options]"
 version="%prog 2.0"
-brief_copyright='''
-Copyright (c) 2001-2011
-The Regents of The University of Michigan
-All Rights Reserved
-'''
+brief_copyright=\
+    "gem5 is copyrighted software; use the --copyright option for details."

 def parse_options():
    import config
@ -52,8 +49,6 @@ def parse_options():
    group = options.set_group

    # Help options
-    option('-A', "--authors", action="store_true", default=False,
-        help="Show author information")
    option('-B', "--build-info", action="store_true", default=False,
        help="Show build information")
    option('-C', "--copyright", action="store_true", default=False,
@ -211,14 +206,7 @@ def main(*args):

    if options.copyright:
        done = True
-        print info.LICENSE
-        print
-
-    if options.authors:
-        done = True
-        print 'Author information:'
-        print
-        print info.AUTHORS
+        print info.COPYING
        print

    if options.readme:
@ -263,7 +251,7 @@ def main(*args):

    verbose = options.verbose - options.quiet
    if options.verbose >= 0:
-        print "gem5 Simulator System"
+        print "gem5 Simulator System.  http://gem5.org"
        print brief_copyright
        print

--- a/util/find_copyrights.py
+++ b/util/find_copyrights.py
@ -0,0 +1,273 @@
+#!/usr/bin/env python
+
+import os
+import re
+import sys
+
+from file_types import lang_type, find_files
+
+# Matches a "-*- mode: ... -*-" editor mode line; such a line is skipped
+# below when it is the very first line of a file.
+mode_line = re.compile('(-\*- *mode:.* *-\*-)')
+# A line whose first non-whitespace character is '#'.
+shell_comment = re.compile(r'^\s*#')
+# NOTE(review): the remaining patterns are unanchored and are used with
+# search(), so they match the comment marker anywhere in a line, not only
+# at its start.
+lisp_comment = re.compile(r';')
+cpp_comment = re.compile(r'//')
+c_comment_start = re.compile(r'/\*')
+c_comment_end   = re.compile(r'\*/')
+def find_copyright_block(lines, lang_type):
+    """Generate (start, end) index pairs for the comment blocks that lead
+    a file.
+
+    lines     -- sequence of the file's lines (strings)
+    lang_type -- language name string; it selects which comment syntax is
+                 scanned for.  Note that inside this function the name
+                 shadows the lang_type() helper imported from file_types.
+
+    Each yielded pair holds 0-based, inclusive indices into lines.
+    Scanning stops (the generator returns) at the first non-blank line
+    that is found outside of a comment.
+
+    Because this is a generator, the AttributeError for an unhandled
+    lang_type is raised when the result is first iterated, not when the
+    function is called.
+
+    NOTE(review): a '#', ';' or '//' comment run that extends to the very
+    last line is never yielded, because those runs are only yielded once
+    a following non-comment line has been seen.
+    """
+    start = None
+    if lang_type in ('python', 'make', 'shell', 'perl', 'scons'):
+        for i,line in enumerate(lines):
+            # A leading shebang or mode line is not part of any block.
+            if i == 0 and (line.startswith('#!') or mode_line.search(line)):
+                continue
+
+            if shell_comment.search(line):
+                # First comment line of a new block.
+                if start is None:
+                    start = i
+            elif start is None:
+                # Outside a block: blank lines are skipped, anything else
+                # ends the scan.
+                if line.strip():
+                    return
+            else:
+                # First non-comment line after a block closes that block.
+                yield start, i-1
+                start = None
+
+    elif lang_type in ('lisp', ):
+        # Same scheme as above, using ';' as the comment marker.
+        for i,line in enumerate(lines):
+            if i == 0 and mode_line.search(line):
+                continue
+
+            if lisp_comment.search(line):
+                if start is None:
+                    start = i
+            elif start is None:
+                if line.strip():
+                    return
+            else:
+                yield start, i-1
+                start = None
+
+    elif lang_type in ('C', 'C++', 'swig', 'isa', 'asm', 'slicc',
+                       'lex', 'yacc'):
+        # mode is None outside a comment, 'C' inside a /* ... */ comment
+        # and 'CPP' inside a run of // lines.
+        mode = None
+        for i,line in enumerate(lines):
+            if i == 0 and mode_line.search(line):
+                continue
+
+            if mode == 'C':
+                # Inside /* ... */: the block ends on the line that holds
+                # the terminator.
+                assert start is not None, 'on line %d' % (i + 1)
+                match = c_comment_end.search(line)
+                if match:
+                    yield start, i
+                    mode = None
+                continue
+
+            cpp_match = cpp_comment.search(line)
+            c_match = c_comment_start.search(line)
+
+            if cpp_match:
+                # A line holding both '//' and '/*' is not supported.
+                assert not c_match, 'on line %d' % (i + 1)
+                # Text before the '//' means this is not a pure comment
+                # line, so the leading comments are over.
+                if line[:cpp_match.start()].strip():
+                    return
+                if mode is None:
+                    mode = 'CPP'
+                    start = i
+                else:
+                    # Already in a // run: a line whose text begins with
+                    # "Copyright" closes the current block and opens a
+                    # new one at this line.  (The "> 0" compares the
+                    # boolean result with 0, i.e. it is a truth test.)
+                    text = line[cpp_match.end():].lstrip()
+                    if text.startswith("Copyright") > 0:
+                        yield start, i-1
+                        start = i
+                continue
+            elif mode == 'CPP':
+                # A line without '//' while in a // run: blank lines keep
+                # the run open, anything else closes it.
+                assert start is not None, 'on line %d' % (i + 1)
+                if not line.strip():
+                    continue
+                yield start, i-1
+                mode = None
+                # Keep going only if this line opens a /* comment.
+                if not c_match:
+                    return
+
+            if c_match:
+                # Opening line of a /* comment.  This line itself is not
+                # checked for the terminator; that check starts with the
+                # next line.
+                assert mode is None, 'on line %d' % (i + 1)
+                mode = 'C'
+                start = i
+
+            # Non-blank line outside of any comment: stop scanning.
+            if mode is None and line.strip():
+                return
+
+    else:
+        raise AttributeError, "Could not handle language %s" % lang_type
+
+date_range_re = re.compile(r'([0-9]{4})\s*-\s*([0-9]{4})')
+def process_dates(dates):
+    """Expand a copyright year list into a set of individual years.
+
+    dates -- string of comma-separated fields, each either a single year
+             ('2007') or a range of four-digit years ('2000-2011')
+
+    Returns a set of ints holding every year named or spanned.  Fields
+    that are neither a range nor an integer are ignored.
+    """
+    years = set()
+    for field in dates.split(','):
+        field = field.strip()
+
+        span = date_range_re.match(field)
+        if span is not None:
+            first = int(span.group(1))
+            last = int(span.group(2))
+            # Ranges are inclusive at both ends.
+            years.update(xrange(first, last + 1))
+            continue
+
+        try:
+            years.add(int(field))
+        except ValueError:
+            # Not a number (for example an empty field): skip it.
+            continue
+
+    return years
+
+# Capture groups: the "(c)" marker, the year list and the owner name.
+# The run of whitespace/comment characters allowed between the years and
+# the owner lets the owner be picked up across a comment-line boundary
+# (get_data() joins the block's lines without a separator).
+# The letter class is spelled A-Za-z on purpose: the range A-z would also
+# admit the punctuation that sits between 'Z' and 'a' in ASCII
+# ('[', '\', ']', '^', '_' and '`').
+copyright_re = \
+    re.compile(r'Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-Za-z,. -]+)',
+               re.DOTALL)
+
+# First author line ("Authors: <name>") and the continuation lines that
+# carry one further name each.
+authors_re = re.compile(r'^[\s*#/]*Authors:\s*([A-Za-z .]+)\s*$')
+more_authors_re = re.compile(r'^[\s*#/]*([A-Za-z .]+)\s*$')
+
+# Every owner name seen by get_data() so far.
+all_owners = set()
+def get_data(lang_type, lines):
+    """Extract the copyright information from the leading comment blocks.
+
+    lang_type -- language name, passed on to find_copyright_block()
+    lines     -- the file's lines (strings)
+
+    Returns a list with one (owner, dates, authors, start, end) tuple per
+    comment block that contains a copyright statement:
+      owner      -- owner name string
+      dates      -- set of int years, as produced by process_dates()
+      authors    -- list of author name strings (possibly empty)
+      start, end -- 0-based inclusive line indices of the block; end is
+                    pulled back when the author list is followed by other
+                    text inside the same block
+
+    Side effect: each owner found is added to the module-level all_owners.
+    Exceptions raised by process_dates() are re-raised after the offending
+    dates and owner have been printed.
+    """
+    data = []
+    for start,end in find_copyright_block(lines, lang_type):
+        joined = ''.join(lines[start:end+1])
+        match = copyright_re.search(joined)
+        if not match:
+            # Comment block without a copyright statement.
+            continue
+
+        marker,dates,owner = match.groups()
+        dates = dates.strip()
+        owner = owner.strip()
+
+        all_owners.add(owner)
+        try:
+            dates = process_dates(dates)
+        except Exception:
+            # Show what was being parsed, then let the error propagate.
+            print dates
+            print owner
+            raise
+
+        authors = []
+        for i in xrange(start,end+1):
+            line = lines[i]
+            if not authors:
+                # Still looking for the "Authors:" line.
+                match = authors_re.search(line)
+                if match:
+                    authors.append(match.group(1).strip())
+            else:
+                match = more_authors_re.search(line)
+                if not match:
+                    # The author list is over.  Trim the block: it ends
+                    # at the first blank line, or just before the first
+                    # '//' line that carries text.
+                    for j in xrange(i, end+1):
+                        line = lines[j].strip()
+                        if not line:
+                            end = j
+                            break
+                        if line.startswith('//'):
+                            line = line[2:].lstrip()
+                            if line:
+                                end = j - 1
+                                break
+                    break
+                authors.append(match.group(1).strip())
+
+        info = (owner, dates, authors, start, end)
+        data.append(info)
+
+    return data
+
+def datestr(dates):
+    """Format a collection of years as a compact comma-separated string.
+
+    Runs of consecutive years are collapsed into 'first-last' ranges, so
+    set([2000, 2001, 2002, 2005]) becomes '2000-2002,2005'.
+
+    dates -- iterable of integer years (typically a set)
+
+    Returns the formatted string; an empty collection gives ''.
+    """
+    years = sorted(dates)
+    if not years:
+        # Nothing to format, and nothing to index into below.
+        return ''
+
+    output = []
+    def add_output(first, second):
+        # A one-year run is written as the bare year.
+        if first == second:
+            output.append('%d' % (first))
+        else:
+            output.append('%d-%d' % (first, second))
+
+    first = second = years[0]
+    for year in years[1:]:
+        if year == second + 1:
+            # Extends the current run.
+            second = year
+        else:
+            # Gap: flush the current run and start a new one.
+            add_output(first, second)
+            first = second = year
+
+    add_output(first, second)
+
+    return ','.join(output)
+
+# Mirrors the getopt spec "ci:v" used by the script entry point, which
+# also accepts any number of file or directory arguments.
+usage_str = """usage:
+%s [-c] [-i <ignore>] [-v] <file-or-directory> ..."""
+
+def usage(exitcode):
+    """Print the usage message, then exit unless exitcode is None.
+
+    exitcode -- process exit status to pass to sys.exit(), or None to
+                return to the caller after printing
+    """
+    print usage_str % sys.argv[0]
+    if exitcode is not None:
+        sys.exit(exitcode)
+
+# Script entry point: scan the given files/directories and print one
+# "Copyright (c) <years> <owner>" line per copyright owner found.
+if __name__ == '__main__':
+    import getopt
+
+    show_counts = False
+    # NOTE(review): values given with -i are collected here but are not
+    # consulted anywhere in this block.
+    ignore = set()
+    verbose = False
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "ci:v")
+    except getopt.GetoptError:
+        usage(1)
+
+    for o,a in opts:
+        if o == '-c':
+            show_counts = True
+        elif o == '-i':
+            ignore.add(a)
+        elif o == '-v':
+            verbose = True
+
+    # (filename, language) pairs for every file to scan.
+    files = []
+
+    for base in args:
+        if os.path.isfile(base):
+            files += [ (base, lang_type(base)) ]
+        elif os.path.isdir(base):
+            files += find_files(base)
+        else:
+            raise AttributeError("can't access '%s'" %  base)
+
+    copyrights = {}   # owner -> set of years
+    counts = {}       # owner -> number of copyright blocks seen
+
+    for filename, lang in files:
+        # Always release the file handle, including for the files that
+        # are skipped below.
+        f = open(filename, 'r')
+        try:
+            lines = f.readlines()
+        finally:
+            f.close()
+
+        if not lines:
+            continue
+
+        lines = [ line.rstrip('\r\n') for line in lines ]
+
+        # Re-detect the language, this time also passing the first line.
+        lt = lang_type(filename, lines[0])
+        try:
+            data = get_data(lt, lines)
+        except Exception, e:
+            # Best effort: a file that cannot be parsed is skipped, and
+            # reported only in verbose mode.
+            if verbose:
+                if len(e.args) == 1:
+                    e.args = ('%s (%s)' % (e, filename), )
+                print "could not parse %s: %s" % (filename, e)
+            continue
+
+        for owner, dates, authors, start, end in data:
+            if owner not in copyrights:
+                copyrights[owner] = set()
+            if owner not in counts:
+                counts[owner] = 0
+
+            copyrights[owner] |= dates
+            counts[owner] += 1
+
+    info = [ (counts[o], d, o) for o,d in copyrights.items() ]
+
+    # Most frequent owners first.
+    # NOTE(review): owners with equal counts are ordered by comparing
+    # their year sets, which is not a total order.
+    for count,dates,owner in sorted(info, reverse=True):
+        if show_counts:
+            owner = '%s (%s files)' % (owner, count)
+        print 'Copyright (c) %s %s' % (datestr(dates), owner)