# directories and files to ignore by default
default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext'))
default_file_ignore = frozenset(('parsetab.py', ))

def find_files(base, languages=all_languages,
               dir_ignore=default_dir_ignore,
               file_ignore=default_file_ignore):
    '''Walk base and yield a (path, language) pair for every file whose
    detected language is one of languages.

    base        -- directory to search; an empty string is treated as
                   the current directory
    languages   -- container of language names to accept
    dir_ignore  -- directory names that are never descended into
    file_ignore -- file names that are skipped without being examined

    The language of each file is whatever lang_type() reports for its
    path; files whose language is not in languages are not reported.'''
    if not base:
        # base[-1] on an empty string would raise IndexError
        base = os.curdir
    if not base.endswith('/'):
        base += '/'

    for root, dirs, files in os.walk(base):
        # Prune the list in place: os.walk only skips a directory if it
        # is removed from the very list object it handed to us.
        dirs[:] = [d for d in dirs if d not in dir_ignore]

        for filename in files:
            if filename in file_ignore:
                # skip ignored files
                continue

            # try to figure out the language of the specified file
            fullpath = os.path.join(root, filename)
            language = lang_type(fullpath)

            # if the file is one of the languages that we want, yield
            # its name and the language
            if language in languages:
                yield fullpath, language
def update_file(dst, src, language, mutator):
    '''Run the lines of src through mutator and write the result to dst.

    dst      -- destination file name or writable file object
    src      -- source file name or readable file object
    language -- language name, passed through to the mutator untouched
    mutator  -- callable taking (lines, filename, language) and returning
                an iterable of output lines; lines are passed and expected
                back without their line endings

    If dst and src compare equal the file is updated in place, and it is
    left completely untouched when the mutator produces the same lines it
    was given.  Every output line is terminated with a single newline.
    Files opened by this function are closed before it returns; file
    objects supplied by the caller are left open.'''

    # if the source and destination are the same, we're updating in place
    inplace = dst == src

    # files opened here; the caller's own file objects are never closed
    opened = []
    try:
        if isinstance(src, str):
            # if a filename was provided, open the file
            src = open(src, 'r+' if inplace else 'r')
            opened.append(src)

        # grab all of the lines of the file and strip their line endings
        old_lines = [line.rstrip('\r\n') for line in src]

        # file-like sources (e.g. StringIO) need not have a name
        filename = getattr(src, 'name', None)
        new_lines = list(mutator(old_lines, filename, language))

        if inplace:
            # if we're updating in place and the file hasn't changed,
            # do nothing
            if old_lines == new_lines:
                return

            # otherwise, rewind and drop the old contents
            dst = src
            dst.seek(0)
            dst.truncate()
        elif isinstance(dst, str):
            # if we're not updating in place and a destination file name
            # was provided, create a file object
            dst = open(dst, 'w')
            opened.append(dst)

        for line in new_lines:
            dst.write(line)
            dst.write('\n')
    finally:
        for handle in opened:
            handle.close()
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Authors: Steve Reinhardt - -open (FOO, "<$ARGV[0]") or die; - -# Suck in everything before the first include -# (line-by-line into @before list). -while (($_ = ) && !/^#include/) { - push @before, $_; -} - -#print join("", @before); -#print "##########################\n"; - -# Suck in include lines into @includes list. -# Skip blank lines (keep processing, but don't put in @includes). -# End on first non-blank, non-include line. 
-# Note that this means that files with comments or #ifdefs -# interspersed among their #includes will only get the initial -# set of #includes sorted. -do { - push @includes, $_ unless /^\s*$/; -} while (($_ = ) && /^#include|^\s*$/); - -# Now sort the includes. This simple ordering function -# puts system includes first, followed by non-system includes. -# Within each group the sort is alphabetical. -# We may want something a little more sophisticated. -# Personally, I'd like to see something like: -# - header files from sys subdir -# <*.h> - other system headers -# <*> - STL headers -# "base/*" - M5 base headers -# "sim/*" - M5 sim headers -# "*" - other M5 headers -# ...but I didn't have the energy to code that up. -sub sortorder { - my $sysa = ($a =~ /<.*>/); - my $sysb = ($b =~ /<.*>/); - return -1 if ($sysa && !$sysb); - return 1 if ($sysb && !$sysa); - return $a cmp $b; -} - -@includes = sort sortorder @includes; -#print join("", @includes); -#print "##########################\n"; - -# Put everything after the includes in the @after list. -do { - push @after, $_; - if (/^#include/) { - print "$ARGV[0]: "; - print $after[0]; - exit 0; - } -} while ($_ = ); - -#print join("", @after); -#print "##########################\n"; - -# Print out the file with sorted includes. 
# C standard headers and the C++ header each one is replaced with when
# the file being processed is C++
cpp_c_headers = {
    'assert.h' : 'cassert',
    'ctype.h'  : 'cctype',
    'errno.h'  : 'cerrno',
    'float.h'  : 'cfloat',
    'limits.h' : 'climits',
    'locale.h' : 'clocale',
    'math.h'   : 'cmath',
    'setjmp.h' : 'csetjmp',
    'signal.h' : 'csignal',
    'stdarg.h' : 'cstdarg',
    'stddef.h' : 'cstddef',
    'stdio.h'  : 'cstdio',
    'stdlib.h' : 'cstdlib',
    'string.h' : 'cstring',
    'time.h'   : 'ctime',
    'wchar.h'  : 'cwchar',
    'wctype.h' : 'cwctype',
}

# Captures the directive marker, the keyword and the include target.  The
# match is deliberately non-greedy and the target stops at the first
# closing delimiter, so '<', '>' or '"' characters in a trailing comment
# are never mistaken for the target.
include_re = re.compile(r'([#%])(include|import).*?[<"]([^">]*)[">]')
def include_key(line):
    '''Return the sort key for an include/import line.

    Every path component except the last gets a leading space, so that
    within a group directories sort before plain files.  line must be an
    include or import directive.'''

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]

    return '/'.join(parts)

class SortIncludes(object):
    '''Line mutator that sorts, groups and de-duplicates include blocks.

    An instance is called with (lines, filename, language) and yields the
    rewritten lines.  A run of include/import directives, possibly
    separated by blank lines, forms a block; each block is emitted as one
    group per include type, in the order of includes_re, with a single
    blank line between groups.'''

    # different types of includes for different sorting of headers
    # <Python*.h>         - Python header needs to be first if it exists
    # <*.h>               - system headers (directories before files)
    # <*>                 - STL headers
    # <*.(hh|hxx|hpp|H)>  - C++ Headers (directories before files)
    # "*"                 - M5 headers (directories before files)
    #
    # Each entry is (type, delimiters, regex).  Every regex captures
    # exactly (keyword, target, trailing text), and no target can run
    # past its own closing delimiter.
    includes_re = (
        ('python', '<>', r'^(#include)[ \t]+<(Python[^>]*\.h)>(.*)'),
        ('c', '<>', r'^(#include)[ \t]+<([^>]+\.h)>(.*)'),
        ('stl', '<>', r'^(#include)[ \t]+<([0-9A-Za-z_]+)>(.*)'),
        ('cc', '<>',
         r'^(#include)[ \t]+<([0-9A-Za-z_]+\.(?:hh|hxx|hpp|H))>(.*)'),
        ('m5cc', '""', r'^(#include)[ \t]+"([^"]+\.h{1,2})"(.*)'),
        ('swig0', '<>', r'^(%import)[ \t]+<([^>]+)>(.*)'),
        ('swig1', '<>', r'^(%include)[ \t]+<([^>]+)>(.*)'),
        ('swig2', '""', r'^(%import)[ \t]+"([^"]+)"(.*)'),
        ('swig3', '""', r'^(%include)[ \t]+"([^"]+)"(.*)'),
    )

    # compile the regexes
    includes_re = tuple((a, b, re.compile(c)) for a,b,c in includes_re)

    def __init__(self):
        self.reset()

    def reset(self):
        '''clear all stored headers'''
        self.includes = {}
        for include_type,_,_ in self.includes_re:
            self.includes[include_type] = []

    def dump_block(self):
        '''Yield the stored includes, one sorted group per include type,
        with an empty line between non-empty groups.'''
        first = True
        for include_type,_,_ in self.includes_re:
            group = self.includes[include_type]
            if not group:
                continue

            if not first:
                # a blank line between groups of include types
                yield ''
            first = False

            # emit the group sorted according to include_key(); a set is
            # used so duplicates are dropped even when they do not end
            # up next to each other after sorting
            seen = set()
            for l in sorted(group, key=include_key):
                if l not in seen:
                    seen.add(l)
                    yield l

    def _match_include(self, line, language):
        '''Return (include_type, cleaned_line) if line is a recognized
        include/import directive, otherwise None.'''
        for include_type,(ldelim,rdelim),regex in self.includes_re:
            match = regex.match(line)
            if not match:
                continue

            keyword, include, extra = match.groups()
            if include_type == 'c' and language == 'C++':
                # in C++ files, swap C headers for their C++ versions
                stl_inc = cpp_c_headers.get(include)
                if stl_inc:
                    include = stl_inc
                    include_type = 'stl'

            # rebuild the line with exactly one space after the keyword
            return include_type, keyword + ' ' + ldelim + include + rdelim + extra

        return None

    def __call__(self, lines, filename, language):
        '''Yield lines with every include block sorted and grouped.

        lines    -- input lines without line endings
        filename -- name of the file being processed (not used here)
        language -- language name; 'C++' enables the C to C++ header
                    conversion'''
        # start clean even if an earlier run was abandoned half way
        self.reset()
        blanks = 0
        block = False

        for line in lines:
            if not line:
                blanks += 1
                if not block:
                    # if we're not in an include block, spit out the
                    # newline; otherwise skip it since we control the
                    # newlines within an include block
                    yield ''
                continue

            found = self._match_include(line, language)
            if found is not None:
                include_type, cleaned = found
                self.includes[include_type].append(cleaned)

                # We've entered a block, don't keep track of blank
                # lines while in a block
                block = True
                blanks = 0
                continue

            # if we're not in a block and we didn't match an include
            # to enter a block, just emit the line and continue
            if not block:
                yield line
                continue

            # We've exited an include block.
            for block_line in self.dump_block():
                yield block_line

            # if there are any newlines after the include block,
            # emit a single newline (removing extras)
            if blanks:
                yield ''

            blanks = 0
            block = False
            self.reset()

            # emit the line that ended the block
            yield line

        if block:
            # The input ended inside an include block.
            for block_line in self.dump_block():
                yield block_line
            # don't carry these includes over into another run
            self.reset()



# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))

def options():
    '''Build and return the command line option parser.'''
    import optparse
    parser = optparse.OptionParser()
    add_option = parser.add_option
    add_option('-d', '--dir_ignore', metavar="DIR[,DIR]", type='string',
               default=','.join(default_dir_ignore),
               help="ignore directories")
    add_option('-f', '--file_ignore', metavar="FILE[,FILE]", type='string',
               default=','.join(default_file_ignore),
               help="ignore files")
    add_option('-l', '--languages', metavar="LANG[,LANG]", type='string',
               default=','.join(default_languages),
               help="languages")
    add_option('-n', '--dry-run', action='store_true',
               help="don't overwrite files")

    return parser

def parse_args(parser):
    '''Parse the command line with parser and return (opts, args), with
    the comma separated list options converted to frozensets.'''
    opts,args = parser.parse_args()

    opts.dir_ignore = frozenset(opts.dir_ignore.split(','))
    opts.file_ignore = frozenset(opts.file_ignore.split(','))
    opts.languages = frozenset(opts.languages.split(','))

    return opts,args

if __name__ == '__main__':
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename,language in find_files(base, languages=opts.languages,
                file_ignore=opts.file_ignore, dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # parenthesized single argument prints the same on
                # Python 2 and Python 3
                print("%s: %s" % (filename, language))
            else:
                update_file(filename, filename, language, SortIncludes())