util: python implementation of a routine that will sort includes

I didn't realize that the perl version existed when I started this.
This version has a lot more features than the previous one: it sorts
python, system, and m5 headers into separate groups, it removes
duplicates, and it converts C headers to their STL (C++) equivalents.
This commit is contained in:
Nathan Binkert 2011-04-15 10:43:06 -07:00
parent 07815c3379
commit e5ecfde222
3 changed files with 302 additions and 91 deletions

View file

@ -87,3 +87,85 @@ def lang_type(filename, firstline=None, openok=True):
# sorry, we couldn't detect the language
return None
# names of directories and of files that are skipped by default
default_dir_ignore = frozenset(['.hg', '.svn', 'build', 'ext'])
default_file_ignore = frozenset(['parsetab.py'])
def find_files(base, languages=all_languages,
               dir_ignore=default_dir_ignore,
               file_ignore=default_file_ignore):
    '''Walk a directory tree and yield the files written in the
    requested languages.

    This is a generator; nothing is walked until it is iterated.

    base        -- directory to walk (a non-empty string; a trailing
                   '/' is appended if it is missing)
    languages   -- container of language names to accept
    dir_ignore  -- directory names that are not descended into
    file_ignore -- file names that are skipped

    Yields (fullpath, language) pairs, where language is whatever
    lang_type() reported for fullpath.'''

    if base[-1] != '/':
        base += '/'

    # walk over base
    for root, dirs, files in os.walk(base):
        root = root.replace(base, '', 1)

        # Strip the ignored directories from the list.  os.walk only
        # honours the pruning if the list object it handed us is changed
        # in place, hence the slice assignment instead of a rebind (and
        # instead of deleting entries while iterating over the list).
        dirs[:] = [d for d in dirs if d not in dir_ignore]

        for filename in files:
            if filename in file_ignore:
                # skip ignored files
                continue

            # try to figure out the language of the specified file
            fullpath = os.path.join(base, root, filename)
            language = lang_type(fullpath)

            # if the file is one of the languages that we want, return
            # its name and the language
            if language in languages:
                yield fullpath, language
def update_file(dst, src, language, mutator):
    '''Rewrite a file by running its lines through a mutator.

    dst      -- destination: a file name (str) or a writable file object
    src      -- source: a file name (str) or a readable file object
    language -- language name, passed through to the mutator untouched
    mutator  -- callable invoked as mutator(lines, name, language) that
                returns an iterable of output lines.  lines is the list
                of source lines without their line endings and name is
                the source's "name" attribute (None if it has none).

    If dst and src compare equal the file is updated in place, and it is
    left untouched when the mutator did not change anything.  Every
    output line is written followed by a single newline character.
    Files opened here are closed before returning; file objects supplied
    by the caller are left open.  Returns None.'''

    # if the source and destination are the same, we're updating in place
    inplace = dst == src

    # files opened by this function, which it is responsible for closing
    opened = []
    try:
        if isinstance(src, str):
            # if a filename was provided, open the file
            src = open(src, 'r+' if inplace else 'r')
            opened.append(src)

        # grab all of the lines of the file and strip them of their
        # line ending
        old_lines = [line.rstrip('\r\n') for line in src]
        new_lines = list(mutator(old_lines, getattr(src, 'name', None),
                                 language))

        if inplace:
            # if we're updating in place and the file hasn't changed,
            # do nothing
            if old_lines == new_lines:
                return

            # otherwise, truncate the file and seek to the beginning.
            dst = src
            dst.truncate(0)
            dst.seek(0)
        elif isinstance(dst, str):
            # if we're not updating in place and a destination file name
            # was provided, create a file object
            dst = open(dst, 'w')
            opened.append(dst)

        for line in new_lines:
            dst.write(line)
            dst.write('\n')
    finally:
        for handle in opened:
            handle.close()

View file

@ -1,91 +0,0 @@
#! /usr/bin/env perl
# Copyright (c) 2003 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Steve Reinhardt
# Read the file named by the first command-line argument, sort its first
# run of #include lines, and print the resulting file.
open (FOO, "<$ARGV[0]") or die;
# Suck in everything before the first include
# (line-by-line into @before list).
# When this loop stops on an #include line, that line is still in $_.
while (($_ = <FOO>) && !/^#include/) {
push @before, $_;
}
#print join("", @before);
#print "##########################\n";
# Suck in include lines into @includes list.
# Skip blank lines (keep processing, but don't put in @includes).
# End on first non-blank, non-include line.
# Note that this means that files with comments or #ifdefs
# interspersed among their #includes will only get the initial
# set of #includes sorted.
do {
push @includes, $_ unless /^\s*$/;
} while (($_ = <FOO>) && /^#include|^\s*$/);
# Now sort the includes. This simple ordering function
# puts system includes first, followed by non-system includes.
# Within each group the sort is alphabetical.
# We may want something a little more sophisticated.
# Personally, I'd like to see something like:
# <sys/*.h> - header files from sys subdir
# <*.h> - other system headers
# <*> - STL headers
# "base/*" - M5 base headers
# "sim/*" - M5 sim headers
# "*" - other M5 headers
# ...but I didn't have the energy to code that up.
# Comparison routine handed to sort below; the two lines being compared
# arrive in $a and $b, and a line counts as a system include when it
# contains <...>.
sub sortorder {
my $sysa = ($a =~ /<.*>/);
my $sysb = ($b =~ /<.*>/);
return -1 if ($sysa && !$sysb);
return 1 if ($sysb && !$sysa);
return $a cmp $b;
}
@includes = sort sortorder @includes;
#print join("", @includes);
#print "##########################\n";
# Put everything after the includes in the @after list.
# The first pass through this loop stores the line that ended the include
# block above. If any later line starts with #include, print the file
# name followed by the first line of @after and exit with status 0
# without printing the file.
do {
push @after, $_;
if (/^#include/) {
print "$ARGV[0]: ";
print $after[0];
exit 0;
}
} while ($_ = <FOO>);
#print join("", @after);
#print "##########################\n";
# Print out the file with sorted includes.
print join("", @before, @includes, @after);

220
util/sort_includes.py Normal file
View file

@ -0,0 +1,220 @@
#!/usr/bin/env python
import os
import re
import sys
from file_types import *
# Maps a C header name to the name of its C++ counterpart; every entry
# has the form '<name>.h' -> 'c<name>'.
cpp_c_headers = dict(
    (name + '.h', 'c' + name) for name in (
        'assert', 'ctype', 'errno', 'float', 'limits', 'locale',
        'math', 'setjmp', 'signal', 'stdarg', 'stddef', 'stdio',
        'stdlib', 'string', 'time', 'wchar', 'wctype',
    ))
# Matches an include/import directive.  Group 3 is the text between the
# first opening delimiter and the first closing delimiter that follows
# it; the quantifiers are non-greedy so that quotes or angle brackets in
# a trailing comment are never mistaken for the header name.
include_re = re.compile(r'([#%])(include|import).*?[<"](.*?)[">]')

def include_key(line):
    '''Return the sort key for an include line.

    The key is the included path with a space prepended to every
    directory component, so that directories sort before files.

    line -- an include/import directive matched by include_re; anything
            else trips the assertion below.'''

    match = include_re.match(line)
    assert match, line

    # Everything but the file part needs to have a space prepended
    parts = match.group(3).split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[:-1] = [' ' + s for s in parts[:-1]]
    return '/'.join(parts)
class SortIncludes(object):
    '''Mutator that sorts the include lines of a file.

    Calling an instance with the lines of a file yields the new lines of
    the file.  Every run of include lines (optionally separated by blank
    lines) is collected, split into the categories listed in includes_re,
    sorted within each category and emitted with one blank line between
    categories.  All other lines are passed through unchanged.'''

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (category, delimiter pair, pattern).  In every
    # pattern group 1 is the keyword, group 2 the included path and the
    # last group whatever follows the closing delimiter.  The order of
    # the entries is both the matching priority and the output order.
    includes_re = (
        ('python', '<>', r'^(#include)[ \t]+<(Python.*\.h)>(.*)'),
        ('c', '<>', r'^(#include)[ \t]+<(.+\.h)>(.*)'),
        ('stl', '<>', r'^(#include)[ \t]+<([0-9A-Za-z_]+)>(.*)'),
        ('cc', '<>',
         r'^(#include)[ \t]+<([0-9A-Za-z_]+\.(hh|hxx|hpp|H))>(.*)'),
        ('m5cc', '""', r'^(#include)[ \t]+"(.+\.h{1,2})"(.*)'),
        ('swig0', '<>', r'^(%import)[ \t]+<(.+)>(.*)'),
        ('swig1', '<>', r'^(%include)[ \t]+<(.+)>(.*)'),
        ('swig2', '""', r'^(%import)[ \t]+"(.+)"(.*)'),
        ('swig3', '""', r'^(%include)[ \t]+"(.+)"(.*)'),
        )

    # compile the regexes
    includes_re = tuple((name, delims, re.compile(pattern))
                        for name, delims, pattern in includes_re)

    def __init__(self):
        self.reset()

    def reset(self):
        '''clear all stored headers'''
        self.includes = {}
        for include_type, _, _ in self.includes_re:
            self.includes[include_type] = []

    def dump_block(self):
        '''Yield the stored includes, one category after the other,
        with an empty line between two non-empty categories.  Within a
        category the lines are sorted according to include_key() and
        each distinct line is emitted only once.'''
        first = True
        for include_type, _, _ in self.includes_re:
            group = self.includes[include_type]
            if not group:
                continue

            if not first:
                # print a newline between groups of
                # include types
                yield ''
            first = False

            # Lines that share a sort key keep their original relative
            # order, so identical lines are not necessarily neighbours
            # after sorting; remember what was emitted instead of only
            # comparing against the previous line.
            seen = set()
            for line in sorted(group, key=include_key):
                if line not in seen:
                    seen.add(line)
                    yield line

    def __call__(self, lines, filename, language):
        '''Generate the lines of the file with its includes sorted.

        lines    -- the lines of the file, without line endings
        filename -- name of the file (not used)
        language -- language of the file; when it is 'C++', C headers
                    listed in cpp_c_headers are replaced by their C++
                    names and filed under the 'stl' category'''
        blanks = 0      # blank lines seen since the last include line
        block = False   # True while inside a block of includes

        for line in lines:
            if not line:
                blanks += 1
                if not block:
                    # if we're not in an include block, spit out the
                    # newline otherwise, skip it since we're going to
                    # control newlines within the include block
                    yield ''
                continue

            # Try to match each of the include types
            for include_type, (ldelim, rdelim), regex in self.includes_re:
                match = regex.match(line)
                if not match:
                    continue

                # if we've got a match, clean up the #include line,
                # fix up stl headers and store it in the proper category
                groups = match.groups()
                keyword = groups[0]
                include = groups[1]
                extra = groups[-1]
                if include_type == 'c' and language == 'C++':
                    stl_inc = cpp_c_headers.get(include, None)
                    if stl_inc:
                        include = stl_inc
                        include_type = 'stl'

                line = keyword + ' ' + ldelim + include + rdelim + extra
                self.includes[include_type].append(line)

                # We've entered a block, don't keep track of blank
                # lines while in a block
                block = True
                blanks = 0
                break
            else:
                # this line did not match a #include

                # if we're not in a block and we didn't match an include
                # to enter a block, just emit the line and continue
                if not block:
                    yield line
                    continue

                # We've exited an include block.
                for block_line in self.dump_block():
                    yield block_line

                # if there are any newlines after the include block,
                # emit a single newline (removing extras)
                if blanks:
                    yield ''

                blanks = 0
                block = False
                self.reset()

                # emit the line that ended the block
                yield line

        if block:
            # The input ended inside an include block.
            for block_line in self.dump_block():
                yield block_line
            # drop the stored headers so that the instance can be reused
            self.reset()
# languages whose files get their includes sorted unless told otherwise
default_languages = frozenset(['C', 'C++', 'isa', 'python', 'scons', 'swig'])
def options():
    '''Build and return the command line parser.

    The ignore lists and the language list are each taken as one
    comma separated string; their defaults are the module level
    default sets joined with commas.'''
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option('-d', '--dir_ignore', metavar="DIR[,DIR]",
                      type='string',
                      default=','.join(default_dir_ignore),
                      help="ignore directories")
    parser.add_option('-f', '--file_ignore', metavar="FILE[,FILE]",
                      type='string',
                      default=','.join(default_file_ignore),
                      help="ignore files")
    parser.add_option('-l', '--languages', metavar="LANG[,LANG]",
                      type='string',
                      default=','.join(default_languages),
                      help="languages")
    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")
    return parser
def parse_args(parser):
    '''Parse the command line with the given parser and return the
    (opts, args) pair, with the comma separated dir_ignore, file_ignore
    and languages options each replaced by a frozenset of their
    entries.'''
    opts, args = parser.parse_args()
    for name in ('dir_ignore', 'file_ignore', 'languages'):
        setattr(opts, name, frozenset(getattr(opts, name).split(',')))
    return opts, args
if __name__ == '__main__':
    # Script entry point: every positional argument is a directory to
    # walk.  With --dry-run the matching files are only listed together
    # with their language, otherwise each one is rewritten in place.
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        for filename, language in find_files(base,
                                             languages=opts.languages,
                                             file_ignore=opts.file_ignore,
                                             dir_ignore=opts.dir_ignore):
            if opts.dry_run:
                # a single parenthesized argument prints the same text
                # as the print statement and also parses on Python 3
                print("%s: %s" % (filename, language))
            else:
                # a fresh sorter per file, so no state is carried over
                update_file(filename, filename, language, SortIncludes())