util: python implementation of a routine that will sort includes
I didn't realize that the Perl version existed when I started this. This version has many more features than the previous one: it sorts and separates Python, system, and M5 headers into separate groups, removes duplicates, and converts C headers to STL headers.
This commit is contained in:
parent
07815c3379
commit
e5ecfde222
3 changed files with 302 additions and 91 deletions
|
@ -87,3 +87,85 @@ def lang_type(filename, firstline=None, openok=True):
|
|||
|
||||
# sorry, we couldn't detect the language
|
||||
return None
|
||||
|
||||
# names of directories and files that are skipped unless the caller
# asks for something else
default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext'))
default_file_ignore = frozenset(('parsetab.py', ))

def find_files(base, languages=all_languages,
               dir_ignore=default_dir_ignore,
               file_ignore=default_file_ignore):
    '''Walk base and all of its subdirectories and generate a
    (path, language) pair for every file whose detected language is in
    languages.  Directories named in dir_ignore are not descended into
    and files named in file_ignore are skipped.'''
    if base[-1] != '/':
        base += '/'

    for root, dirs, files in os.walk(base):
        # prune the list in place so that os.walk never enters the
        # ignored directories
        dirs[:] = [d for d in dirs if d not in dir_ignore]

        # path of the directory being visited, relative to base
        relative = root.replace(base, '', 1)

        for filename in files:
            if filename in file_ignore:
                continue

            # try to figure out the language of the file
            fullpath = os.path.join(base, relative, filename)
            language = lang_type(fullpath)

            # only report files written in one of the languages that
            # were asked for
            if language in languages:
                yield fullpath, language
|
||||
|
||||
def update_file(dst, src, language, mutator):
    '''Rewrite a file of the specified language with the provided
    mutator generator.

    dst and src may each be a file name or an open file object.  If
    they compare equal the file is updated in place, and it is left
    untouched when the mutator does not change anything; otherwise the
    result is written to dst.

    mutator is called as mutator(lines, filename, language), where
    lines is the list of source lines stripped of their line endings,
    and it must produce the lines of the new file.

    Files opened by this function are closed before it returns; file
    objects supplied by the caller are left open.  Returns None.'''

    # if the source and destination are the same, we're updating in place
    inplace = dst == src

    # handles that were opened here and therefore must be closed here
    opened = []
    try:
        if isinstance(src, str):
            # if a filename was provided, open the file
            src = open(src, 'r+' if inplace else 'r')
            opened.append(src)

        # grab all of the lines of the file and strip them of their
        # line ending
        old_lines = [line.rstrip('\r\n') for line in src]
        new_lines = list(mutator(old_lines, src.name, language))

        if inplace:
            # if we're updating in place and the file hasn't changed,
            # do nothing
            if old_lines == new_lines:
                return

            # otherwise, truncate the file and seek to the beginning.
            dst = src
            dst.truncate(0)
            dst.seek(0)
        elif isinstance(dst, str):
            # if we're not updating in place and a destination file
            # name was provided, create a file object
            dst = open(dst, 'w')
            opened.append(dst)

        for line in new_lines:
            dst.write(line)
            dst.write('\n')
    finally:
        for handle in opened:
            handle.close()
|
||||
|
|
|
@ -1,91 +0,0 @@
|
|||
#! /usr/bin/env perl
|
||||
# Copyright (c) 2003 The Regents of The University of Michigan
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Authors: Steve Reinhardt
|
||||
|
||||
# Sort the first run of #include lines of the file named by the first
# command line argument and print the resulting file on standard output.
open (FOO, "<$ARGV[0]") or die;

# Suck in everything before the first include
# (line-by-line into @before list).
while (($_ = <FOO>) && !/^#include/) {
    push @before, $_;
}

#print join("", @before);
#print "##########################\n";

# Suck in include lines into @includes list.
# Skip blank lines (keep processing, but don't put in @includes).
# End on first non-blank, non-include line.
# Note that this means that files with comments or #ifdefs
# interspersed among their #includes will only get the initial
# set of #includes sorted.
do {
    push @includes, $_ unless /^\s*$/;
} while (($_ = <FOO>) && /^#include|^\s*$/);

# Now sort the includes.  This simple ordering function
# puts system includes first, followed by non-system includes.
# Within each group the sort is alphabetical.
# We may want something a little more sophisticated.
# Personally, I'd like to see something like:
# <sys/*.h>  - header files from sys subdir
# <*.h>      - other system headers
# <*>        - STL headers
# "base/*"   - M5 base headers
# "sim/*"    - M5 sim headers
# "*"        - other M5 headers
# ...but I didn't have the energy to code that up.
#
# Comparison function for sort: a line containing <...> is treated as a
# system include and orders before any line that does not; lines of the
# same kind are compared as strings.
sub sortorder {
    my $sysa = ($a =~ /<.*>/);
    my $sysb = ($b =~ /<.*>/);
    return -1 if ($sysa && !$sysb);
    return 1 if ($sysb && !$sysa);
    return $a cmp $b;
}

@includes = sort sortorder @includes;
#print join("", @includes);
#print "##########################\n";

# Put everything after the includes in the @after list.
# If another #include shows up after the sorted block, give up on this
# file: print the file name followed by the first line that came after
# the include block and exit without printing the rewritten file.
do {
    push @after, $_;
    if (/^#include/) {
        print "$ARGV[0]: ";
        print $after[0];
        exit 0;
    }
} while ($_ = <FOO>);

#print join("", @after);
#print "##########################\n";

# Print out the file with sorted includes.

print join("", @before, @includes, @after);
|
||||
|
220
util/sort_includes.py
Normal file
220
util/sort_includes.py
Normal file
|
@ -0,0 +1,220 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from file_types import *
|
||||
|
||||
# C standard library headers and the C++ header that is substituted for
# each of them when the including file is C++
cpp_c_headers = {
    'assert.h' : 'cassert',
    'ctype.h'  : 'cctype',
    'errno.h'  : 'cerrno',
    'float.h'  : 'cfloat',
    'limits.h' : 'climits',
    'locale.h' : 'clocale',
    'math.h'   : 'cmath',
    'setjmp.h' : 'csetjmp',
    'signal.h' : 'csignal',
    'stdarg.h' : 'cstdarg',
    'stddef.h' : 'cstddef',
    'stdio.h'  : 'cstdio',
    'stdlib.h' : 'cstdlib',
    'string.h' : 'cstring',
    'time.h'   : 'ctime',
    'wchar.h'  : 'cwchar',
    'wctype.h' : 'cwctype',
    }

# matches any #include, %include or %import line; group 3 is the path
# found between the delimiters
include_re = re.compile(r'([#%])(include|import).*[<"](.*)[">]')
def include_key(line):
    '''Return the sort key of an include line.

    The key is the included path with a space prepended to every
    directory component, so that directories are sorted before files.
    The line must be an include line.'''

    match = include_re.match(line)
    assert match, line
    include = match.group(3)

    # Everything but the file part needs to have a space prepended
    parts = include.split('/')
    if len(parts) == 2 and parts[0] == 'dnet':
        # Don't sort the dnet includes with respect to each other, but
        # make them sorted with respect to non dnet includes.  Python
        # guarantees that sorting is stable, so just clear the
        # basename part of the filename.
        parts[1] = ' '
    parts[0:-1] = [ ' ' + s for s in parts[0:-1] ]

    return '/'.join(parts)

class SortIncludes(object):
    '''Mutator that sorts the blocks of include lines of a file.

    An instance is called with the lines of a file (without their line
    endings), the file name and the language of the file, and generates
    the lines of the rewritten file.  Every run of include lines, with
    the blank lines between them, is replaced by the same includes
    grouped by type, sorted within each group and stripped of
    duplicates.  In C++ files the C headers listed in cpp_c_headers are
    replaced by their C++ counterparts.'''

    # different types of includes for different sorting of headers
    # <Python.h>         - Python header needs to be first if it exists
    # <*.h>              - system headers (directories before files)
    # <*>                - STL headers
    # <*.(hh|hxx|hpp|H)> - C++ Headers (directories before files)
    # "*"                - M5 headers (directories before files)
    #
    # Each entry is (type, delimiters, pattern).  The groups are written
    # out in the order of this table, and a line is given the type of
    # the first pattern that matches it.  In every pattern the first
    # group is the keyword, the second group is the included path and
    # the last group is whatever follows the closing delimiter.
    includes_re = (
        ('python', '<>', r'^(#include)[ \t]+<(Python.*\.h)>(.*)'),
        ('c', '<>', r'^(#include)[ \t]+<(.+\.h)>(.*)'),
        ('stl', '<>', r'^(#include)[ \t]+<([0-9A-Za-z_]+)>(.*)'),
        ('cc', '<>', r'^(#include)[ \t]+<([0-9A-Za-z_]+\.(hh|hxx|hpp|H))>(.*)'),
        ('m5cc', '""', r'^(#include)[ \t]+"(.+\.h{1,2})"(.*)'),
        ('swig0', '<>', r'^(%import)[ \t]+<(.+)>(.*)'),
        ('swig1', '<>', r'^(%include)[ \t]+<(.+)>(.*)'),
        ('swig2', '""', r'^(%import)[ \t]+"(.+)"(.*)'),
        ('swig3', '""', r'^(%include)[ \t]+"(.+)"(.*)'),
        )

    # compile the regexes
    includes_re = tuple((a, b, re.compile(c)) for a,b,c in includes_re)

    def __init__(self):
        self.reset()

    def reset(self):
        '''clear all stored headers'''
        self.includes = {}
        for include_type,_,_ in self.includes_re:
            self.includes[include_type] = []

    def dump_block(self):
        '''Generate the stored includes: one group per include type in
        the order of includes_re, with an empty line between groups.
        Each group is sorted according to include_key() and repeated
        lines are emitted only once.'''
        first = True
        for include_type,_,_ in self.includes_re:
            group = self.includes[include_type]
            if not group:
                continue

            if not first:
                # print a newline between groups of include types
                yield ''
            first = False

            prev = None
            for l in sorted(group, key=include_key):
                if l != prev:
                    yield l
                prev = l

    def __call__(self, lines, filename, language):
        '''Generate the lines of the file with its include blocks
        sorted.  lines must not carry line endings.  filename is
        accepted because it is part of the mutator interface but is not
        used.  language selects whether C headers are converted to
        their C++ counterparts (only for 'C++').'''

        # never carry includes over from an earlier, possibly
        # abandoned, run
        self.reset()

        blanks = 0
        block = False

        for line in lines:
            if not line:
                blanks += 1
                if not block:
                    # if we're not in an include block, spit out the
                    # newline, otherwise skip it since we're going to
                    # control newlines within the include block
                    yield ''
                continue

            # Try to match each of the include types
            for include_type,(ldelim,rdelim),regex in self.includes_re:
                match = regex.match(line)
                if not match:
                    continue

                # if we've got a match, clean up the #include line,
                # fix up stl headers and store it in the proper category
                groups = match.groups()
                keyword = groups[0]
                include = groups[1]
                extra = groups[-1]
                if include_type == 'c' and language == 'C++':
                    stl_inc = cpp_c_headers.get(include)
                    if stl_inc:
                        include = stl_inc
                        include_type = 'stl'

                line = keyword + ' ' + ldelim + include + rdelim + extra

                self.includes[include_type].append(line)

                # We've entered a block, don't keep track of blank
                # lines while in a block
                block = True
                blanks = 0
                break
            else:
                # this line did not match any of the include types

                # if we're not in a block and we didn't match an include
                # to enter a block, just emit the line and continue
                if not block:
                    yield line
                    continue

                # We've exited an include block.
                for block_line in self.dump_block():
                    yield block_line

                # if there are any newlines after the include block,
                # emit a single newline (removing extras)
                if blanks:
                    yield ''

                blanks = 0
                block = False
                self.reset()

                # emit the line that ended the block
                yield line

        if block:
            # the input ended inside an include block
            for block_line in self.dump_block():
                yield block_line

            # leave nothing behind for the next file
            self.reset()
|
||||
|
||||
|
||||
|
||||
# default language types to try to apply our sorting rules to
default_languages = frozenset(('C', 'C++', 'isa', 'python', 'scons', 'swig'))

def options():
    '''Create the command line parser of the include sorter.  The list
    valued options take comma separated strings and default to the
    joined default sets.'''
    import optparse

    parser = optparse.OptionParser()
    parser.add_option('-d', '--dir_ignore', metavar="DIR[,DIR]",
                      type='string',
                      default=','.join(default_dir_ignore),
                      help="ignore directories")
    parser.add_option('-f', '--file_ignore', metavar="FILE[,FILE]",
                      type='string',
                      default=','.join(default_file_ignore),
                      help="ignore files")
    parser.add_option('-l', '--languages', metavar="LANG[,LANG]",
                      type='string',
                      default=','.join(default_languages),
                      help="languages")
    parser.add_option('-n', '--dry-run', action='store_true',
                      help="don't overwrite files")

    return parser
|
||||
|
||||
def parse_args(parser):
    '''Parse the command line with parser and turn the comma separated
    dir_ignore, file_ignore and languages options into frozensets.

    Whitespace around the items is removed and empty items (a trailing
    or doubled comma, an empty option value) are dropped.

    Returns the (opts, args) pair produced by the parser, with the
    three options replaced by their frozensets.'''
    opts,args = parser.parse_args()

    def split_list(value):
        # "build, ext," stands for the two names 'build' and 'ext'
        items = (item.strip() for item in value.split(','))
        return frozenset(item for item in items if item)

    opts.dir_ignore = split_list(opts.dir_ignore)
    opts.file_ignore = split_list(opts.file_ignore)
    opts.languages = split_list(opts.languages)

    return opts,args
|
||||
|
||||
if __name__ == '__main__':
    # sort the includes of every matching file found below each of the
    # directories named on the command line; with --dry-run only list
    # the files and their languages
    parser = options()
    opts, args = parse_args(parser)

    for base in args:
        matches = find_files(base, languages=opts.languages,
                             file_ignore=opts.file_ignore,
                             dir_ignore=opts.dir_ignore)
        for filename,language in matches:
            if not opts.dry_run:
                update_file(filename, filename, language, SortIncludes())
                continue

            print("%s: %s" % (filename, language))
|
Loading…
Reference in a new issue