style: Refactor the style checker as a Python package

Refactor the style checker into a Python module that can be reused by command line tools that integrate with git. In particular: * Create a style package in util * Move style validators from style.py to the style/validators.py. * Move style verifiers from style.py to the style/verifiers.py. * Move utility functions (sort_includes, region handling, file_types) into the style package * Move generic code from style.py to style/style.py. Signed-off-by: Andreas Sandberg <andreas.sandberg@arm.com> Reviewed-by: Curtis Dunham <curtis.dunham@arm.com> Reviewed-by: Steve Reinhardt <steve.reinhardt@amd.com> --HG-- rename : util/style.py => util/hgstyle.py rename : util/sort_includes.py => util/style/sort_includes.py extra : rebase_source : ad6cf9b9a18c48350dfc7b7c77bea6c5344fb53c
2016-03-30 15:30:32 +01:00 · 2016-03-30 15:30:32 +01:00 · 2580fcd9d7
commit 2580fcd9d7
parent 062b6c4c9d
10 changed files with 988 additions and 644 deletions
--- a/src/python/m5/util/__init__.py
+++ b/src/python/m5/util/__init__.py
@@ -40,7 +40,6 @@ from multidict import multidict
 from orderdict import orderdict
 from smartdict import SmartDict
 from sorteddict import SortedDict
-from region import neg_inf, pos_inf, Region, Regions

 # panic() should be called when something happens that should never
 # ever happen regardless of what the user does (i.e., an acutal m5
--- a/util/file_types.py
+++ b/util/file_types.py
@@ -26,179 +26,3 @@
 #
 # Authors: Nathan Binkert

-import os
-
-# lanuage type for each file extension
-lang_types = {
-    '.c'     : "C",
-    '.cl'    : "C",
-    '.h'     : "C",
-    '.cc'    : "C++",
-    '.hh'    : "C++",
-    '.cxx'   : "C++",
-    '.hxx'   : "C++",
-    '.cpp'   : "C++",
-    '.hpp'   : "C++",
-    '.C'     : "C++",
-    '.H'     : "C++",
-    '.i'     : "swig",
-    '.py'    : "python",
-    '.pl'    : "perl",
-    '.pm'    : "perl",
-    '.s'     : "asm",
-    '.S'     : "asm",
-    '.l'     : "lex",
-    '.ll'    : "lex",
-    '.y'     : "yacc",
-    '.yy'    : "yacc",
-    '.isa'   : "isa",
-    '.sh'    : "shell",
-    '.slicc' : "slicc",
-    '.sm'    : "slicc",
-    '.awk'   : "awk",
-    '.el'    : "lisp",
-    '.txt'   : "text",
-    '.tex'   : "tex",
-    '.mk'    : "make",
-    }
-
-# languages based on file prefix
-lang_prefixes = (
-    ('SCons',    'scons'),
-    ('Make',     'make'),
-    ('make',     'make'),
-    ('Doxyfile', 'doxygen'),
-    )
-
-# languages based on #! line of first file
-hash_bang = (
-    ('python', 'python'),
-    ('perl',   'perl'),
-    ('sh',     'shell'),
-    )
-
-# the list of all languages that we detect
-all_languages = frozenset(lang_types.itervalues())
-all_languages |= frozenset(lang for start,lang in lang_prefixes)
-all_languages |= frozenset(lang for start,lang in hash_bang)
-
-def lang_type(filename, firstline=None, openok=True):
-    '''identify the language of a given filename and potentially the
-    firstline of the file.  If the firstline of the file is not
-    provided and openok is True, open the file and read the first line
-    if necessary'''
-
-    basename = os.path.basename(filename)
-    name,extension = os.path.splitext(basename)
-
-    # first try to detect language based on file extension
-    try:
-        return lang_types[extension]
-    except KeyError:
-        pass
-
-    # now try to detect language based on file prefix
-    for start,lang in lang_prefixes:
-        if basename.startswith(start):
-            return lang
-
-    # if a first line was not provided but the file is ok to open,
-    # grab the first line of the file.
-    if firstline is None and openok:
-        handle = file(filename, 'r')
-        firstline = handle.readline()
-        handle.close()
-
-    # try to detect language based on #! in first line
-    if firstline and firstline.startswith('#!'):
-        for string,lang in hash_bang:
-            if firstline.find(string) > 0:
-                return lang
-
-    # sorry, we couldn't detect the language
-    return None
-
-# directories and files to ignore by default
-default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext'))
-default_file_ignore = frozenset(('parsetab.py', ))
-
-def find_files(base, languages=all_languages,
-               dir_ignore=default_dir_ignore,
-               file_ignore=default_file_ignore):
-    '''find all files in a directory and its subdirectories based on a
-    set of languages, ignore directories specified in dir_ignore and
-    files specified in file_ignore'''
-    if base[-1] != '/':
-        base += '/'
-
-    def update_dirs(dirs):
-        '''strip the ignored directories out of the provided list'''
-        index = len(dirs) - 1
-        for i,d in enumerate(reversed(dirs)):
-            if d in dir_ignore:
-                del dirs[index - i]
-
-    # walk over base
-    for root,dirs,files in os.walk(base):
-        root = root.replace(base, '', 1)
-
-        # strip ignored directories from the list
-        update_dirs(dirs)
-
-        for filename in files:
-            if filename in file_ignore:
-                # skip ignored files
-                continue
-
-            # try to figure out the language of the specified file
-            fullpath = os.path.join(base, root, filename)
-            language = lang_type(fullpath)
-
-            # if the file is one of the langauges that we want return
-            # its name and the language
-            if language in languages:
-                yield fullpath, language
-
-def update_file(dst, src, language, mutator):
-    '''update a file of the specified language with the provided
-    mutator generator.  If inplace is provided, update the file in
-    place and return the handle to the updated file.  If inplace is
-    false, write the updated file to cStringIO'''
-
-    # if the source and destination are the same, we're updating in place
-    inplace = dst == src
-
-    if isinstance(src, str):
-        # if a filename was provided, open the file
-        if inplace:
-            mode = 'r+'
-        else:
-            mode = 'r'
-        src = file(src, mode)
-
-    orig_lines = []
-
-    # grab all of the lines of the file and strip them of their line ending
-    old_lines = list(line.rstrip('\r\n') for line in src.xreadlines())
-    new_lines = list(mutator(old_lines, src.name, language))
-
-    for line in src.xreadlines():
-        line = line
-
-    if inplace:
-        # if we're updating in place and the file hasn't changed, do nothing
-        if old_lines == new_lines:
-            return
-
-        # otherwise, truncate the file and seek to the beginning.
-        dst = src
-        dst.truncate(0)
-        dst.seek(0)
-    elif isinstance(dst, str):
-        # if we're not updating in place and a destination file name
-        # was provided, create a file object
-        dst = file(dst, 'w')
-
-    for line in new_lines:
-        dst.write(line)
-        dst.write('\n')
--- a/util/hgstyle.py
+++ b/util/hgstyle.py
@@ -42,62 +42,20 @@
 # Authors: Nathan Binkert
 #          Steve Reinhardt

-import heapq
-import os
-import re
 import sys
+import os
+from os.path import join as joinpath

-from os.path import dirname, join as joinpath
-from itertools import count
-from mercurial import bdiff, mdiff, commands
-
-current_dir = dirname(__file__)
+current_dir = os.path.dirname(__file__)
 sys.path.insert(0, current_dir)
-sys.path.insert(1, joinpath(dirname(current_dir), 'src', 'python'))

-from m5.util import neg_inf, pos_inf, Region, Regions
-import sort_includes
-from file_types import lang_type
-
-all_regions = Regions(Region(neg_inf, pos_inf))
-
-tabsize = 8
-lead = re.compile(r'^([ \t]+)')
-trail = re.compile(r'([ \t]+)$')
-any_control = re.compile(r'\b(if|while|for)([ \t]*)\(')
-
-format_types = set(('C', 'C++'))
-
-
-def re_ignore(expr):
-    """Helper function to create regular expression ignore file
-    matcher functions"""
-
-    rex = re.compile(expr)
-    def match_re(fname):
-        return rex.match(fname)
-    return match_re
-
-# This list contains a list of functions that are called to determine
-# if a file should be excluded from the style matching rules or
-# not. The functions are called with the file name relative to the
-# repository root (without a leading slash) as their argument. A file
-# is excluded if any function in the list returns true.
-style_ignores = [
-    # Ignore external projects as they are unlikely to follow the gem5
-    # coding convention.
-    re_ignore("^ext/"),
-]
-
-def check_ignores(fname):
-    """Check if a file name matches any of the ignore rules"""
-
-    for rule in style_ignores:
-        if rule(fname):
-            return True
-
-    return False
+from style.verifiers import all_verifiers
+from style.validators import all_validators
+from style.file_types import lang_type
+from style.style import MercurialUI, check_ignores
+from style.region import *

+from mercurial import bdiff, mdiff, commands

 def modified_regions(old_data, new_data):
    regions = Regions()
@@ -126,375 +84,12 @@ def modregions(wctx, fname):

    return mod_regions

-class UserInterface(object):
-    def __init__(self, verbose=False):
-        self.verbose = verbose

-    def prompt(self, prompt, results, default):
-        while True:
-            result = self.do_prompt(prompt, results, default)
-            if result in results:
-                return result
-
-class MercurialUI(UserInterface):
-    def __init__(self, ui, *args, **kwargs):
-        super(MercurialUI, self).__init__(*args, **kwargs)
-        self.ui = ui
-
-    def do_prompt(self, prompt, results, default):
-        return self.ui.prompt(prompt, default=default)
-
-    def write(self, string):
-        self.ui.write(string)
-
-class StdioUI(UserInterface):
-    def do_prompt(self, prompt, results, default):
-        return raw_input(prompt) or default
-
-    def write(self, string):
-        sys.stdout.write(string)
-
-
-class Verifier(object):
-    """Base class for style verifier objects
-
-    Subclasses must define these class attributes:
-      languages = set of strings identifying applicable languages
-      test_name = long descriptive name of test, will be used in
-                  messages such as "error in <foo>" or "invalid <foo>"
-      opt_name = short name used to generate command-line options to
-                 control the test (--fix-<foo>, --ignore-<foo>, etc.)
-    """
-
-    def __init__(self, ui, repo, opts):
-        self.ui = ui
-        self.repo = repo
-        # opt_name must be defined as a class attribute of derived classes.
-        # Check test-specific opts first as these have precedence.
-        self.opt_fix = opts.get('fix_' + self.opt_name, False)
-        self.opt_ignore = opts.get('ignore_' + self.opt_name, False)
-        self.opt_skip = opts.get('skip_' + self.opt_name, False)
-        # If no test-specific opts were set, then set based on "-all" opts.
-        if not (self.opt_fix or self.opt_ignore or self.opt_skip):
-            self.opt_fix = opts.get('fix_all', False)
-            self.opt_ignore = opts.get('ignore_all', False)
-            self.opt_skip = opts.get('skip_all', False)
-
-    def __getattr__(self, attr):
-        if attr in ('prompt', 'write'):
-            return getattr(self.ui, attr)
-
-        if attr == 'wctx':
-            try:
-                wctx = repo.workingctx()
-            except:
-                from mercurial import context
-                wctx = context.workingctx(repo)
-            self.wctx = wctx
-            return wctx
-
-        raise AttributeError
-
-    def open(self, filename, mode):
-        filename = self.repo.wjoin(filename)
-
-        try:
-            f = file(filename, mode)
-        except OSError, msg:
-            print 'could not open file %s: %s' % (filename, msg)
-            return None
-
-        return f
-
-    def skip(self, filename):
-        filename = self.repo.wjoin(filename)
-
-        # We never want to handle symlinks, so always skip them: If the location
-        # pointed to is a directory, skip it. If the location is a file inside
-        # the gem5 directory, it will be checked as a file, so symlink can be
-        # skipped. If the location is a file outside gem5, we don't want to
-        # check it anyway.
-        if os.path.islink(filename):
-            return True
-        return lang_type(filename) not in self.languages
-
-    def check(self, filename, regions=all_regions):
-        """Check specified regions of file 'filename'.
-
-        Line-by-line checks can simply provide a check_line() method
-        that returns True if the line is OK and False if it has an
-        error.  Verifiers that need a multi-line view (like
-        SortedIncludes) must override this entire function.
-
-        Returns a count of errors (0 if none), though actual non-zero
-        count value is not currently used anywhere.
-        """
-
-        f = self.open(filename, 'r')
-
-        errors = 0
-        for num,line in enumerate(f):
-            if num not in regions:
-                continue
-            line = line.rstrip('\n')
-            if not self.check_line(line):
-                self.write("invalid %s in %s:%d\n" % \
-                           (self.test_name, filename, num + 1))
-                if self.ui.verbose:
-                    self.write(">>%s<<\n" % line[:-1])
-                errors += 1
-        return errors
-
-    def fix(self, filename, regions=all_regions):
-        """Fix specified regions of file 'filename'.
-
-        Line-by-line fixes can simply provide a fix_line() method that
-        returns the fixed line. Verifiers that need a multi-line view
-        (like SortedIncludes) must override this entire function.
-        """
-
-        f = self.open(filename, 'r+')
-
-        lines = list(f)
-
-        f.seek(0)
-        f.truncate()
-
-        for i,line in enumerate(lines):
-            if i in regions:
-                line = self.fix_line(line)
-
-            f.write(line)
-        f.close()
-
-
-    def apply(self, filename, regions=all_regions):
-        """Possibly apply to specified regions of file 'filename'.
-
-        Verifier is skipped if --skip-<test> option was provided or if
-        file is not of an applicable type.  Otherwise file is checked
-        and error messages printed.  Errors are fixed or ignored if
-        the corresponding --fix-<test> or --ignore-<test> options were
-        provided.  If neither, the user is prompted for an action.
-
-        Returns True to abort, False otherwise.
-        """
-        if not (self.opt_skip or self.skip(filename)):
-            errors = self.check(filename, regions)
-            if errors and not self.opt_ignore:
-                if self.opt_fix:
-                    self.fix(filename, regions)
-                else:
-                    result = self.ui.prompt("(a)bort, (i)gnore, or (f)ix?",
-                                            'aif', 'a')
-                    if result == 'f':
-                        self.fix(filename, regions)
-                    elif result == 'a':
-                        return True # abort
-
-        return False
-
-
-class Whitespace(Verifier):
-    """Check whitespace.
-
-    Specifically:
-    - No tabs used for indent
-    - No trailing whitespace
-    """
-
-    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
-    test_name = 'whitespace'
-    opt_name = 'white'
-
-    def check_line(self, line):
-        match = lead.search(line)
-        if match and match.group(1).find('\t') != -1:
-            return False
-
-        match = trail.search(line)
-        if match:
-            return False
-
-        return True
-
-    def fix_line(self, line):
-        if lead.search(line):
-            newline = ''
-            for i,c in enumerate(line):
-                if c == ' ':
-                    newline += ' '
-                elif c == '\t':
-                    newline += ' ' * (tabsize - len(newline) % tabsize)
-                else:
-                    newline += line[i:]
-                    break
-
-            line = newline
-
-        return line.rstrip() + '\n'
-
-
-class ControlSpace(Verifier):
-    """Check for exactly one space after if/while/for"""
-
-    languages = set(('C', 'C++'))
-    test_name = 'spacing after if/while/for'
-    opt_name = 'control'
-
-    def check_line(self, line):
-        match = any_control.search(line)
-        return not (match and match.group(2) != " ")
-
-    def fix_line(self, line):
-        new_line = any_control.sub(r'\1 (', line)
-        return new_line
-
-
-class SortedIncludes(Verifier):
-    """Check for proper sorting of include statements"""
-
-    languages = sort_includes.default_languages
-    test_name = 'include file order'
-    opt_name = 'include'
-
-    def __init__(self, *args, **kwargs):
-        super(SortedIncludes, self).__init__(*args, **kwargs)
-        self.sort_includes = sort_includes.SortIncludes()
-
-    def check(self, filename, regions=all_regions):
-        f = self.open(filename, 'r')
-
-        lines = [ l.rstrip('\n') for l in f.xreadlines() ]
-        old = ''.join(line + '\n' for line in lines)
-        f.close()
-
-        if len(lines) == 0:
-            return 0
-
-        language = lang_type(filename, lines[0])
-        sort_lines = list(self.sort_includes(lines, filename, language))
-        new = ''.join(line + '\n' for line in sort_lines)
-
-        mod = modified_regions(old, new)
-        modified = mod & regions
-
-        if modified:
-            self.write("invalid sorting of includes in %s\n" % (filename))
-            if self.ui.verbose:
-                for start, end in modified.regions:
-                    self.write("bad region [%d, %d)\n" % (start, end))
-            return 1
-
-        return 0
-
-    def fix(self, filename, regions=all_regions):
-        f = self.open(filename, 'r+')
-
-        old = f.readlines()
-        lines = [ l.rstrip('\n') for l in old ]
-        language = lang_type(filename, lines[0])
-        sort_lines = list(self.sort_includes(lines, filename, language))
-        new = ''.join(line + '\n' for line in sort_lines)
-
-        f.seek(0)
-        f.truncate()
-
-        for i,line in enumerate(sort_lines):
-            f.write(line)
-            f.write('\n')
-        f.close()
-
-
-def linelen(line):
-    tabs = line.count('\t')
-    if not tabs:
-        return len(line)
-
-    count = 0
-    for c in line:
-        if c == '\t':
-            count += tabsize - count % tabsize
-        else:
-            count += 1
-
-    return count
-
-class LineLength(Verifier):
-    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
-    test_name = 'line length'
-    opt_name = 'length'
-
-    def check_line(self, line):
-        return linelen(line) <= 78
-
-    def fix(self, filename, regions=all_regions):
-        self.write("Warning: cannot automatically fix overly long lines.\n")
-
-
-class BoolCompare(Verifier):
-    languages = set(('C', 'C++', 'python'))
-    test_name = 'boolean comparison'
-    opt_name = 'boolcomp'
-
-    regex = re.compile(r'\s*==\s*([Tt]rue|[Ff]alse)\b')
-
-    def check_line(self, line):
-        return self.regex.search(line) == None
-
-    def fix_line(self, line):
-        match = self.regex.search(line)
-        if match:
-            if match.group(1) in ('true', 'True'):
-                line = self.regex.sub('', line)
-            else:
-                self.write("Warning: cannot automatically fix "
-                           "comparisons with false/False.\n")
-        return line
-
-
-# list of all verifier classes
-all_verifiers = [
-    Whitespace,
-    ControlSpace,
-    LineLength,
-    BoolCompare,
-    SortedIncludes
-]
-
-class ValidationStats(object):
-    def __init__(self):
-        self.toolong = 0
-        self.toolong80 = 0
-        self.leadtabs = 0
-        self.trailwhite = 0
-        self.badcontrol = 0
-        self.cret = 0
-
-    def dump(self):
-        print '''\
-%d violations of lines over 79 chars. %d of which are 80 chars exactly.
-%d cases of whitespace at the end of a line.
-%d cases of tabs to indent.
-%d bad parens after if/while/for.
-%d carriage returns found.
-''' % (self.toolong, self.toolong80, self.trailwhite, self.leadtabs,
-       self.badcontrol, self.cret)
-
-    def __nonzero__(self):
-        return self.toolong or self.toolong80 or self.leadtabs or \
-               self.trailwhite or self.badcontrol or self.cret
-
-def validate(filename, stats, verbose, exit_code):
+def validate(filename, verbose, exit_code):
    lang = lang_type(filename)
-    if lang not in format_types:
+    if lang not in ('C', 'C++'):
        return

-    def msg(lineno, line, message):
-        print '%s:%d>' % (filename, lineno + 1), message
-        if verbose > 2:
-            print line
-
    def bad():
        if exit_code is not None:
            sys.exit(exit_code)
@@ -505,51 +100,18 @@ def validate(filename, stats, verbose, exit_code):
        if verbose > 0:
            print 'could not open file %s' % filename
        bad()
-        return
+        return None
+
+    vals = [ v(filename, verbose=(verbose > 1), language=lang)
+             for v in all_validators ]

    for i, line in enumerate(f):
        line = line.rstrip('\n')
+        for v in vals:
+            v.validate_line(i, line)

-        # no carriage returns
-        if line.find('\r') != -1:
-            self.cret += 1
-            if verbose > 1:
-                msg(i, line, 'carriage return found')
-            bad()

-        # lines max out at 79 chars
-        llen = linelen(line)
-        if llen > 79:
-            stats.toolong += 1
-            if llen == 80:
-                stats.toolong80 += 1
-            if verbose > 1:
-                msg(i, line, 'line too long (%d chars)' % llen)
-            bad()
-
-        # no tabs used to indent
-        match = lead.search(line)
-        if match and match.group(1).find('\t') != -1:
-            stats.leadtabs += 1
-            if verbose > 1:
-                msg(i, line, 'using tabs to indent')
-            bad()
-
-        # no trailing whitespace
-        if trail.search(line):
-            stats.trailwhite +=1
-            if verbose > 1:
-                msg(i, line, 'trailing whitespace')
-            bad()
-
-        # for c++, exactly one space betwen if/while/for and (
-        if lang == 'C++':
-            match = any_control.search(line)
-            if match and match.group(2) != " ":
-                stats.badcontrol += 1
-                if verbose > 1:
-                    msg(i, line, 'improper spacing after %s' % match.group(1))
-                bad()
+    return vals


 def _modified_regions(repo, patterns, **kwargs):
@@ -627,11 +189,11 @@ def do_check_style(hgui, repo, *pats, **opts):
    ui = MercurialUI(hgui, verbose=hgui.verbose)

    # instantiate varifier objects
-    verifiers = [v(ui, repo, opts) for v in all_verifiers]
+    verifiers = [v(ui, opts, base=repo.root) for v in all_verifiers]

    for fname, mod_regions in _modified_regions(repo, pats, **opts):
        for verifier in verifiers:
-            if verifier.apply(fname, mod_regions):
+            if verifier.apply(joinpath(repo.root, fname), mod_regions):
                return True

    return False
@@ -653,11 +215,13 @@ def do_check_format(hgui, repo, *pats, **opts):

    verbose = 0
    for fname, mod_regions in _modified_regions(repo, pats, **opts):
-        stats = ValidationStats()
-        validate(joinpath(repo.root, fname), stats, verbose, None)
-        if stats:
+        vals = validate(joinpath(repo.root, fname), verbose, None)
+        if vals is None:
+            return True
+        elif any([not v for v in vals]):
            print "%s:" % fname
-            stats.dump()
+            for v in vals:
+                v.dump()
            result = ui.prompt("invalid formatting\n(i)gnore or (a)bort?",
                               'ai', 'a')
            if result == 'a':
@@ -744,9 +308,10 @@ if __name__ == '__main__':

    args = parser.parse_args()

-    stats = ValidationStats()
    for filename in args.file:
-        validate(filename, stats=stats, verbose=args.verbose, exit_code=1)
+        vals = validate(filename, verbose=args.verbose,
+                        exit_code=1)

-        if args.verbose > 0:
-            stats.dump()
+        if args.verbose > 0 and vals is not None:
+            for v in vals:
+                v.dump()
--- a/util/style/__init__.py
+++ b/util/style/__init__.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 ARM Limited
+# All rights reserved
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Andreas Sandberg
--- a/util/style/file_types.py
+++ b/util/style/file_types.py
@@ -0,0 +1,176 @@
+import os
+
+# language type for each file extension
+lang_types = {
+    '.c'     : "C",
+    '.cl'    : "C",
+    '.h'     : "C",
+    '.cc'    : "C++",
+    '.hh'    : "C++",
+    '.cxx'   : "C++",
+    '.hxx'   : "C++",
+    '.cpp'   : "C++",
+    '.hpp'   : "C++",
+    '.C'     : "C++",
+    '.H'     : "C++",
+    '.i'     : "swig",
+    '.py'    : "python",
+    '.pl'    : "perl",
+    '.pm'    : "perl",
+    '.s'     : "asm",
+    '.S'     : "asm",
+    '.l'     : "lex",
+    '.ll'    : "lex",
+    '.y'     : "yacc",
+    '.yy'    : "yacc",
+    '.isa'   : "isa",
+    '.sh'    : "shell",
+    '.slicc' : "slicc",
+    '.sm'    : "slicc",
+    '.awk'   : "awk",
+    '.el'    : "lisp",
+    '.txt'   : "text",
+    '.tex'   : "tex",
+    '.mk'    : "make",
+    }
+
+# languages based on file prefix
+lang_prefixes = (
+    ('SCons',    'scons'),
+    ('Make',     'make'),
+    ('make',     'make'),
+    ('Doxyfile', 'doxygen'),
+    )
+
+# languages based on the #! (first) line of the file
+hash_bang = (
+    ('python', 'python'),
+    ('perl',   'perl'),
+    ('sh',     'shell'),
+    )
+
+# the list of all languages that we detect
+all_languages = frozenset(lang_types.itervalues())
+all_languages |= frozenset(lang for start,lang in lang_prefixes)
+all_languages |= frozenset(lang for start,lang in hash_bang)
+
+def lang_type(filename, firstline=None, openok=True):
+    '''identify the language of a given filename and potentially the
+    firstline of the file.  If the firstline of the file is not
+    provided and openok is True, open the file and read the first line
+    if necessary'''
+
+    basename = os.path.basename(filename)
+    name,extension = os.path.splitext(basename)
+
+    # first try to detect language based on file extension
+    try:
+        return lang_types[extension]
+    except KeyError:
+        pass
+
+    # now try to detect language based on file prefix
+    for start,lang in lang_prefixes:
+        if basename.startswith(start):
+            return lang
+
+    # if a first line was not provided but the file is ok to open,
+    # grab the first line of the file.
+    if firstline is None and openok:
+        handle = file(filename, 'r')
+        firstline = handle.readline()
+        handle.close()
+
+    # try to detect language based on #! in first line
+    if firstline and firstline.startswith('#!'):
+        for string,lang in hash_bang:
+            if firstline.find(string) > 0:
+                return lang
+
+    # sorry, we couldn't detect the language
+    return None
+
+# directories and files to ignore by default
+default_dir_ignore = frozenset(('.hg', '.svn', 'build', 'ext'))
+default_file_ignore = frozenset(('parsetab.py', ))
+
+def find_files(base, languages=all_languages,
+               dir_ignore=default_dir_ignore,
+               file_ignore=default_file_ignore):
+    '''find all files in a directory and its subdirectories based on a
+    set of languages, ignore directories specified in dir_ignore and
+    files specified in file_ignore'''
+    if base[-1] != '/':
+        base += '/'
+
+    def update_dirs(dirs):
+        '''strip the ignored directories out of the provided list'''
+        index = len(dirs) - 1
+        for i,d in enumerate(reversed(dirs)):
+            if d in dir_ignore:
+                del dirs[index - i]
+
+    # walk over base
+    for root,dirs,files in os.walk(base):
+        root = root.replace(base, '', 1)
+
+        # strip ignored directories from the list
+        update_dirs(dirs)
+
+        for filename in files:
+            if filename in file_ignore:
+                # skip ignored files
+                continue
+
+            # try to figure out the language of the specified file
+            fullpath = os.path.join(base, root, filename)
+            language = lang_type(fullpath)
+
+            # if the file is one of the languages that we want, return
+            # its name and the language
+            if language in languages:
+                yield fullpath, language
+
+def update_file(dst, src, language, mutator):
+    '''update a file of the specified language with the provided
+    mutator generator.  If dst and src are the same, update the file
+    in place; otherwise write the mutated lines to dst, which may be
+    either a file name or an object providing write()'''
+
+    # if the source and destination are the same, we're updating in place
+    inplace = dst == src
+
+    if isinstance(src, str):
+        # if a filename was provided, open the file
+        if inplace:
+            mode = 'r+'
+        else:
+            mode = 'r'
+        src = file(src, mode)
+
+    orig_lines = []
+
+    # grab all of the lines of the file and strip them of their line ending
+    old_lines = list(line.rstrip('\r\n') for line in src.xreadlines())
+    new_lines = list(mutator(old_lines, src.name, language))
+
+    for line in src.xreadlines():
+        line = line
+
+    if inplace:
+        # if we're updating in place and the file hasn't changed, do nothing
+        if old_lines == new_lines:
+            return
+
+        # otherwise, truncate the file and seek to the beginning.
+        dst = src
+        dst.truncate(0)
+        dst.seek(0)
+    elif isinstance(dst, str):
+        # if we're not updating in place and a destination file name
+        # was provided, create a file object
+        dst = file(dst, 'w')
+
+    for line in new_lines:
+        dst.write(line)
+        dst.write('\n')
--- a/src/python/m5/util/region.py
+++ b/src/python/m5/util/region.py
@ -230,6 +230,8 @@ class Regions(object):
    def __repr__(self):
        return 'Regions(%s)' % ([(r[0], r[1]) for r in self.regions], )

+# A Regions object holding the single Region from neg_inf to pos_inf
+all_regions = Regions(Region(neg_inf, pos_inf))
+
 if __name__ == '__main__':
    x = Regions(*((i, i + 1) for i in xrange(0,30,2)))
    y = Regions(*((i, i + 4) for i in xrange(0,30,5)))
--- a/util/style/sort_includes.py
+++ b/util/style/sort_includes.py
--- a/util/style/style.py
+++ b/util/style/style.py
@ -0,0 +1,149 @@
+#! /usr/bin/env python
+# Copyright (c) 2014, 2016 ARM Limited
+# All rights reserved
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Copyright (c) 2006 The Regents of The University of Michigan
+# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
+# Copyright (c) 2016 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+#          Steve Reinhardt
+#          Andreas Sandberg
+
+from abc import ABCMeta, abstractmethod
+import difflib
+import re
+import sys
+
+from region import *
+
+# Tab stop distance used by normalized_len() when expanding tabs
+tabsize = 8
+# Run of spaces/tabs at the very start of a line (group 1)
+lead = re.compile(r'^([ \t]+)')
+# Run of spaces/tabs at the very end of a line (group 1)
+trail = re.compile(r'([ \t]+)$')
+# if/while/for followed by an opening parenthesis; group 1 is the
+# keyword, group 2 the (possibly empty) whitespace in between
+any_control = re.compile(r'\b(if|while|for)([ \t]*)\(')
+
+
+class UserInterface(object):
+    """Abstract front end for interacting with the user.
+
+    Subclasses provide _prompt() to ask a single question and write()
+    to emit a string.
+    """
+
+    __metaclass__ = ABCMeta
+
+    def __init__(self, verbose=False):
+        self.verbose = verbose
+
+    def prompt(self, prompt, results, default):
+        """Ask repeatedly until the answer is contained in results"""
+        answer = self._prompt(prompt, results, default)
+        while answer not in results:
+            answer = self._prompt(prompt, results, default)
+        return answer
+
+    @abstractmethod
+    def _prompt(self, prompt, results, default):
+        """Ask the question once and return the answer"""
+        pass
+
+    @abstractmethod
+    def write(self, string):
+        """Output string to the user"""
+        pass
+
+class StdioUI(UserInterface):
+    """User interface that prompts with raw_input() and writes to
+    sys.stdout"""
+
+    def _prompt(self, prompt, results, default):
+        # an empty reply selects the default answer
+        reply = raw_input(prompt)
+        if reply:
+            return reply
+        return default
+
+    def write(self, string):
+        out = sys.stdout
+        out.write(string)
+
+class MercurialUI(UserInterface):
+    """User interface that forwards prompts and output to the ui
+    object handed to the constructor"""
+
+    def __init__(self, ui, *args, **kwargs):
+        # everything except ui is passed on to UserInterface
+        super(MercurialUI, self).__init__(*args, **kwargs)
+        self.hg_ui = ui
+
+    def _prompt(self, prompt, results, default):
+        hg = self.hg_ui
+        return hg.prompt(prompt, default=default)
+
+    def write(self, string):
+        hg = self.hg_ui
+        hg.write(string)
+
+
+def _re_ignore(expr):
+    """Build an ignore-file matcher from a regular expression.
+
+    The returned function takes a file name and returns the result of
+    matching the compiled expression against the start of that name
+    (a match object, or None when it does not match)."""
+
+    pattern = re.compile(expr)
+    return lambda fname: pattern.match(fname)
+
+# List of functions that are called to determine whether a file
+# should be excluded from the style matching rules. Each function is
+# called with the file name relative to the repository root (without
+# a leading slash) as its argument. A file is excluded if any
+# function in the list returns true.
+style_ignores = [
+    # Ignore external projects as they are unlikely to follow the gem5
+    # coding convention.
+    _re_ignore("^ext/"),
+]
+
+def check_ignores(fname):
+    """Return True if at least one rule in style_ignores matches the
+    file name, False otherwise"""
+
+    return any(rule(fname) for rule in style_ignores)
+
+
+def normalized_len(line):
+    """Compute the length of a line with every tab expanded to the
+    next multiple of tabsize columns"""
+
+    width = 0
+    for char in line:
+        if char != '\t':
+            width += 1
+            continue
+
+        # a tab advances to the next tab stop
+        width = (width // tabsize + 1) * tabsize
+
+    return width
+
+def modified_regions(old, new, context=0):
+    """Build a Regions object from the grouped difflib opcodes that
+    turn the sequence old into the sequence new.
+
+    context is passed to SequenceMatcher.get_grouped_opcodes().
+    """
+
+    matcher = difflib.SequenceMatcher(a=old, b=new, autojunk=False)
+    result = Regions()
+    for opcodes in matcher.get_grouped_opcodes(context):
+        # opcodes are (tag, i1, i2, j1, j2) tuples; a group spans from
+        # j1 of its first opcode to j2 of its last one
+        start = opcodes[0][3]
+        stop = opcodes[-1][4] + 1
+        result.extend(Region(start, stop))
+
+    return result
--- a/util/style/validators.py
+++ b/util/style/validators.py
@ -0,0 +1,212 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2014, 2016 ARM Limited
+# All rights reserved
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Copyright (c) 2006 The Regents of The University of Michigan
+# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
+# Copyright (c) 2016 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+#          Steve Reinhardt
+#          Andreas Sandberg
+
+from abc import ABCMeta, abstractmethod
+import inspect
+import re
+import sys
+
+import style
+
+# Tab stop distance
+tabsize = 8
+# Run of spaces/tabs at the very start of a line (group 1)
+lead = re.compile(r'^([ \t]+)')
+# Run of spaces/tabs at the very end of a line (group 1)
+trail = re.compile(r'([ \t]+)$')
+# if/while/for followed by an opening parenthesis; group 1 is the
+# keyword, group 2 the (possibly empty) whitespace in between.
+# Used by ControlSpacing.
+any_control = re.compile(r'\b(if|while|for)([ \t]*)\(')
+
+class Validator(object):
+    """Base class for style validators
+
+    Validators analyze source files for common style violations and
+    produce source code style violation statistics. Unlike style
+    verifiers (see verifiers.py), they do not try to fix any style
+    violations.
+
+    Deprecation warning: These classes are currently only used by the
+    "hg m5format" command and not by any style hooks. New style
+    checkers should inherit from Verifier instead of Validator.
+
+    """
+
+    __metaclass__ = ABCMeta
+
+    def __init__(self, file_name, verbose=False, language=None):
+        self.bad = 0
+        self.file_name = file_name
+        self.language = language
+        self.verbose = verbose
+
+    def fail_line(self, line_no, line, message):
+        """Print a violation for the 0-based line_no and count it"""
+        location = '%s:%d>' % (self.file_name, line_no + 1)
+        print location, message
+        if self.verbose:
+            print line
+        self.bad += 1
+
+    def __nonzero__(self):
+        # a validator is true as long as it has not counted a violation
+        return not self.bad
+
+    @classmethod
+    def supported_lang(cls, language):
+        """Every language is accepted by default"""
+        return True
+
+    @abstractmethod
+    def validate_line(self, line_no, line):
+        """Check a single line; implemented by subclasses"""
+        pass
+
+    @abstractmethod
+    def dump(self):
+        """Print the collected statistics; implemented by subclasses"""
+        pass
+
+class SimpleValidator(Validator):
+    """Validator for checks that only need a pass/fail answer per line.
+
+    Subclasses implement simple_validate_line() and hand a failure
+    message and a dump() format string to the constructor. They may
+    set supported_langs to restrict the languages they apply to; an
+    empty set means every language.
+    """
+
+    supported_langs = set()
+
+    def __init__(self, fail_message, dump_message, file_name, **kwargs):
+        super(SimpleValidator, self).__init__(file_name, **kwargs)
+
+        # message reported for each offending line
+        self.fail_message = fail_message
+        # format string for dump(); formatted with a dict providing "bad"
+        self.dump_message = dump_message
+
+    @classmethod
+    def supported_lang(cls, language):
+        """Return True if language is supported (or no restriction is
+        set)"""
+        return not cls.supported_langs or language in cls.supported_langs
+
+    def validate_line(self, line_no, line):
+        """Return True if the line is fine, otherwise report it and
+        return False"""
+        if not self.simple_validate_line(line):
+            self.fail_line(line_no, line, self.fail_message)
+            return False
+        else:
+            return True
+
+    @abstractmethod
+    def simple_validate_line(self, line):
+        """Return a true value if line is acceptable"""
+        pass
+
+    def dump(self):
+        print self.dump_message % {
+            "bad" : self.bad
+        }
+
+class LineLength(Validator):
+    """Count lines that are longer than 79 columns after tab
+    expansion, keeping a separate tally of lines that are exactly 80
+    columns wide"""
+
+    def __init__(self, *args, **kwargs):
+        super(LineLength, self).__init__(*args, **kwargs)
+
+        self.toolong80 = 0
+
+    def validate_line(self, line_no, line):
+        width = style.normalized_len(line)
+        if width == 80:
+            self.toolong80 += 1
+
+        if width <= 79:
+            return True
+
+        self.fail_line(line_no, line, 'line too long (%d chars)' % width)
+        return False
+
+    def dump(self):
+        counts = (self.bad, self.toolong80)
+        print "%d violations of lines over 79 chars. " \
+            "%d of which are 80 chars exactly." % counts
+
+class ControlSpacing(Validator):
+    """Count if/while/for keywords that are not followed by exactly
+    one space before the opening parenthesis"""
+
+    # NOTE(review): Validator.supported_lang() accepts every language
+    # and is not overridden here, so this set looks unused - confirm
+    # whether a language filter was intended.
+    supported_langs = set(('C', 'C++'))
+
+    def validate_line(self, line_no, line):
+        """Return True if the line is fine, otherwise report it and
+        return False"""
+        match = any_control.search(line)
+        if match and match.group(2) != " ":
+            # fail_line() does the counting (self.bad)
+            self.fail_line(line_no, line,
+                           'improper spacing after %s' % match.group(1))
+            return False
+        else:
+            return True
+
+    def dump(self):
+        print "%d bad parens after if/while/for." % (self.bad, )
+
+class CarriageReturn(SimpleValidator):
+    """Count lines that contain a carriage return character"""
+
+    def __init__(self, *args, **kwargs):
+        fail_message = "carriage return found"
+        dump_message = "%(bad)d carriage returns found."
+        super(CarriageReturn, self).__init__(
+            fail_message, dump_message, *args, **kwargs)
+
+    def simple_validate_line(self, line):
+        return '\r' not in line
+
+class TabIndent(SimpleValidator):
+    """Count lines whose leading whitespace contains a tab"""
+
+    lead = re.compile(r'^([ \t]+)')
+
+    def __init__(self, *args, **kwargs):
+        fail_message = "using tabs to indent"
+        dump_message = "%(bad)d cases of tabs to indent."
+        super(TabIndent, self).__init__(
+            fail_message, dump_message, *args, **kwargs)
+
+    def simple_validate_line(self, line):
+        indent = TabIndent.lead.search(line)
+        if indent is None:
+            # no leading whitespace at all
+            return True
+        return '\t' not in indent.group(1)
+
+class TrailingWhitespace(SimpleValidator):
+    """Count lines that end in spaces or tabs"""
+
+    trail = re.compile(r'([ \t]+)$')
+
+    def __init__(self, *args, **kwargs):
+        fail_message = "trailing whitespace"
+        dump_message = "%(bad)d cases of whitespace at the end of a line."
+        super(TrailingWhitespace, self).__init__(
+            fail_message, dump_message, *args, **kwargs)
+
+    def simple_validate_line(self, line):
+        return TrailingWhitespace.trail.search(line) is None
+
+def is_validator(cls):
+    """Return True for concrete (non-abstract) subclasses of
+    Validator, False for anything else"""
+
+    if not inspect.isclass(cls):
+        return False
+    if not issubclass(cls, Validator):
+        return False
+    return not inspect.isabstract(cls)
+
+# list of all validator classes defined in this module that can be
+# instantiated (see is_validator)
+all_validators = [ v for n, v in \
+                  inspect.getmembers(sys.modules[__name__], is_validator) ]
+
--- a/util/style/verifiers.py
+++ b/util/style/verifiers.py
@ -0,0 +1,379 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2014, 2016 ARM Limited
+# All rights reserved
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Copyright (c) 2006 The Regents of The University of Michigan
+# Copyright (c) 2007,2011 The Hewlett-Packard Development Company
+# Copyright (c) 2016 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+#          Steve Reinhardt
+#          Andreas Sandberg
+
+from abc import ABCMeta, abstractmethod
+from difflib import SequenceMatcher
+import inspect
+import os
+import re
+import sys
+
+import style
+import sort_includes
+from region import *
+from file_types import lang_type
+
+def _modified_regions(old, new):
+    """Return the regions of old (as index ranges into old) that are
+    not reported as equal when comparing it to new"""
+
+    matcher = SequenceMatcher(a=old, b=new, autojunk=False)
+
+    changed = Regions()
+    for opcode in matcher.get_opcodes():
+        # opcode is (tag, i1, i2, j1, j2); i1/i2 index into old
+        tag, old_start, old_end = opcode[:3]
+        if tag == "equal":
+            continue
+        changed.extend(Region(old_start, old_end))
+    return changed
+
+
+class Verifier(object):
+    """Base class for style verifiers
+
+    Verifiers check for style violations and optionally fix such
+    violations. Implementations should either inherit from this class
+    (Verifier) if they need to work on entire files or LineVerifier if
+    they operate on a line-by-line basis.
+
+    Subclasses must define these class attributes:
+      languages = set of strings identifying applicable languages
+      test_name = long descriptive name of test, will be used in
+                  messages such as "error in <foo>" or "invalid <foo>"
+      opt_name = short name used to generate command-line options to
+                 control the test (--fix-<foo>, --ignore-<foo>, etc.)
+
+    """
+
+    __metaclass__ = ABCMeta
+
+    def __init__(self, ui, opts, base=None):
+        self.ui = ui
+        self.base = base
+
+        # opt_name must be defined as a class attribute of derived classes.
+        # Check test-specific opts first as these have precedence.
+        self.opt_fix = opts.get('fix_' + self.opt_name, False)
+        self.opt_ignore = opts.get('ignore_' + self.opt_name, False)
+        self.opt_skip = opts.get('skip_' + self.opt_name, False)
+        # If no test-specific opts were set, then set based on "-all" opts.
+        if not (self.opt_fix or self.opt_ignore or self.opt_skip):
+            self.opt_fix = opts.get('fix_all', False)
+            self.opt_ignore = opts.get('ignore_all', False)
+            self.opt_skip = opts.get('skip_all', False)
+
+    def normalize_filename(self, name):
+        """Return name as an absolute path, or relative to self.base
+        if a base directory was provided"""
+        abs_name = os.path.abspath(name)
+        if self.base is None:
+            return abs_name
+
+        abs_base = os.path.abspath(self.base)
+        return os.path.relpath(abs_name, start=abs_base)
+
+    def open(self, filename, mode):
+        """Open filename with the given mode.
+
+        Returns the file object, or None (after printing a message)
+        if the file could not be opened.
+        """
+        try:
+            f = file(filename, mode)
+        except (IOError, OSError) as msg:
+            # file() reports failures as IOError, so catching only
+            # OSError would never trigger this handler
+            print 'could not open file %s: %s' % (filename, msg)
+            return None
+
+        return f
+
+    def skip(self, filename):
+        """Return True if this verifier does not apply to filename"""
+        # We never want to handle symlinks, so always skip them: If the location
+        # pointed to is a directory, skip it. If the location is a file inside
+        # the gem5 directory, it will be checked as a file, so symlink can be
+        # skipped. If the location is a file outside gem5, we don't want to
+        # check it anyway.
+        if os.path.islink(filename):
+            return True
+        return lang_type(filename) not in self.languages
+
+    def apply(self, filename, regions=all_regions):
+        """Possibly apply to specified regions of file 'filename'.
+
+        Verifier is skipped if --skip-<test> option was provided or if
+        file is not of an applicable type.  Otherwise file is checked
+        and error messages printed.  Errors are fixed or ignored if
+        the corresponding --fix-<test> or --ignore-<test> options were
+        provided.  If neither, the user is prompted for an action.
+
+        Returns True to abort, False otherwise.
+        """
+        if not (self.opt_skip or self.skip(filename)):
+            errors = self.check(filename, regions)
+            if errors and not self.opt_ignore:
+                if self.opt_fix:
+                    self.fix(filename, regions)
+                else:
+                    result = self.ui.prompt("(a)bort, (i)gnore, or (f)ix?",
+                                            'aif', 'a')
+                    if result == 'f':
+                        self.fix(filename, regions)
+                    elif result == 'a':
+                        return True # abort
+
+        return False
+
+    @abstractmethod
+    def check(self, filename, regions=all_regions):
+        """Check specified regions of file 'filename'.
+
+        Line-by-line checks can simply provide a check_line() method
+        that returns True if the line is OK and False if it has an
+        error.  Verifiers that need a multi-line view (like
+        SortedIncludes) must override this entire function.
+
+        Returns a count of errors (0 if none), though actual non-zero
+        count value is not currently used anywhere.
+        """
+        pass
+
+    @abstractmethod
+    def fix(self, filename, regions=all_regions):
+        """Fix specified regions of file 'filename'.
+
+        Line-by-line fixes can simply provide a fix_line() method that
+        returns the fixed line. Verifiers that need a multi-line view
+        (like SortedIncludes) must override this entire function.
+        """
+        pass
+
+class LineVerifier(Verifier):
+    """Base class for verifiers that work on one line at a time.
+
+    Subclasses implement check_line() and fix_line(); both receive the
+    line without its trailing newline. fix_line() must return the
+    fixed line without a trailing newline as well, since fix() writes
+    the line terminator itself.
+    """
+
+    def check(self, filename, regions=all_regions):
+        """Check the lines of filename that fall into regions and
+        return the number of offending lines"""
+        f = self.open(filename, 'r')
+        if f is None:
+            # open() has already reported the problem
+            return 0
+
+        errors = 0
+        try:
+            for num,line in enumerate(f):
+                if num not in regions:
+                    continue
+                line = line.rstrip('\n')
+                if not self.check_line(line):
+                    self.ui.write("invalid %s in %s:%d\n" % \
+                                  (self.test_name, filename, num + 1))
+                    if self.ui.verbose:
+                        # the newline is already gone; print the
+                        # whole line
+                        self.ui.write(">>%s<<\n" % line)
+                    errors += 1
+        finally:
+            f.close()
+        return errors
+
+    def fix(self, filename, regions=all_regions):
+        """Rewrite filename, passing every line that falls into
+        regions through fix_line()"""
+        f = self.open(filename, 'r+')
+        if f is None:
+            # open() has already reported the problem
+            return
+
+        try:
+            lines = list(f)
+
+            f.seek(0)
+            f.truncate()
+
+            for i,line in enumerate(lines):
+                line = line.rstrip('\n')
+                if i in regions:
+                    line = self.fix_line(line)
+
+                f.write(line)
+                f.write("\n")
+        finally:
+            f.close()
+
+
+    @abstractmethod
+    def check_line(self, line):
+        """Return True if line is OK, False if it has an error"""
+        pass
+
+    @abstractmethod
+    def fix_line(self, line):
+        """Return the fixed version of line"""
+        pass
+
+class Whitespace(LineVerifier):
+    """Check whitespace.
+
+    Specifically:
+    - No tabs used for indent
+    - No trailing whitespace
+    """
+
+    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
+    test_name = 'whitespace'
+    opt_name = 'white'
+
+    _lead = re.compile(r'^([ \t]+)')
+    _trail = re.compile(r'([ \t]+)$')
+
+    def check_line(self, line):
+        """Return False if the line is indented with tabs or has
+        trailing whitespace, True otherwise"""
+        match = Whitespace._lead.search(line)
+        if match and match.group(1).find('\t') != -1:
+            return False
+
+        match = Whitespace._trail.search(line)
+        if match:
+            return False
+
+        return True
+
+    def fix_line(self, line):
+        """Return line with tabs in the leading whitespace expanded
+        to spaces and trailing whitespace removed.
+
+        No newline is appended; LineVerifier.fix() writes the line
+        terminator itself.
+        """
+        if Whitespace._lead.search(line):
+            newline = ''
+            for i,c in enumerate(line):
+                if c == ' ':
+                    newline += ' '
+                elif c == '\t':
+                    # pad up to the next tab stop; the tab size lives
+                    # in the style module
+                    newline += ' ' * (style.tabsize -
+                                      len(newline) % style.tabsize)
+                else:
+                    newline += line[i:]
+                    break
+
+            line = newline
+
+        return line.rstrip()
+
+
+class SortedIncludes(Verifier):
+    """Check for proper sorting of include statements"""
+
+    languages = sort_includes.default_languages
+    test_name = 'include file order'
+    opt_name = 'include'
+
+    def __init__(self, *args, **kwargs):
+        super(SortedIncludes, self).__init__(*args, **kwargs)
+        self.sort_includes = sort_includes.SortIncludes()
+
+    def check(self, filename, regions=all_regions):
+        """Return 1 if sorting the includes would change any line
+        inside regions, 0 otherwise"""
+        f = self.open(filename, 'r')
+        if f is None:
+            # open() has already reported the problem
+            return 0
+        norm_fname = self.normalize_filename(filename)
+
+        try:
+            old = [ l.rstrip('\n') for l in f ]
+        finally:
+            f.close()
+
+        if not old:
+            return 0
+
+        language = lang_type(filename, old[0])
+        new = list(self.sort_includes(old, norm_fname, language))
+
+        modified = _modified_regions(old, new) & regions
+
+        if modified:
+            self.ui.write("invalid sorting of includes in %s\n" % (filename))
+            if self.ui.verbose:
+                for start, end in modified.regions:
+                    self.ui.write("bad region [%d, %d)\n" % (start, end))
+            return 1
+
+        return 0
+
+    def fix(self, filename, regions=all_regions):
+        """Rewrite filename with its includes sorted.
+
+        The whole file is rewritten; regions is not consulted.
+        """
+        f = self.open(filename, 'r+')
+        if f is None:
+            # open() has already reported the problem
+            return
+        # use the same (normalized) name as check() so that both see
+        # the same sorting result
+        norm_fname = self.normalize_filename(filename)
+
+        try:
+            lines = [ l.rstrip('\n') for l in f.readlines() ]
+            if not lines:
+                # nothing to sort in an empty file
+                return
+
+            language = lang_type(filename, lines[0])
+            sort_lines = list(self.sort_includes(lines, norm_fname, language))
+
+            f.seek(0)
+            f.truncate()
+
+            for line in sort_lines:
+                f.write(line)
+                f.write('\n')
+        finally:
+            f.close()
+
+
+class ControlSpace(LineVerifier):
+    """Check for exactly one space after if/while/for"""
+
+    languages = set(('C', 'C++'))
+    test_name = 'spacing after if/while/for'
+    opt_name = 'control'
+
+    # group 1 is the keyword, group 2 the whitespace before the paren
+    _any_control = re.compile(r'\b(if|while|for)([ \t]*)\(')
+
+    def check_line(self, line):
+        """Return False if the first if/while/for on the line is not
+        followed by exactly one space, True otherwise"""
+        match = ControlSpace._any_control.search(line)
+        return not (match and match.group(2) != " ")
+
+    def fix_line(self, line):
+        """Return line with a single space between each if/while/for
+        and its opening parenthesis"""
+        # the pattern is a class attribute; there is no module-level
+        # _any_control in this file
+        new_line = ControlSpace._any_control.sub(r'\1 (', line)
+        return new_line
+
+
+class LineLength(LineVerifier):
+    """Check that lines are at most 78 columns wide after tab
+    expansion"""
+
+    languages = set(('C', 'C++', 'swig', 'python', 'asm', 'isa', 'scons'))
+    test_name = 'line length'
+    opt_name = 'length'
+
+    def check_line(self, line):
+        width = style.normalized_len(line)
+        return not width > 78
+
+    def fix(self, filename, regions=all_regions):
+        # replaces LineVerifier.fix(): the file is left untouched and
+        # only a warning is written
+        message = "Warning: cannot automatically fix overly long lines.\n"
+        self.ui.write(message)
+
+    def fix_line(self, line):
+        # not called by fix() above; defined because fix_line is
+        # abstract in LineVerifier
+        pass
+
+class BoolCompare(LineVerifier):
+    """Check for explicit comparisons with boolean constants
+    (== true/True/false/False)"""
+
+    languages = set(('C', 'C++', 'python'))
+    test_name = 'boolean comparison'
+    opt_name = 'boolcomp'
+
+    # group 1 is the boolean constant being compared against
+    regex = re.compile(r'\s*==\s*([Tt]rue|[Ff]alse)\b')
+
+    def check_line(self, line):
+        """Return True if the line has no comparison with a boolean
+        constant"""
+        return self.regex.search(line) is None
+
+    def fix_line(self, line):
+        """Return line with comparisons against true/True removed.
+
+        Comparisons against false/False are left in place (dropping
+        them would invert the condition) and a warning is written
+        instead.
+        """
+        unfixed = []
+
+        def drop_true(match):
+            if match.group(1) in ('true', 'True'):
+                return ''
+            # keep the text as it is and remember that we skipped it
+            unfixed.append(match.group(0))
+            return match.group(0)
+
+        line = self.regex.sub(drop_true, line)
+        if unfixed:
+            self.ui.write("Warning: cannot automatically fix "
+                          "comparisons with false/False.\n")
+        return line
+
+def is_verifier(cls):
+    """Return True for concrete (non-abstract) subclasses of
+    Verifier, False for anything else"""
+
+    if not inspect.isclass(cls):
+        return False
+    if not issubclass(cls, Verifier):
+        return False
+    return not inspect.isabstract(cls)
+
+# list of all verifier classes defined in this module that can be
+# instantiated (see is_verifier)
+all_verifiers = [ v for n, v in \
+                  inspect.getmembers(sys.modules[__name__], is_verifier) ]