diff --git a/src/arch/alpha/isa/mem.isa b/src/arch/alpha/isa/mem.isa index b1703221f..efff0eac7 100644 --- a/src/arch/alpha/isa/mem.isa +++ b/src/arch/alpha/isa/mem.isa @@ -275,7 +275,6 @@ def template StoreExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -310,7 +309,6 @@ def template StoreCondExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, &write_result); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -344,7 +342,6 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } return fault; @@ -478,9 +475,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, mem_flags = makeList(mem_flags) inst_flags = makeList(inst_flags) - # add hook to get effective addresses into execution trace output. - ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n' - # Some CPU models execute the memory operation as an atomic unit, # while others want to separate them into an effective address # computation and a memory access operation. As a result, we need diff --git a/src/arch/arm/isa/formats/mem.isa b/src/arch/arm/isa/formats/mem.isa index 0b0a4c9fa..2f66ca54e 100644 --- a/src/arch/arm/isa/formats/mem.isa +++ b/src/arch/arm/isa/formats/mem.isa @@ -172,7 +172,6 @@ def template StoreExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -204,7 +203,6 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } // Need to write back any potential address register update diff --git a/src/arch/mips/isa/formats/mem.isa b/src/arch/mips/isa/formats/mem.isa index 161a52b06..411cc5fda 100644 --- a/src/arch/mips/isa/formats/mem.isa +++ b/src/arch/mips/isa/formats/mem.isa @@ -305,7 +305,6 @@ def template StoreExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -342,7 +341,6 @@ def template StoreFPExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -377,7 +375,6 @@ def template StoreCondExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, &write_result); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -411,7 +408,6 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } return fault; @@ -435,8 +431,6 @@ def template StoreCompleteAcc {{ if (fault == NoFault) { %(op_wb)s; - - if (traceData) { traceData->setData(getMemData(xc, pkt)); } } return fault; @@ -459,8 +453,6 @@ def template StoreCompleteAcc {{ if (fault == NoFault) { %(op_wb)s; - - if (traceData) { traceData->setData(getMemData(xc, pkt)); } } return fault; diff --git a/src/arch/mips/isa/formats/util.isa b/src/arch/mips/isa/formats/util.isa index a6edffeda..708338074 100644 --- 
a/src/arch/mips/isa/formats/util.isa +++ b/src/arch/mips/isa/formats/util.isa @@ -38,9 +38,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, mem_flags = makeList(mem_flags) inst_flags = makeList(inst_flags) - # add hook to get effective addresses into execution trace output. - ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n' - # Some CPU models execute the memory operation as an atomic unit, # while others want to separate them into an effective address # computation and a memory access operation. As a result, we need diff --git a/src/arch/power/isa/formats/mem.isa b/src/arch/power/isa/formats/mem.isa index 1be49c2f7..3bcf0633a 100644 --- a/src/arch/power/isa/formats/mem.isa +++ b/src/arch/power/isa/formats/mem.isa @@ -166,7 +166,6 @@ def template StoreExecute {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } if (fault == NoFault) { @@ -196,7 +195,6 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, memAccessFlags, NULL); - if (traceData) { traceData->setData(Mem); } } // Need to write back any potential address register update diff --git a/src/arch/power/isa/formats/util.isa b/src/arch/power/isa/formats/util.isa index ab1e530b2..8fd7f7daa 100644 --- a/src/arch/power/isa/formats/util.isa +++ b/src/arch/power/isa/formats/util.isa @@ -97,9 +97,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, mem_flags = makeList(mem_flags) inst_flags = makeList(inst_flags) - # add hook to get effective addresses into execution trace output. - ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n' - # Generate InstObjParams for the memory access. iop = InstObjParams(name, Name, base_class, {'ea_code': ea_code, diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 376ea8d26..d12f11a2c 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -443,6 +443,10 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) //The size of the data we're trying to read. int dataSize = sizeof(T); + if (inst->traceData) { + inst->traceData->setAddr(addr); + } + if (inst->split2ndAccess) { dataSize = inst->split2ndSize; cache_req->splitAccess = true; @@ -541,6 +545,11 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, //The size of the data we're trying to read. int dataSize = sizeof(T); + if (inst->traceData) { + inst->traceData->setAddr(addr); + inst->traceData->setData(data); + } + if (inst->split2ndAccess) { dataSize = inst->split2ndSize; cache_req->splitAccess = true; diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 05b4ca3e2..d96adffd5 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -351,10 +351,6 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) } } - // This will need a new way to tell if it has a dcache attached. - if (req->isUncacheable()) - recordEvent("Uncached Read"); - //If there's a fault, return it if (fault != NoFault) { if (req->isPrefetch()) { @@ -451,6 +447,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) if (traceData) { traceData->setAddr(addr); + traceData->setData(data); } //The block size of our peer. 
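[Review note: the cache_unit.cc and atomic.cc hunks above, and the timing.cc hunks below, all apply one pattern: each CPU model's read()/write() accessor records the effective address (and, for stores, the data) into the execution trace as soon as it is entered, instead of after translation or in the per-ISA store templates, so the trace entry is populated even if the access later faults. A minimal self-contained sketch of that pattern, where TraceRecord and doWrite() are hypothetical stand-ins for gem5's traceData and a CPU write path, not gem5 API:

#include <cstdint>
#include <iostream>

// Stand-in for gem5's trace record ("traceData" in the hunks above).
struct TraceRecord {
    uint64_t addr = 0;
    uint64_t data = 0;
    void setAddr(uint64_t a) { addr = a; }
    void setData(uint64_t d) { data = d; }
};

// Record trace state up front; translation and the actual access would
// follow and may fault without losing the trace information.
int doWrite(TraceRecord *traceData, uint64_t addr, uint64_t data)
{
    if (traceData) {
        traceData->setAddr(addr);
        traceData->setData(data);
    }
    // ... translate addr, perform the store, return any fault ...
    return 0;
}

int main()
{
    TraceRecord tr;
    doWrite(&tr, 0x1000, 42);
    std::cout << std::hex << "0x" << tr.addr << " " << std::dec << tr.data
              << std::endl;
    return 0;
}

Moving the hooks to the entry point is also why the late setData(gtoh(data)) call is deleted in the next hunk: the value is already captured before the access is issued.]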
@@ -522,20 +519,10 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) } } - // This will need a new way to tell if it's hooked up to a cache or not. - if (req->isUncacheable()) - recordEvent("Uncached Write"); - //If there's a fault or we don't need to access a second cache line, //stop now. if (fault != NoFault || secondAddr <= addr) { - // If the write needs to have a fault on the access, consider - // calling changeStatus() and changing it to "bad addr write" - // or something. - if (traceData) { - traceData->setData(gtoh(data)); - } if (req->isLocked() && fault == NoFault) { assert(locked); locked = false; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 0104e1b1f..17ba6a10b 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -205,6 +205,27 @@ change_thread_state(ThreadID tid, int activate, int priority) { } +void +BaseSimpleCPU::prefetch(Addr addr, unsigned flags) +{ + if (traceData) { + traceData->setAddr(addr); + } + + // need to do this... +} + +void +BaseSimpleCPU::writeHint(Addr addr, int size, unsigned flags) +{ + if (traceData) { + traceData->setAddr(addr); + } + + // need to do this... +} + + Fault BaseSimpleCPU::copySrcTranslate(Addr src) { diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 39961fb88..87e211521 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -232,16 +232,8 @@ class BaseSimpleCPU : public BaseCPU Addr getEA() { panic("BaseSimpleCPU::getEA() not implemented\n"); M5_DUMMY_RETURN} - void prefetch(Addr addr, unsigned flags) - { - // need to do this... - } - - void writeHint(Addr addr, int size, unsigned flags) - { - // need to do this... - } - + void prefetch(Addr addr, unsigned flags); + void writeHint(Addr addr, int size, unsigned flags); Fault copySrcTranslate(Addr src); diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 221cb0d0d..b8fc5ab84 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -426,16 +426,16 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) int data_size = sizeof(T); BaseTLB::Mode mode = BaseTLB::Read; + if (traceData) { + traceData->setAddr(addr); + } + RequestPtr req = new Request(asid, addr, data_size, flags, pc, _cpuId, tid); Addr split_addr = roundDown(addr + data_size - 1, block_size); assert(split_addr <= addr || split_addr - addr < block_size); - // This will need a new way to tell if it's hooked up to a cache or not. - if (req->isUncacheable()) - recordEvent("Uncached Write"); - _status = DTBWaitResponse; if (split_addr > addr) { RequestPtr req1, req2; @@ -460,11 +460,6 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) thread->dtb->translateTiming(req, tc, translation, mode); } - if (traceData) { - traceData->setData(data); - traceData->setAddr(addr); - } - return NoFault; } @@ -548,16 +543,17 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) int data_size = sizeof(T); BaseTLB::Mode mode = BaseTLB::Write; + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + RequestPtr req = new Request(asid, addr, data_size, flags, pc, _cpuId, tid); Addr split_addr = roundDown(addr + data_size - 1, block_size); assert(split_addr <= addr || split_addr - addr < block_size); - // This will need a new way to tell if it's hooked up to a cache or not. 
- if (req->isUncacheable()) - recordEvent("Uncached Write"); - T *dataP = new T; *dataP = TheISA::htog(data); _status = DTBWaitResponse; @@ -584,13 +580,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) thread->dtb->translateTiming(req, tc, translation, mode); } - if (traceData) { - traceData->setAddr(req->getVaddr()); - traceData->setData(data); - } - - // If the write needs to have a fault on the access, consider calling - // changeStatus() and changing it to "bad addr write" or something. + // Translation faults will be returned via finishTranslation() return NoFault; } diff --git a/src/cpu/translation.hh b/src/cpu/translation.hh index 33e810710..983a748cf 100644 --- a/src/cpu/translation.hh +++ b/src/cpu/translation.hh @@ -35,6 +35,16 @@ #include "sim/tlb.hh" +/** + * This class captures the state of an address translation. A translation + * can be split in two if the ISA supports it and the memory access crosses + * a page boundary. In this case, this class is shared by two data + * translations (below). Otherwise it is used by a single data translation + * class. When each part of the translation is finished, the finish + * function is called which will indicate whether the whole translation is + * completed or not. There are also functions for accessing parts of the + * translation state which deal with the possible split correctly. + */ class WholeTranslationState { protected: @@ -50,7 +60,10 @@ class WholeTranslationState uint64_t *res; BaseTLB::Mode mode; - /** Single translation state. */ + /** + * Single translation state. We set the number of outstanding + * translations to one and indicate that it is not split. + */ WholeTranslationState(RequestPtr _req, uint8_t *_data, uint64_t *_res, BaseTLB::Mode _mode) : outstanding(1), isSplit(false), mainReq(_req), sreqLow(NULL), @@ -60,7 +73,11 @@ class WholeTranslationState assert(mode == BaseTLB::Read || mode == BaseTLB::Write); } - /** Split translation state. */ + /** + * Split translation state. We copy all state into this class, set the + * number of outstanding translations to two and then mark this as a + * split translation. + */ WholeTranslationState(RequestPtr _req, RequestPtr _sreqLow, RequestPtr _sreqHigh, uint8_t *_data, uint64_t *_res, BaseTLB::Mode _mode) @@ -71,6 +88,13 @@ class WholeTranslationState assert(mode == BaseTLB::Read || mode == BaseTLB::Write); } + /** + * Finish part of a translation. If there is only one request then this + * translation is completed. If the request has been split in two then + * the outstanding count determines whether the translation is complete. + * In this case, flags from the split request are copied to the main + * request to make it easier to access them later on. + */ bool finish(Fault fault, int index) { @@ -89,6 +113,10 @@ class WholeTranslationState return outstanding == 0; } + /** + * Determine whether this translation produced a fault. Both parts of the + * translation must be checked if this is a split translation. + */ Fault getFault() const { @@ -102,36 +130,54 @@ class WholeTranslationState return NoFault; } + /** Remove all faults from the translation. */ void setNoFault() { faults[0] = faults[1] = NoFault; } + /** + * Check if this request is uncacheable. We only need to check the main + * request because the flags will have been copied here on a split + * translation. + */ bool isUncacheable() const { return mainReq->isUncacheable(); } + /** + * Check if this request is a prefetch. 
We only need to check the main + request because the flags will have been copied here on a + * split translation. + */ bool isPrefetch() const { return mainReq->isPrefetch(); } + /** Get the physical address of this request. */ Addr getPaddr() const { return mainReq->getPaddr(); } + /** + * Get the flags associated with this request. We only need to access + * the main request because the flags will have been copied here on a + * split translation. + */ unsigned getFlags() { return mainReq->getFlags(); } + /** Delete all requests that make up this translation. */ void deleteReqs() { @@ -143,6 +189,16 @@ class WholeTranslationState } }; + +/** + * This class represents part of a data address translation. All state for + * the translation is held in WholeTranslationState (above). Therefore this + * class does not need to know whether the translation is split or not. The + * index variable determines this but is simply passed on to the state class. + * When this part of the translation is completed, finish is called. If the + * translation state class indicates that the whole translation is complete + * then the execution context is informed. + */ template <class ExecContextPtr> class DataTranslation : public BaseTLB::Translation { @@ -163,6 +219,10 @@ class DataTranslation : public BaseTLB::Translation { } + /** + * Finish this part of the translation and indicate that the whole + * translation is complete if the state says so. + */ void finish(Fault fault, RequestPtr req, ThreadContext *tc, BaseTLB::Mode mode) diff --git a/src/mem/ruby/profiler/AccessTraceForAddress.cc b/src/mem/ruby/profiler/AccessTraceForAddress.cc index 48b28b735..978b72982 100644 --- a/src/mem/ruby/profiler/AccessTraceForAddress.cc +++ b/src/mem/ruby/profiler/AccessTraceForAddress.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,100 +26,96 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * $Id$ - * - */ - -#include "mem/ruby/profiler/AccessTraceForAddress.hh" #include "mem/ruby/common/Histogram.hh" +#include "mem/ruby/profiler/AccessTraceForAddress.hh" AccessTraceForAddress::AccessTraceForAddress() { - m_histogram_ptr = NULL; + m_histogram_ptr = NULL; } AccessTraceForAddress::AccessTraceForAddress(const Address& addr) { - m_addr = addr; - m_total = 0; - m_loads = 0; - m_stores = 0; - m_atomics = 0; - m_user = 0; - m_sharing = 0; - m_histogram_ptr = NULL; + m_addr = addr; + m_total = 0; + m_loads = 0; + m_stores = 0; + m_atomics = 0; + m_user = 0; + m_sharing = 0; + m_histogram_ptr = NULL; } AccessTraceForAddress::~AccessTraceForAddress() { - if (m_histogram_ptr != NULL) { - delete m_histogram_ptr; - m_histogram_ptr = NULL; - } + if (m_histogram_ptr != NULL) { + delete m_histogram_ptr; + m_histogram_ptr = NULL; + } } -void AccessTraceForAddress::print(ostream& out) const +void +AccessTraceForAddress::print(ostream& out) const { - out << m_addr; + out << m_addr; - if (m_histogram_ptr == NULL) { - out << " " << m_total; - out << " | " << m_loads; - out << " " << m_stores; - out << " " << m_atomics; - out << " | " << m_user; - out << " " << m_total-m_user; - out << " | " << m_sharing; - out << " | " << m_touched_by.count(); - } else { + if (m_histogram_ptr == NULL) { + out << " " << m_total; + out << " | " << m_loads; + out << " " << m_stores; + out << " " << m_atomics; + out << " | " << m_user; + out << " " << m_total-m_user; + out << " | " << m_sharing; + out << " | " << m_touched_by.count(); + } else { + assert(m_total == 0); + out << " " << (*m_histogram_ptr); + } +} + +void +AccessTraceForAddress::update(CacheRequestType type, + AccessModeType access_mode, NodeID cpu, + bool sharing_miss) +{ + m_touched_by.add(cpu); + m_total++; + if(type == CacheRequestType_ATOMIC) { + m_atomics++; + } else if(type == CacheRequestType_LD){ + m_loads++; + } else if (type == CacheRequestType_ST){ + m_stores++; + } else { + // ERROR_MSG("Trying to add invalid access to trace"); + } + + if (access_mode == AccessModeType_UserMode) { + m_user++; + } + + if (sharing_miss) { + m_sharing++; + } +} + +int +AccessTraceForAddress::getTotal() const +{ + if (m_histogram_ptr == NULL) { + return m_total; + } else { + return m_histogram_ptr->getTotal(); + } +} + +void +AccessTraceForAddress::addSample(int value) +{ assert(m_total == 0); - out << " " << (*m_histogram_ptr); - } -} - -void AccessTraceForAddress::update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, bool sharing_miss) -{ - m_touched_by.add(cpu); - m_total++; - if(type == CacheRequestType_ATOMIC) { - m_atomics++; - } else if(type == CacheRequestType_LD){ - m_loads++; - } else if (type == CacheRequestType_ST){ - m_stores++; - } else { - // ERROR_MSG("Trying to add invalid access to trace"); - } - - if (access_mode == AccessModeType_UserMode) { - m_user++; - } - - if (sharing_miss) { - m_sharing++; - } -} - -int AccessTraceForAddress::getTotal() const -{ - if (m_histogram_ptr == NULL) { - return m_total; - } else { - return m_histogram_ptr->getTotal(); - } -} - -void AccessTraceForAddress::addSample(int value) -{ - assert(m_total == 0); - if (m_histogram_ptr == NULL) { - m_histogram_ptr = new Histogram; - } - m_histogram_ptr->add(value); -} - -bool node_less_then_eq(const AccessTraceForAddress* n1, const AccessTraceForAddress* n2) -{ - return (n1->getTotal() > n2->getTotal()); + if (m_histogram_ptr == NULL) { + m_histogram_ptr = new Histogram; + } + m_histogram_ptr->add(value); } diff --git 
a/src/mem/ruby/profiler/AccessTraceForAddress.hh b/src/mem/ruby/profiler/AccessTraceForAddress.hh index 2761d6de8..53b96856e 100644 --- a/src/mem/ruby/profiler/AccessTraceForAddress.hh +++ b/src/mem/ruby/profiler/AccessTraceForAddress.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,77 +26,60 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * $Id$ - * - * Description: - * - */ +#ifndef __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__ +#define __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__ -#ifndef ACCESSTRACEFORADDRESS_H -#define ACCESSTRACEFORADDRESS_H - -#include "mem/ruby/common/Global.hh" -#include "mem/ruby/common/Address.hh" -#include "mem/protocol/CacheRequestType.hh" #include "mem/protocol/AccessModeType.hh" -#include "mem/ruby/system/NodeID.hh" +#include "mem/protocol/CacheRequestType.hh" +#include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Global.hh" #include "mem/ruby/common/Set.hh" +#include "mem/ruby/system/NodeID.hh" + class Histogram; -class AccessTraceForAddress { -public: - // Constructors - AccessTraceForAddress(); - explicit AccessTraceForAddress(const Address& addr); +class AccessTraceForAddress +{ + public: + AccessTraceForAddress(); + explicit AccessTraceForAddress(const Address& addr); + ~AccessTraceForAddress(); - // Destructor - ~AccessTraceForAddress(); + void update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, + bool sharing_miss); + int getTotal() const; + int getSharing() const { return m_sharing; } + int getTouchedBy() const { return m_touched_by.count(); } + const Address& getAddress() const { return m_addr; } + void addSample(int value); - // Public Methods + void print(ostream& out) const; - void update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, bool sharing_miss); - int getTotal() const; - int getSharing() const { return m_sharing; } - int getTouchedBy() const { return m_touched_by.count(); } - const Address& getAddress() const { return m_addr; } - void addSample(int value); - - void print(ostream& out) const; -private: - // Private Methods - - // Private copy constructor and assignment operator - // AccessTraceForAddress(const AccessTraceForAddress& obj); - // AccessTraceForAddress& operator=(const AccessTraceForAddress& obj); - - // Data Members (m_ prefix) - - Address m_addr; - uint64 m_loads; - uint64 m_stores; - uint64 m_atomics; - uint64 m_total; - uint64 m_user; - uint64 m_sharing; - Set m_touched_by; - Histogram* m_histogram_ptr; + private: + Address m_addr; + uint64 m_loads; + uint64 m_stores; + uint64 m_atomics; + uint64 m_total; + uint64 m_user; + uint64 m_sharing; + Set m_touched_by; + Histogram* m_histogram_ptr; }; -bool node_less_then_eq(const AccessTraceForAddress* n1, const AccessTraceForAddress* n2); - -// Output operator declaration -ostream& operator<<(ostream& out, const AccessTraceForAddress& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const AccessTraceForAddress& obj) +inline bool +node_less_then_eq(const AccessTraceForAddress* n1, + const AccessTraceForAddress* n2) { - obj.print(out); - out << flush; - return out; + return n1->getTotal() > n2->getTotal(); } -#endif //ACCESSTRACEFORADDRESS_H +inline ostream& +operator<<(ostream& out, const AccessTraceForAddress& obj) +{ + obj.print(out); + out << flush; + return out; +} + +#endif // __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__ diff --git 
a/src/mem/ruby/profiler/AddressProfiler.cc b/src/mem/ruby/profiler/AddressProfiler.cc index c613431ca..2d7d655c0 100644 --- a/src/mem/ruby/profiler/AddressProfiler.cc +++ b/src/mem/ruby/profiler/AddressProfiler.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,272 +26,293 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * AddressProfiler.cc - * - * Description: See AddressProfiler.hh - * - * $Id$ - * - */ -#include "mem/ruby/profiler/AddressProfiler.hh" +#include "mem/gems_common/Map.hh" +#include "mem/gems_common/PrioHeap.hh" #include "mem/protocol/CacheMsg.hh" #include "mem/ruby/profiler/AccessTraceForAddress.hh" -#include "mem/gems_common/PrioHeap.hh" -#include "mem/gems_common/Map.hh" -#include "mem/ruby/system/System.hh" +#include "mem/ruby/profiler/AddressProfiler.hh" #include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/system/System.hh" + +typedef AddressProfiler::AddressMap AddressMap; // Helper functions -static AccessTraceForAddress& lookupTraceForAddress(const Address& addr, - Map<Address, AccessTraceForAddress>* record_map); +AccessTraceForAddress& +lookupTraceForAddress(const Address& addr, AddressMap* record_map) +{ + if (!record_map->exist(addr)) { + record_map->add(addr, AccessTraceForAddress(addr)); + } + return record_map->lookup(addr); +} -static void printSorted(ostream& out, - int num_of_sequencers, - const Map<Address, AccessTraceForAddress>* record_map, - string description); +void +printSorted(ostream& out, int num_of_sequencers, const AddressMap* record_map, + string description) +{ + const int records_printed = 100; + + uint64 misses = 0; + PrioHeap<AccessTraceForAddress*> heap; + Vector<Address> keys = record_map->keys(); + for (int i = 0; i < keys.size(); i++) { + AccessTraceForAddress* record = &(record_map->lookup(keys[i])); + misses += record->getTotal(); + heap.insert(record); + } + + out << "Total_entries_" << description << ": " << keys.size() << endl; + if (g_system_ptr->getProfiler()->getAllInstructions()) + out << "Total_Instructions_" << description << ": " << misses << endl; + else + out << "Total_data_misses_" << description << ": " << misses << endl; + + out << "total | load store atomic | user supervisor | sharing | touched-by" + << endl; + + Histogram remaining_records(1, 100); + Histogram all_records(1, 100); + Histogram remaining_records_log(-1); + Histogram all_records_log(-1); + + // Allows us to track how many lines were touched by n processors + Vector<int> m_touched_vec; + Vector<int> m_touched_weighted_vec; + m_touched_vec.setSize(num_of_sequencers+1); + m_touched_weighted_vec.setSize(num_of_sequencers+1); + for (int i = 0; i < m_touched_vec.size(); i++) { + m_touched_vec[i] = 0; + m_touched_weighted_vec[i] = 0; + } + + int counter = 0; + while (heap.size() > 0 && counter < records_printed) { + AccessTraceForAddress* record = heap.extractMin(); + double percent = 100.0 * (record->getTotal() / double(misses)); + out << description << " | " << percent << " % " << *record << endl; + all_records.add(record->getTotal()); + all_records_log.add(record->getTotal()); + counter++; + m_touched_vec[record->getTouchedBy()]++; + m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); + } + + while (heap.size() > 0) { + AccessTraceForAddress* record = heap.extractMin(); + all_records.add(record->getTotal()); + remaining_records.add(record->getTotal()); + all_records_log.add(record->getTotal()); + remaining_records_log.add(record->getTotal()); + m_touched_vec[record->getTouchedBy()]++; + m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); + } + out << endl; + out << "all_records_" << description << ": " + << all_records << endl + << "all_records_log_" << description << ": " + << all_records_log << endl + << "remaining_records_" << description << ": " + << remaining_records << endl + << "remaining_records_log_" << description << ": " + << remaining_records_log << endl + << "touched_by_" << description << ": " + << m_touched_vec << endl + << "touched_by_weighted_" << description << ": " + << m_touched_weighted_vec << endl + << endl; +} AddressProfiler::AddressProfiler(int num_of_sequencers) { - m_dataAccessTrace = new Map<Address, AccessTraceForAddress>; - m_macroBlockAccessTrace = new Map<Address, AccessTraceForAddress>; - m_programCounterAccessTrace = new Map<Address, AccessTraceForAddress>; - m_retryProfileMap = new Map<Address, AccessTraceForAddress>; - m_num_of_sequencers = num_of_sequencers; - clearStats(); + m_dataAccessTrace = new AddressMap; + m_macroBlockAccessTrace = new AddressMap; + m_programCounterAccessTrace = new AddressMap; + m_retryProfileMap = new AddressMap; + m_num_of_sequencers = num_of_sequencers; + clearStats(); } AddressProfiler::~AddressProfiler() { - delete m_dataAccessTrace; - delete m_macroBlockAccessTrace; - delete m_programCounterAccessTrace; - delete m_retryProfileMap; + delete m_dataAccessTrace; + delete m_macroBlockAccessTrace; + delete m_programCounterAccessTrace; + delete m_retryProfileMap; } -void AddressProfiler::setHotLines(bool hot_lines){ - m_hot_lines = hot_lines; -} -void AddressProfiler::setAllInstructions(bool all_instructions){ - m_all_instructions = all_instructions; -} - -void AddressProfiler::printStats(ostream& out) const +void +AddressProfiler::setHotLines(bool hot_lines) { - if (m_hot_lines) { - out << endl; - out << 
"AddressProfiler Stats" << endl; - out << "---------------------" << endl; - - out << endl; - out << "sharing_misses: " << m_sharing_miss_counter << endl; - out << "getx_sharing_histogram: " << m_getx_sharing_histogram << endl; - out << "gets_sharing_histogram: " << m_gets_sharing_histogram << endl; - - out << endl; - out << "Hot Data Blocks" << endl; - out << "---------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_dataAccessTrace, "block_address"); - - out << endl; - out << "Hot MacroData Blocks" << endl; - out << "--------------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_macroBlockAccessTrace, "macroblock_address"); - - out << "Hot Instructions" << endl; - out << "----------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, "pc_address"); - } - - if (m_all_instructions){ - out << endl; - out << "All Instructions Profile:" << endl; - out << "-------------------------" << endl; - out << endl; - printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, "pc_address"); - out << endl; - } - - if (m_retryProfileHisto.size() > 0) { - out << "Retry Profile" << endl; - out << "-------------" << endl; - out << endl; - out << "retry_histogram_absolute: " << m_retryProfileHisto << endl; - out << "retry_histogram_write: " << m_retryProfileHistoWrite << endl; - out << "retry_histogram_read: " << m_retryProfileHistoRead << endl; - - out << "retry_histogram_percent: "; - m_retryProfileHisto.printPercent(out); - out << endl; - - printSorted(out, m_num_of_sequencers, m_retryProfileMap, "block_address"); - out << endl; - } - + m_hot_lines = hot_lines; } -void AddressProfiler::clearStats() +void +AddressProfiler::setAllInstructions(bool all_instructions) { - // Clear the maps - m_sharing_miss_counter = 0; - m_dataAccessTrace->clear(); - m_macroBlockAccessTrace->clear(); - m_programCounterAccessTrace->clear(); - m_retryProfileMap->clear(); - m_retryProfileHisto.clear(); - m_retryProfileHistoRead.clear(); - m_retryProfileHistoWrite.clear(); - m_getx_sharing_histogram.clear(); - m_gets_sharing_histogram.clear(); + m_all_instructions = all_instructions; } -void AddressProfiler::profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor) +void +AddressProfiler::printStats(ostream& out) const { - Set indirection_set; - indirection_set.addSet(sharers); - indirection_set.addSet(owner); - indirection_set.remove(requestor); - int num_indirections = indirection_set.count(); + if (m_hot_lines) { + out << endl; + out << "AddressProfiler Stats" << endl; + out << "---------------------" << endl; - m_getx_sharing_histogram.add(num_indirections); - bool indirection_miss = (num_indirections > 0); + out << endl; + out << "sharing_misses: " << m_sharing_miss_counter << endl; + out << "getx_sharing_histogram: " << m_getx_sharing_histogram << endl; + out << "gets_sharing_histogram: " << m_gets_sharing_histogram << endl; - addTraceSample(datablock, PC, CacheRequestType_ST, AccessModeType(0), requestor, indirection_miss); -} + out << endl; + out << "Hot Data Blocks" << endl; + out << "---------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_dataAccessTrace, + "block_address"); -void AddressProfiler::profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor) -{ - Set indirection_set; - indirection_set.addSet(owner); - indirection_set.remove(requestor); - int num_indirections = 
indirection_set.count(); + out << endl; + out << "Hot MacroData Blocks" << endl; + out << "--------------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_macroBlockAccessTrace, + "macroblock_address"); - m_gets_sharing_histogram.add(num_indirections); - bool indirection_miss = (num_indirections > 0); - - addTraceSample(datablock, PC, CacheRequestType_LD, AccessModeType(0), requestor, indirection_miss); -} - -void AddressProfiler::addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss) -{ - if (m_all_instructions) { - if (sharing_miss) { - m_sharing_miss_counter++; + out << "Hot Instructions" << endl; + out << "----------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, + "pc_address"); } - // record data address trace info - data_addr.makeLineAddress(); - lookupTraceForAddress(data_addr, m_dataAccessTrace).update(type, access_mode, id, sharing_miss); + if (m_all_instructions) { + out << endl; + out << "All Instructions Profile:" << endl; + out << "-------------------------" << endl; + out << endl; + printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, + "pc_address"); + out << endl; + } - // record macro data address trace info - Address macro_addr(data_addr.maskLowOrderBits(10)); // 6 for datablock, 4 to make it 16x more coarse - lookupTraceForAddress(macro_addr, m_macroBlockAccessTrace).update(type, access_mode, id, sharing_miss); + if (m_retryProfileHisto.size() > 0) { + out << "Retry Profile" << endl; + out << "-------------" << endl; + out << endl; + out << "retry_histogram_absolute: " << m_retryProfileHisto << endl; + out << "retry_histogram_write: " << m_retryProfileHistoWrite << endl; + out << "retry_histogram_read: " << m_retryProfileHistoRead << endl; - // record program counter address trace info - lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).update(type, access_mode, id, sharing_miss); - } + out << "retry_histogram_percent: "; + m_retryProfileHisto.printPercent(out); + out << endl; - if (m_all_instructions) { - // This code is used if the address profiler is an all-instructions profiler - // record program counter address trace info - lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).update(type, access_mode, id, sharing_miss); - } + printSorted(out, m_num_of_sequencers, m_retryProfileMap, + "block_address"); + out << endl; + } } -void AddressProfiler::profileRetry(const Address& data_addr, AccessType type, int count) +void +AddressProfiler::clearStats() { - m_retryProfileHisto.add(count); - if (type == AccessType_Read) { - m_retryProfileHistoRead.add(count); - } else { - m_retryProfileHistoWrite.add(count); - } - if (count > 1) { - lookupTraceForAddress(data_addr, m_retryProfileMap).addSample(count); - } + // Clear the maps + m_sharing_miss_counter = 0; + m_dataAccessTrace->clear(); + m_macroBlockAccessTrace->clear(); + m_programCounterAccessTrace->clear(); + m_retryProfileMap->clear(); + m_retryProfileHisto.clear(); + m_retryProfileHistoRead.clear(); + m_retryProfileHistoWrite.clear(); + m_getx_sharing_histogram.clear(); + m_gets_sharing_histogram.clear(); } -// ***** Normal Functions ****** - -static void printSorted(ostream& out, - int num_of_sequencers, - const Map<Address, AccessTraceForAddress>* record_map, - string description) +void +AddressProfiler::profileGetX(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, + NodeID requestor) { - const int records_printed = 100; + Set 
indirection_set; + indirection_set.addSet(sharers); + indirection_set.addSet(owner); + indirection_set.remove(requestor); + int num_indirections = indirection_set.count(); - uint64 misses = 0; - PrioHeap<AccessTraceForAddress*> heap; - Vector<Address> keys = record_map->keys(); - for(int i=0; i<keys.size(); i++) { - AccessTraceForAddress* record = &(record_map->lookup(keys[i])); - misses += record->getTotal(); - heap.insert(record); - } + m_getx_sharing_histogram.add(num_indirections); + bool indirection_miss = (num_indirections > 0); - out << "Total_entries_" << description << ": " << keys.size() << endl; - if (g_system_ptr->getProfiler()->getAllInstructions()) - out << "Total_Instructions_" << description << ": " << misses << endl; - else - out << "Total_data_misses_" << description << ": " << misses << endl; + addTraceSample(datablock, PC, CacheRequestType_ST, AccessModeType(0), + requestor, indirection_miss); +} - out << "total | load store atomic | user supervisor | sharing | touched-by" << endl; - - Histogram remaining_records(1, 100); - Histogram all_records(1, 100); - Histogram remaining_records_log(-1); - Histogram all_records_log(-1); - - // Allows us to track how many lines where touched by n processors - Vector<int> m_touched_vec; - Vector<int> m_touched_weighted_vec; - m_touched_vec.setSize(num_of_sequencers+1); - m_touched_weighted_vec.setSize(num_of_sequencers+1); - for (int i=0; i<m_touched_vec.size(); i++) { - m_touched_vec[i] = 0; - m_touched_weighted_vec[i] = 0; - } - - int counter = 0; - while((heap.size() > 0) && (counter < records_printed)) { - AccessTraceForAddress* record = heap.extractMin(); - double percent = 100.0*(record->getTotal()/double(misses)); - out << description << " | " << percent << " % " << *record << endl; - all_records.add(record->getTotal()); - all_records_log.add(record->getTotal()); - counter++; - m_touched_vec[record->getTouchedBy()]++; - m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); - } - - while(heap.size() > 0) { - AccessTraceForAddress* record = heap.extractMin(); - all_records.add(record->getTotal()); - remaining_records.add(record->getTotal()); - all_records_log.add(record->getTotal()); - remaining_records_log.add(record->getTotal()); - m_touched_vec[record->getTouchedBy()]++; - m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal(); - } - out << endl; - out << "all_records_" << description << ": " << all_records << endl; - out << "all_records_log_" << description << ": " << all_records_log << endl; - out << "remaining_records_" << description << ": " << remaining_records << endl; - out << "remaining_records_log_" << description << ": " << remaining_records_log << endl; - out << "touched_by_" << description << ": " << m_touched_vec << endl; - out << "touched_by_weighted_" << description << ": " << m_touched_weighted_vec << endl; - out << endl; + +void +AddressProfiler::profileGetS(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, + NodeID requestor) +{ + Set indirection_set; + indirection_set.addSet(owner); + indirection_set.remove(requestor); + int num_indirections = indirection_set.count(); + + m_gets_sharing_histogram.add(num_indirections); + bool indirection_miss = (num_indirections > 0); + + addTraceSample(datablock, PC, CacheRequestType_LD, AccessModeType(0), + requestor, indirection_miss); +} -static AccessTraceForAddress& lookupTraceForAddress(const Address& addr, Map<Address, AccessTraceForAddress>* record_map) +void +AddressProfiler::addTraceSample(Address data_addr, Address pc_addr, + CacheRequestType type, + AccessModeType access_mode, NodeID id, + bool sharing_miss) { - if(record_map->exist(addr) == false){ - record_map->add(addr, AccessTraceForAddress(addr)); - } - return record_map->lookup(addr); + if (m_all_instructions) { + if (sharing_miss) { + m_sharing_miss_counter++; + } + + // record data address trace info + data_addr.makeLineAddress(); + lookupTraceForAddress(data_addr, m_dataAccessTrace). 
+ update(type, access_mode, id, sharing_miss); + + // record macro data address trace info + + // 6 for datablock, 4 to make it 16x more coarse + Address macro_addr(data_addr.maskLowOrderBits(10)); + lookupTraceForAddress(macro_addr, m_macroBlockAccessTrace). + update(type, access_mode, id, sharing_miss); + + // record program counter address trace info + lookupTraceForAddress(pc_addr, m_programCounterAccessTrace). + update(type, access_mode, id, sharing_miss); + } + + if (m_all_instructions) { + // This code is used if the address profiler is an + // all-instructions profiler; record program counter address + // trace info + lookupTraceForAddress(pc_addr, m_programCounterAccessTrace). + update(type, access_mode, id, sharing_miss); + } +} + +void +AddressProfiler::profileRetry(const Address& data_addr, AccessType type, + int count) +{ + m_retryProfileHisto.add(count); + if (type == AccessType_Read) { + m_retryProfileHistoRead.add(count); + } else { + m_retryProfileHistoWrite.add(count); + } + if (count > 1) { + lookupTraceForAddress(data_addr, m_retryProfileMap).addSample(count); + } } diff --git a/src/mem/ruby/profiler/AddressProfiler.hh b/src/mem/ruby/profiler/AddressProfiler.hh index 177aa56d6..76dac323f 100644 --- a/src/mem/ruby/profiler/AddressProfiler.hh +++ b/src/mem/ruby/profiler/AddressProfiler.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,89 +26,77 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * AddressProfiler.hh - * - * Description: - * - * $Id$ - * - */ +#ifndef __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__ +#define __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__ -#ifndef ADDRESSPROFILER_H -#define ADDRESSPROFILER_H -#include "mem/ruby/common/Global.hh" -#include "mem/ruby/system/NodeID.hh" -#include "mem/ruby/common/Histogram.hh" -#include "mem/ruby/common/Address.hh" -#include "mem/protocol/CacheMsg.hh" #include "mem/protocol/AccessType.hh" +#include "mem/protocol/CacheMsg.hh" +#include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Global.hh" +#include "mem/ruby/common/Histogram.hh" +#include "mem/ruby/system/NodeID.hh" class AccessTraceForAddress; class Set; template <class KEY_TYPE, class VALUE_TYPE> class Map; -class AddressProfiler { -public: - // Constructors - AddressProfiler(int num_of_sequencers); +class AddressProfiler +{ + public: + typedef Map<Address, AccessTraceForAddress> AddressMap; - // Destructor - ~AddressProfiler(); + public: + AddressProfiler(int num_of_sequencers); + ~AddressProfiler(); - // Public Methods - void printStats(ostream& out) const; - void clearStats(); + void printStats(ostream& out) const; + void clearStats(); - void addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss); - void profileRetry(const Address& data_addr, AccessType type, int count); - void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor); - void profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor); + void addTraceSample(Address data_addr, Address pc_addr, + CacheRequestType type, AccessModeType access_mode, + NodeID id, bool sharing_miss); + void profileRetry(const Address& data_addr, AccessType type, int count); + void profileGetX(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, NodeID requestor); + void profileGetS(const Address& datablock, const Address& PC, + const Set& owner, const Set& sharers, NodeID requestor); - void print(ostream& out) const; + void print(ostream& out) const; - //added by SS - void setHotLines(bool hot_lines); - void setAllInstructions(bool all_instructions); -private: - // Private Methods + //added by SS + void setHotLines(bool hot_lines); + void setAllInstructions(bool all_instructions); - // Private copy constructor and assignment operator - AddressProfiler(const AddressProfiler& obj); - AddressProfiler& operator=(const AddressProfiler& obj); + private: + // Private copy constructor and assignment operator + AddressProfiler(const AddressProfiler& obj); + AddressProfiler& operator=(const AddressProfiler& obj); - // Data Members (m_ prefix) - int64 m_sharing_miss_counter; + int64 m_sharing_miss_counter; - Map<Address, AccessTraceForAddress>* m_dataAccessTrace; - Map<Address, AccessTraceForAddress>* m_macroBlockAccessTrace; - Map<Address, AccessTraceForAddress>* m_programCounterAccessTrace; - Map<Address, AccessTraceForAddress>* m_retryProfileMap; - Histogram m_retryProfileHisto; - Histogram m_retryProfileHistoWrite; - Histogram m_retryProfileHistoRead; - Histogram m_getx_sharing_histogram; - Histogram m_gets_sharing_histogram; -//added by SS - bool m_hot_lines; - bool m_all_instructions; + AddressMap* m_dataAccessTrace; + AddressMap* m_macroBlockAccessTrace; + AddressMap* m_programCounterAccessTrace; + AddressMap* m_retryProfileMap; + Histogram m_retryProfileHisto; + Histogram m_retryProfileHistoWrite; + Histogram m_retryProfileHistoRead; + Histogram m_getx_sharing_histogram; + Histogram m_gets_sharing_histogram; - int m_num_of_sequencers; + //added by SS + bool m_hot_lines; + bool m_all_instructions; + + int m_num_of_sequencers; }; -// Output operator declaration -ostream& operator<<(ostream& out, const AddressProfiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const AddressProfiler& obj) +inline ostream& +operator<<(ostream& out, const AddressProfiler& obj) { - obj.print(out); - out << flush; - return out; + obj.print(out); + out << flush; + return out; } -#endif //ADDRESSPROFILER_H +#endif // __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__ diff --git a/src/mem/ruby/profiler/CacheProfiler.cc b/src/mem/ruby/profiler/CacheProfiler.cc index 50581fcf9..9d12a46ab 100644 --- a/src/mem/ruby/profiler/CacheProfiler.cc +++ b/src/mem/ruby/profiler/CacheProfiler.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,111 +26,113 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * CacheProfiler.C - * - * Description: See CacheProfiler.hh - * - * $Id$ - * - */ - -#include "mem/ruby/profiler/CacheProfiler.hh" -#include "mem/ruby/profiler/AccessTraceForAddress.hh" #include "mem/gems_common/PrioHeap.hh" -#include "mem/ruby/system/System.hh" -#include "mem/ruby/profiler/Profiler.hh" #include "mem/gems_common/Vector.hh" +#include "mem/ruby/profiler/AccessTraceForAddress.hh" +#include "mem/ruby/profiler/CacheProfiler.hh" +#include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/system/System.hh" CacheProfiler::CacheProfiler(const string& description) { - m_description = description; - m_requestTypeVec_ptr = new Vector<int>; - m_requestTypeVec_ptr->setSize(int(CacheRequestType_NUM)); + m_description = description; + m_requestTypeVec_ptr = new Vector<int>; + m_requestTypeVec_ptr->setSize(int(CacheRequestType_NUM)); - clearStats(); + clearStats(); } CacheProfiler::~CacheProfiler() { - delete m_requestTypeVec_ptr; + delete m_requestTypeVec_ptr; } -void CacheProfiler::printStats(ostream& out) const +void +CacheProfiler::printStats(ostream& out) const { - out << "Cache Stats: " << m_description << endl; - string description = " " + m_description; - - out << description << "_total_misses: " << m_misses << endl; - out << description << "_total_demand_misses: " << m_demand_misses << endl; - out << description << "_total_prefetches: " << m_prefetches << endl; - out << description << "_total_sw_prefetches: " << m_sw_prefetches << endl; - out << description << "_total_hw_prefetches: " << m_hw_prefetches << endl; - out << endl; - - int requests = 0; - - for(int i=0; i<int(CacheRequestType_NUM); i++) { - requests += m_requestTypeVec_ptr->ref(i); - } - - assert(m_misses == requests); - - if (requests > 0) { - for(int i=0; i<int(CacheRequestType_NUM); i++) { - if (m_requestTypeVec_ptr->ref(i) > 0) { - out << description << "_request_type_" << CacheRequestType_to_string(CacheRequestType(i)) << ": " - << (100.0 * double((m_requestTypeVec_ptr->ref(i)))) / double(requests) - << "%" << endl; - } - } + out << "Cache Stats: " << m_description << endl; + string description = " " + m_description; + out << description << "_total_misses: " << m_misses << endl; + out << description << "_total_demand_misses: " << m_demand_misses << endl; + out << description << "_total_prefetches: " << m_prefetches << endl; + out << description << "_total_sw_prefetches: " << m_sw_prefetches << endl; + out << description << "_total_hw_prefetches: " << m_hw_prefetches << endl; out << endl; - for(int i=0; i<AccessModeType_NUM; i++){ - if (m_accessModeTypeHistogram[i] > 0) { - out << description << "_access_mode_type_" << (AccessModeType) i << ": " << m_accessModeTypeHistogram[i] - << " " << (100.0 * m_accessModeTypeHistogram[i]) / requests << "%" << endl; - } + int requests = 0; + + for (int i = 0; i < int(CacheRequestType_NUM); i++) { + requests += m_requestTypeVec_ptr->ref(i); } - } - out << description << "_request_size: " << m_requestSize << endl; - out << endl; + assert(m_misses == requests); + if (requests > 0) { + for (int i = 0; i < int(CacheRequestType_NUM); i++) { + if (m_requestTypeVec_ptr->ref(i) > 0) { + out << description << "_request_type_" + << CacheRequestType_to_string(CacheRequestType(i)) + << ": " + << 100.0 * (double)m_requestTypeVec_ptr->ref(i) / + (double)requests + << "%" << endl; + } + } + + out << endl; + + for (int i = 0; i < AccessModeType_NUM; i++){ + if (m_accessModeTypeHistogram[i] > 0) { + out << description << "_access_mode_type_" + << (AccessModeType) i << ": " + << m_accessModeTypeHistogram[i] << " " + << 100.0 * m_accessModeTypeHistogram[i] / requests + << "%" << endl; + } + } + } + + out << description << "_request_size: " << m_requestSize << endl; + out << endl; } -void CacheProfiler::clearStats() +void +CacheProfiler::clearStats() { - for(int i=0; i<int(CacheRequestType_NUM); i++) { - m_requestTypeVec_ptr->ref(i) = 0; - } - m_requestSize.clear(); - m_misses = 0; - m_demand_misses = 0; - m_prefetches = 0; - m_sw_prefetches = 0; - m_hw_prefetches = 0; - for(int i=0; i<AccessModeType_NUM; i++) { - m_accessModeTypeHistogram[i] = 0; - } + for (int i = 0; i < int(CacheRequestType_NUM); i++) { + m_requestTypeVec_ptr->ref(i) = 0; + } + m_requestSize.clear(); + m_misses = 0; + m_demand_misses = 0; + m_prefetches = 0; + m_sw_prefetches = 0; + m_hw_prefetches = 0; + for (int i = 0; i < AccessModeType_NUM; i++) { + m_accessModeTypeHistogram[i] = 0; + } } -void CacheProfiler::addStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit) +void +CacheProfiler::addStatSample(CacheRequestType requestType, + AccessModeType type, int msgSize, + PrefetchBit pfBit) { - m_misses++; + m_misses++; - m_requestTypeVec_ptr->ref(requestType)++; + m_requestTypeVec_ptr->ref(requestType)++; - m_accessModeTypeHistogram[type]++; - m_requestSize.add(msgSize); - if (pfBit == PrefetchBit_No) { - m_demand_misses++; - } else if (pfBit == PrefetchBit_Yes) { - m_prefetches++; - m_sw_prefetches++; - } else { // must be L1_HW || L2_HW prefetch - m_prefetches++; - m_hw_prefetches++; - } + m_accessModeTypeHistogram[type]++; + m_requestSize.add(msgSize); + if (pfBit == PrefetchBit_No) { + m_demand_misses++; + } else if (pfBit == PrefetchBit_Yes) { + m_prefetches++; + m_sw_prefetches++; + } else { + // must be L1_HW || L2_HW prefetch + m_prefetches++; + m_hw_prefetches++; + } } diff --git a/src/mem/ruby/profiler/CacheProfiler.hh b/src/mem/ruby/profiler/CacheProfiler.hh index 11f189148..7dcdf57f0 100644 --- a/src/mem/ruby/profiler/CacheProfiler.hh +++ b/src/mem/ruby/profiler/CacheProfiler.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,77 +26,58 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * CacheProfiler.hh - * - * Description: - * - * $Id$ - * - */ - -#ifndef CACHEPROFILER_H -#define CACHEPROFILER_H +#ifndef __MEM_RUBY_PROFILER_CACHEPROFILER_HH__ +#define __MEM_RUBY_PROFILER_CACHEPROFILER_HH__ #include <iostream> #include <string> -#include "mem/ruby/common/Global.hh" -#include "mem/ruby/system/NodeID.hh" -#include "mem/ruby/common/Histogram.hh" #include "mem/protocol/AccessModeType.hh" -#include "mem/protocol/PrefetchBit.hh" #include "mem/protocol/CacheRequestType.hh" +#include "mem/protocol/PrefetchBit.hh" +#include "mem/ruby/common/Global.hh" +#include "mem/ruby/common/Histogram.hh" +#include "mem/ruby/system/NodeID.hh" template <class TYPE> class Vector; -class CacheProfiler { -public: - // Constructors - CacheProfiler(const std::string& description); +class CacheProfiler +{ + public: + CacheProfiler(const std::string& description); + ~CacheProfiler(); - // Destructor - ~CacheProfiler(); + void printStats(std::ostream& out) const; + void clearStats(); - // Public Methods - void printStats(std::ostream& out) const; - void clearStats(); + void addStatSample(CacheRequestType requestType, AccessModeType type, + int msgSize, PrefetchBit pfBit); - void addStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit); + void print(std::ostream& out) const; - void print(std::ostream& out) const; -private: - // Private Methods + private: + // Private copy constructor and assignment operator + CacheProfiler(const CacheProfiler& obj); + CacheProfiler& operator=(const CacheProfiler& obj); - // Private copy constructor and assignment operator - CacheProfiler(const CacheProfiler& obj); - CacheProfiler& operator=(const CacheProfiler& obj); + std::string m_description; + Histogram m_requestSize; + int64 m_misses; + int64 m_demand_misses; + int64 m_prefetches; + int64 m_sw_prefetches; + int64 m_hw_prefetches; + int64 m_accessModeTypeHistogram[AccessModeType_NUM]; - // Data Members (m_ prefix) - std::string m_description; - Histogram m_requestSize; - int64 m_misses; - int64 m_demand_misses; - int64 m_prefetches; - int64 m_sw_prefetches; - int64 m_hw_prefetches; - int64 m_accessModeTypeHistogram[AccessModeType_NUM]; - - Vector < int >* m_requestTypeVec_ptr; + Vector<int>* m_requestTypeVec_ptr; }; -// Output operator declaration -std::ostream& operator<<(std::ostream& out, const CacheProfiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -std::ostream& operator<<(std::ostream& out, const CacheProfiler& obj) +inline std::ostream& +operator<<(std::ostream& out, const CacheProfiler& obj) { - obj.print(out); - out << std::flush; - return out; + obj.print(out); + out << std::flush; + return out; } -#endif //CACHEPROFILER_H +#endif // __MEM_RUBY_PROFILER_CACHEPROFILER_HH__ diff --git a/src/mem/ruby/profiler/MemCntrlProfiler.cc b/src/mem/ruby/profiler/MemCntrlProfiler.cc index b41d7de78..e25719666 100644 --- a/src/mem/ruby/profiler/MemCntrlProfiler.cc +++ b/src/mem/ruby/profiler/MemCntrlProfiler.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. 
@@ -32,19 +31,14 @@ using namespace std; MemCntrlProfiler::MemCntrlProfiler(const string& description, - int banks_per_rank, - int ranks_per_dimm, - int dimms_per_channel) + int banks_per_rank, int ranks_per_dimm, int dimms_per_channel) { m_description = description; m_banks_per_rank = banks_per_rank; m_ranks_per_dimm = ranks_per_dimm; m_dimms_per_channel = dimms_per_channel; - int totalBanks = banks_per_rank * - ranks_per_dimm * - dimms_per_channel; - + int totalBanks = banks_per_rank * ranks_per_dimm * dimms_per_channel; m_memBankCount.setSize(totalBanks); clearStats(); @@ -54,50 +48,65 @@ MemCntrlProfiler::~MemCntrlProfiler() { } -void MemCntrlProfiler::printStats(ostream& out) const +void +MemCntrlProfiler::printStats(ostream& out) const { - if (m_memReq || m_memRefresh) { // if there's a memory controller at all - uint64 total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles; - double stallsPerReq = total_stalls * 1.0 / m_memReq; - out << "Memory controller: " << m_description << ":" << endl; - out << " memory_total_requests: " << m_memReq << endl; // does not include refreshes - out << " memory_reads: " << m_memRead << endl; - out << " memory_writes: " << m_memWrite << endl; - out << " memory_refreshes: " << m_memRefresh << endl; - out << " memory_total_request_delays: " << total_stalls << endl; - out << " memory_delays_per_request: " << stallsPerReq << endl; - out << " memory_delays_in_input_queue: " << m_memInputQ << endl; - out << " memory_delays_behind_head_of_bank_queue: " << m_memBankQ << endl; - out << " memory_delays_stalled_at_head_of_bank_queue: " << m_memWaitCycles << endl; - // Note: The following "memory stalls" entries are a breakdown of the - // cycles which already showed up in m_memWaitCycles. The order is - // significant; it is the priority of attributing the cycles. - // For example, bank_busy is before arbitration because if the bank was - // busy, we didn't even check arbitration. - // Note: "not old enough" means that since we grouped waiting heads-of-queues - // into batches to avoid starvation, a request in a newer batch - // didn't try to arbitrate yet because there are older requests waiting. - out << " memory_stalls_for_bank_busy: " << m_memBankBusy << endl; - out << " memory_stalls_for_random_busy: " << m_memRandBusy << endl; - out << " memory_stalls_for_anti_starvation: " << m_memNotOld << endl; - out << " memory_stalls_for_arbitration: " << m_memArbWait << endl; - out << " memory_stalls_for_bus: " << m_memBusBusy << endl; - out << " memory_stalls_for_tfaw: " << m_memTfawBusy << endl; - out << " memory_stalls_for_read_write_turnaround: " << m_memReadWriteBusy << endl; - out << " memory_stalls_for_read_read_turnaround: " << m_memDataBusBusy << endl; - out << " accesses_per_bank: "; - for (int bank=0; bank < m_memBankCount.size(); bank++) { - out << m_memBankCount[bank] << " "; - } - } else { + if (!m_memReq && !m_memRefresh) { out << "Memory Controller: " << m_description - << " no stats recorded." << endl; - } + << " no stats recorded." 
<< endl + << endl + << endl; + return; + } + + // if there's a memory controller at all + uint64 total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles; + double stallsPerReq = total_stalls * 1.0 / m_memReq; + out << "Memory controller: " << m_description << ":" << endl; + + // does not include refreshes + out << " memory_total_requests: " << m_memReq << endl; + out << " memory_reads: " << m_memRead << endl; + out << " memory_writes: " << m_memWrite << endl; + out << " memory_refreshes: " << m_memRefresh << endl; + out << " memory_total_request_delays: " << total_stalls << endl; + out << " memory_delays_per_request: " << stallsPerReq << endl; + out << " memory_delays_in_input_queue: " << m_memInputQ << endl; + out << " memory_delays_behind_head_of_bank_queue: " + << m_memBankQ << endl; + out << " memory_delays_stalled_at_head_of_bank_queue: " + << m_memWaitCycles << endl; + + // Note: The following "memory stalls" entries are a breakdown of + // the cycles which already showed up in m_memWaitCycles. The + // order is significant; it is the priority of attributing the + // cycles. For example, bank_busy is before arbitration because + // if the bank was busy, we didn't even check arbitration. + // Note: "not old enough" means that since we grouped waiting + // heads-of-queues into batches to avoid starvation, a request in + // a newer batch didn't try to arbitrate yet because there are + // older requests waiting. + out << " memory_stalls_for_bank_busy: " << m_memBankBusy << endl; + out << " memory_stalls_for_random_busy: " << m_memRandBusy << endl; + out << " memory_stalls_for_anti_starvation: " << m_memNotOld << endl; + out << " memory_stalls_for_arbitration: " << m_memArbWait << endl; + out << " memory_stalls_for_bus: " << m_memBusBusy << endl; + out << " memory_stalls_for_tfaw: " << m_memTfawBusy << endl; + out << " memory_stalls_for_read_write_turnaround: " + << m_memReadWriteBusy << endl; + out << " memory_stalls_for_read_read_turnaround: " + << m_memDataBusBusy << endl; + out << " accesses_per_bank: "; + + for (int bank = 0; bank < m_memBankCount.size(); bank++) { + out << m_memBankCount[bank] << " "; + } out << endl; out << endl; } -void MemCntrlProfiler::clearStats() +void +MemCntrlProfiler::clearStats() { m_memReq = 0; m_memBankBusy = 0; @@ -115,72 +124,100 @@ void MemCntrlProfiler::clearStats() m_memRandBusy = 0; m_memNotOld = 0; - for (int bank=0; - bank < m_memBankCount.size(); - bank++) { + for (int bank = 0; bank < m_memBankCount.size(); bank++) { m_memBankCount[bank] = 0; } } -void MemCntrlProfiler::profileMemReq(int bank) { - m_memReq++; - m_memBankCount[bank]++; +void +MemCntrlProfiler::profileMemReq(int bank) +{ + m_memReq++; + m_memBankCount[bank]++; } -void MemCntrlProfiler::profileMemBankBusy() { - m_memBankBusy++; +void +MemCntrlProfiler::profileMemBankBusy() +{ + m_memBankBusy++; } -void MemCntrlProfiler::profileMemBusBusy() { - m_memBusBusy++; +void +MemCntrlProfiler::profileMemBusBusy() +{ + m_memBusBusy++; } -void MemCntrlProfiler::profileMemReadWriteBusy() { - m_memReadWriteBusy++; +void +MemCntrlProfiler::profileMemReadWriteBusy() +{ + m_memReadWriteBusy++; } -void MemCntrlProfiler::profileMemDataBusBusy() { - m_memDataBusBusy++; +void +MemCntrlProfiler::profileMemDataBusBusy() +{ + m_memDataBusBusy++; } -void MemCntrlProfiler::profileMemTfawBusy() { - m_memTfawBusy++; +void +MemCntrlProfiler::profileMemTfawBusy() +{ + m_memTfawBusy++; } -void MemCntrlProfiler::profileMemRefresh() { - m_memRefresh++; +void +MemCntrlProfiler::profileMemRefresh() +{ + 
m_memRefresh++; } -void MemCntrlProfiler::profileMemRead() { - m_memRead++; +void +MemCntrlProfiler::profileMemRead() +{ + m_memRead++; } -void MemCntrlProfiler::profileMemWrite() { - m_memWrite++; +void +MemCntrlProfiler::profileMemWrite() +{ + m_memWrite++; } -void MemCntrlProfiler::profileMemWaitCycles(int cycles) { - m_memWaitCycles += cycles; +void +MemCntrlProfiler::profileMemWaitCycles(int cycles) +{ + m_memWaitCycles += cycles; } -void MemCntrlProfiler::profileMemInputQ(int cycles) { - m_memInputQ += cycles; +void +MemCntrlProfiler::profileMemInputQ(int cycles) +{ + m_memInputQ += cycles; } -void MemCntrlProfiler::profileMemBankQ(int cycles) { - m_memBankQ += cycles; +void +MemCntrlProfiler::profileMemBankQ(int cycles) +{ + m_memBankQ += cycles; } -void MemCntrlProfiler::profileMemArbWait(int cycles) { - m_memArbWait += cycles; +void +MemCntrlProfiler::profileMemArbWait(int cycles) +{ + m_memArbWait += cycles; } -void MemCntrlProfiler::profileMemRandBusy() { - m_memRandBusy++; +void +MemCntrlProfiler::profileMemRandBusy() +{ + m_memRandBusy++; } -void MemCntrlProfiler::profileMemNotOld() { - m_memNotOld++; +void +MemCntrlProfiler::profileMemNotOld() +{ + m_memNotOld++; } diff --git a/src/mem/ruby/profiler/MemCntrlProfiler.hh b/src/mem/ruby/profiler/MemCntrlProfiler.hh index ebedd5185..85c39e0ad 100644 --- a/src/mem/ruby/profiler/MemCntrlProfiler.hh +++ b/src/mem/ruby/profiler/MemCntrlProfiler.hh @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,17 +26,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * MemCntrlProfiler.hh - * - * Description: - * - * $Id$ - * - */ - -#ifndef MEM_CNTRL_PROFILER_H -#define MEM_CNTRL_PROFILER_H +#ifndef __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__ +#define __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__ #include #include @@ -47,80 +37,67 @@ template class Vector; -class MemCntrlProfiler { -public: - // Constructors - MemCntrlProfiler(const std::string& description, - int banks_per_rank, - int ranks_per_dimm, - int dimms_per_channel); +class MemCntrlProfiler +{ + public: + MemCntrlProfiler(const std::string& description, int banks_per_rank, + int ranks_per_dimm, int dimms_per_channel); + ~MemCntrlProfiler(); - // Destructor - ~MemCntrlProfiler(); + void printStats(std::ostream& out) const; + void clearStats(); - // Public Methods - void printStats(std::ostream& out) const; - void clearStats(); + void profileMemReq(int bank); + void profileMemBankBusy(); + void profileMemBusBusy(); + void profileMemTfawBusy(); + void profileMemReadWriteBusy(); + void profileMemDataBusBusy(); + void profileMemRefresh(); + void profileMemRead(); + void profileMemWrite(); + void profileMemWaitCycles(int cycles); + void profileMemInputQ(int cycles); + void profileMemBankQ(int cycles); + void profileMemArbWait(int cycles); + void profileMemRandBusy(); + void profileMemNotOld(); - void profileMemReq(int bank); - void profileMemBankBusy(); - void profileMemBusBusy(); - void profileMemTfawBusy(); - void profileMemReadWriteBusy(); - void profileMemDataBusBusy(); - void profileMemRefresh(); - void profileMemRead(); - void profileMemWrite(); - void profileMemWaitCycles(int cycles); - void profileMemInputQ(int cycles); - void profileMemBankQ(int cycles); - void profileMemArbWait(int cycles); - void profileMemRandBusy(); - void profileMemNotOld(); + void print(std::ostream& out) const; - void print(std::ostream& out) const; private: - // Private Methods + // Private copy constructor and assignment 
operator + MemCntrlProfiler(const MemCntrlProfiler& obj); + MemCntrlProfiler& operator=(const MemCntrlProfiler& obj); - // Private copy constructor and assignment operator - MemCntrlProfiler(const MemCntrlProfiler& obj); - MemCntrlProfiler& operator=(const MemCntrlProfiler& obj); - - // Data Members (m_ prefix) - std::string m_description; - uint64 m_memReq; - uint64 m_memBankBusy; - uint64 m_memBusBusy; - uint64 m_memTfawBusy; - uint64 m_memReadWriteBusy; - uint64 m_memDataBusBusy; - uint64 m_memRefresh; - uint64 m_memRead; - uint64 m_memWrite; - uint64 m_memWaitCycles; - uint64 m_memInputQ; - uint64 m_memBankQ; - uint64 m_memArbWait; - uint64 m_memRandBusy; - uint64 m_memNotOld; - Vector m_memBankCount; - int m_banks_per_rank; - int m_ranks_per_dimm; - int m_dimms_per_channel; + std::string m_description; + uint64 m_memReq; + uint64 m_memBankBusy; + uint64 m_memBusBusy; + uint64 m_memTfawBusy; + uint64 m_memReadWriteBusy; + uint64 m_memDataBusBusy; + uint64 m_memRefresh; + uint64 m_memRead; + uint64 m_memWrite; + uint64 m_memWaitCycles; + uint64 m_memInputQ; + uint64 m_memBankQ; + uint64 m_memArbWait; + uint64 m_memRandBusy; + uint64 m_memNotOld; + Vector m_memBankCount; + int m_banks_per_rank; + int m_ranks_per_dimm; + int m_dimms_per_channel; }; -// Output operator declaration -std::ostream& operator<<(std::ostream& out, const MemCntrlProfiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -std::ostream& operator<<(std::ostream& out, const MemCntrlProfiler& obj) +inline std::ostream& +operator<<(std::ostream& out, const MemCntrlProfiler& obj) { - obj.print(out); - out << std::flush; - return out; + obj.print(out); + out << std::flush; + return out; } -#endif //MEM_CNTRL_PROFILER_H +#endif // __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__ diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc index 365f6cf42..2cc3eddfc 100644 --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -42,34 +42,24 @@ ---------------------------------------------------------------------- */ -/* - * Profiler.cc - * - * Description: See Profiler.hh - * - * $Id$ - * - */ - // Allows use of times() library call, which determines virtual runtime #include #include -#include "mem/ruby/profiler/Profiler.hh" -#include "mem/ruby/profiler/AddressProfiler.hh" -#include "mem/ruby/system/System.hh" -#include "mem/ruby/network/Network.hh" -#include "mem/gems_common/PrioHeap.hh" -#include "mem/protocol/CacheMsg.hh" -#include "mem/protocol/Protocol.hh" -#include "mem/gems_common/util.hh" #include "mem/gems_common/Map.hh" -#include "mem/ruby/common/Debug.hh" +#include "mem/gems_common/PrioHeap.hh" +#include "mem/gems_common/util.hh" +#include "mem/protocol/CacheMsg.hh" #include "mem/protocol/MachineType.hh" - +#include "mem/protocol/Protocol.hh" +#include "mem/ruby/common/Debug.hh" +#include "mem/ruby/network/Network.hh" +#include "mem/ruby/profiler/AddressProfiler.hh" +#include "mem/ruby/profiler/Profiler.hh" +#include "mem/ruby/system/System.hh" #include "mem/ruby/system/System.hh" -extern std::ostream * debug_cout_ptr; +extern std::ostream* debug_cout_ptr; static double process_memory_total(); static double process_memory_resident(); @@ -77,570 +67,623 @@ static double process_memory_resident(); Profiler::Profiler(const Params *p) : SimObject(p) { - m_requestProfileMap_ptr = new Map; + m_requestProfileMap_ptr = new Map; - m_inst_profiler_ptr = NULL; - m_address_profiler_ptr = NULL; + m_inst_profiler_ptr = 
NULL; + m_address_profiler_ptr = NULL; - m_real_time_start_time = time(NULL); // Not reset in clearStats() - m_stats_period = 1000000; // Default - m_periodic_output_file_ptr = &cerr; + m_real_time_start_time = time(NULL); // Not reset in clearStats() + m_stats_period = 1000000; // Default + m_periodic_output_file_ptr = &cerr; - m_hot_lines = p->hot_lines; - m_all_instructions = p->all_instructions; + m_hot_lines = p->hot_lines; + m_all_instructions = p->all_instructions; - m_num_of_sequencers = p->num_of_sequencers; + m_num_of_sequencers = p->num_of_sequencers; - m_hot_lines = false; - m_all_instructions = false; + m_hot_lines = false; + m_all_instructions = false; - m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers); - m_address_profiler_ptr -> setHotLines(m_hot_lines); - m_address_profiler_ptr -> setAllInstructions(m_all_instructions); + m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers); + m_address_profiler_ptr->setHotLines(m_hot_lines); + m_address_profiler_ptr->setAllInstructions(m_all_instructions); - if (m_all_instructions) { - m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers); - m_inst_profiler_ptr -> setHotLines(m_hot_lines); - m_inst_profiler_ptr -> setAllInstructions(m_all_instructions); - } + if (m_all_instructions) { + m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers); + m_inst_profiler_ptr->setHotLines(m_hot_lines); + m_inst_profiler_ptr->setAllInstructions(m_all_instructions); + } } Profiler::~Profiler() { - if (m_periodic_output_file_ptr != &cerr) { - delete m_periodic_output_file_ptr; - } + if (m_periodic_output_file_ptr != &cerr) { + delete m_periodic_output_file_ptr; + } - delete m_requestProfileMap_ptr; + delete m_requestProfileMap_ptr; } -void Profiler::wakeup() +void +Profiler::wakeup() { - // FIXME - avoid the repeated code + // FIXME - avoid the repeated code - Vector perProcCycleCount; - perProcCycleCount.setSize(m_num_of_sequencers); + Vector perProcCycleCount; + perProcCycleCount.setSize(m_num_of_sequencers); - for(int i=0; i < m_num_of_sequencers; i++) { - perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; - // The +1 allows us to avoid division by zero - } + for (int i = 0; i < m_num_of_sequencers; i++) { + perProcCycleCount[i] = + g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; + // The +1 allows us to avoid division by zero + } - (*m_periodic_output_file_ptr) << "ruby_cycles: " - << g_eventQueue_ptr->getTime()-m_ruby_start - << endl; + ostream &out = *m_periodic_output_file_ptr; - (*m_periodic_output_file_ptr) << "mbytes_resident: " - << process_memory_resident() - << endl; + out << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl + << "mbytes_resident: " << process_memory_resident() << endl + << "mbytes_total: " << process_memory_total() << endl; - (*m_periodic_output_file_ptr) << "mbytes_total: " - << process_memory_total() - << endl; - - if (process_memory_total() > 0) { - (*m_periodic_output_file_ptr) << "resident_ratio: " - << process_memory_resident()/process_memory_total() - << endl; - } - - (*m_periodic_output_file_ptr) << "miss_latency: " - << m_allMissLatencyHistogram - << endl; - - *m_periodic_output_file_ptr << endl; - - if (m_all_instructions) { - m_inst_profiler_ptr->printStats(*m_periodic_output_file_ptr); - } - - //g_system_ptr->getNetwork()->printStats(*m_periodic_output_file_ptr); - g_eventQueue_ptr->scheduleEvent(this, m_stats_period); -} - -void Profiler::setPeriodicStatsFile(const string& filename) -{ - 
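    // Hedged aside: wakeup() above ends by re-arming itself with
    // scheduleEvent(this, m_stats_period), and the two setters below
    // prime that loop by scheduling the first event one cycle out.
    // A minimal sketch of the self-rescheduling pattern, assuming a
    // hypothetical Consumer subclass and the same event-queue API:
    //
    //     void PeriodicDumper::wakeup()
    //     {
    //         dump(*m_out);                                    // emit stats
    //         g_eventQueue_ptr->scheduleEvent(this, m_period); // re-arm
    //     }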
cout << "Recording periodic statistics to file '" << filename << "' every " - << m_stats_period << " Ruby cycles" << endl; - - if (m_periodic_output_file_ptr != &cerr) { - delete m_periodic_output_file_ptr; - } - - m_periodic_output_file_ptr = new ofstream(filename.c_str()); - g_eventQueue_ptr->scheduleEvent(this, 1); -} - -void Profiler::setPeriodicStatsInterval(integer_t period) -{ - cout << "Recording periodic statistics every " << m_stats_period - << " Ruby cycles" << endl; - - m_stats_period = period; - g_eventQueue_ptr->scheduleEvent(this, 1); -} - -void Profiler::printConfig(ostream& out) const -{ - out << endl; - out << "Profiler Configuration" << endl; - out << "----------------------" << endl; - out << "periodic_stats_period: " << m_stats_period << endl; -} - -void Profiler::print(ostream& out) const -{ - out << "[Profiler]"; -} - -void Profiler::printStats(ostream& out, bool short_stats) -{ - out << endl; - if (short_stats) { - out << "SHORT "; - } - out << "Profiler Stats" << endl; - out << "--------------" << endl; - - time_t real_time_current = time(NULL); - double seconds = difftime(real_time_current, m_real_time_start_time); - double minutes = seconds/60.0; - double hours = minutes/60.0; - double days = hours/24.0; - Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start; - - if (!short_stats) { - out << "Elapsed_time_in_seconds: " << seconds << endl; - out << "Elapsed_time_in_minutes: " << minutes << endl; - out << "Elapsed_time_in_hours: " << hours << endl; - out << "Elapsed_time_in_days: " << days << endl; - out << endl; - } - - // print the virtual runtimes as well - struct tms vtime; - times(&vtime); - seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0; - minutes = seconds / 60.0; - hours = minutes / 60.0; - days = hours / 24.0; - out << "Virtual_time_in_seconds: " << seconds << endl; - out << "Virtual_time_in_minutes: " << minutes << endl; - out << "Virtual_time_in_hours: " << hours << endl; - out << "Virtual_time_in_days: " << days << endl; - out << endl; - - out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl; - out << "Ruby_start_time: " << m_ruby_start << endl; - out << "Ruby_cycles: " << ruby_cycles << endl; - out << endl; - - if (!short_stats) { - out << "mbytes_resident: " << process_memory_resident() << endl; - out << "mbytes_total: " << process_memory_total() << endl; if (process_memory_total() > 0) { - out << "resident_ratio: " - << process_memory_resident()/process_memory_total() << endl; + out << "resident_ratio: " + << process_memory_resident() / process_memory_total() << endl; } - out << endl; - } - - Vector perProcCycleCount; - perProcCycleCount.setSize(m_num_of_sequencers); - - for(int i=0; i < m_num_of_sequencers; i++) { - perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; - // The +1 allows us to avoid division by zero - } - - out << "ruby_cycles_executed: " << perProcCycleCount << endl; - - out << endl; - - if (!short_stats) { - out << "Busy Controller Counts:" << endl; - for(int i=0; i < MachineType_NUM; i++) { - for(int j=0; j < MachineType_base_count((MachineType)i); j++) { - MachineID machID; - machID.type = (MachineType)i; - machID.num = j; - out << machID << ":" << m_busyControllerCount[i][j] << " "; - if ((j+1)%8 == 0) { - out << endl; - } - } - out << endl; - } - out << endl; - - out << "Busy Bank Count:" << m_busyBankCount << endl; - out << endl; - - out << "sequencer_requests_outstanding: " << m_sequencer_requests << endl; - out << endl; - } - - if (!short_stats) { - out << 
"All Non-Zero Cycle Demand Cache Accesses" << endl; - out << "----------------------------------------" << endl; out << "miss_latency: " << m_allMissLatencyHistogram << endl; - for(int i=0; i 0) { - out << "miss_latency_" << RubyRequestType(i) << ": " << m_missLatencyHistograms[i] << endl; - } - } - for(int i=0; i 0) { - out << "miss_latency_" << GenericMachineType(i) << ": " << m_machLatencyHistograms[i] << endl; - } - } out << endl; - out << "All Non-Zero Cycle SW Prefetch Requests" << endl; - out << "------------------------------------" << endl; - out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl; - for(int i=0; i 0) { - out << "prefetch_latency_" << CacheRequestType(i) << ": " << m_SWPrefetchLatencyHistograms[i] << endl; - } - } - for(int i=0; i 0) { - out << "prefetch_latency_" << GenericMachineType(i) << ": " << m_SWPrefetchMachLatencyHistograms[i] << endl; - } - } - out << "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram << endl; - - if (m_all_sharing_histogram.size() > 0) { - out << "all_sharing: " << m_all_sharing_histogram << endl; - out << "read_sharing: " << m_read_sharing_histogram << endl; - out << "write_sharing: " << m_write_sharing_histogram << endl; - - out << "all_sharing_percent: "; m_all_sharing_histogram.printPercent(out); out << endl; - out << "read_sharing_percent: "; m_read_sharing_histogram.printPercent(out); out << endl; - out << "write_sharing_percent: "; m_write_sharing_histogram.printPercent(out); out << endl; - - int64 total_miss = m_cache_to_cache + m_memory_to_cache; - out << "all_misses: " << total_miss << endl; - out << "cache_to_cache_misses: " << m_cache_to_cache << endl; - out << "memory_to_cache_misses: " << m_memory_to_cache << endl; - out << "cache_to_cache_percent: " << 100.0 * (double(m_cache_to_cache) / double(total_miss)) << endl; - out << "memory_to_cache_percent: " << 100.0 * (double(m_memory_to_cache) / double(total_miss)) << endl; - out << endl; - } - - if (m_outstanding_requests.size() > 0) { - out << "outstanding_requests: "; m_outstanding_requests.printPercent(out); out << endl; - out << endl; - } - } - - if (!short_stats) { - out << "Request vs. 
RubySystem State Profile" << endl; - out << "--------------------------------" << endl; - out << endl; - - Vector requestProfileKeys = m_requestProfileMap_ptr->keys(); - requestProfileKeys.sortVector(); - - for(int i=0; ilookup(requestProfileKeys[i]); - double percent = (100.0*double(temp_int))/double(m_requests); - while (requestProfileKeys[i] != "") { - out << setw(10) << string_split(requestProfileKeys[i], ':'); - } - out << setw(11) << temp_int; - out << setw(14) << percent << endl; - } - out << endl; - - out << "filter_action: " << m_filter_action_histogram << endl; - - if (!m_all_instructions) { - m_address_profiler_ptr->printStats(out); - } - if (m_all_instructions) { - m_inst_profiler_ptr->printStats(out); + m_inst_profiler_ptr->printStats(out); } + //g_system_ptr->getNetwork()->printStats(out); + g_eventQueue_ptr->scheduleEvent(this, m_stats_period); +} + +void +Profiler::setPeriodicStatsFile(const string& filename) +{ + cout << "Recording periodic statistics to file '" << filename << "' every " + << m_stats_period << " Ruby cycles" << endl; + + if (m_periodic_output_file_ptr != &cerr) { + delete m_periodic_output_file_ptr; + } + + m_periodic_output_file_ptr = new ofstream(filename.c_str()); + g_eventQueue_ptr->scheduleEvent(this, 1); +} + +void +Profiler::setPeriodicStatsInterval(integer_t period) +{ + cout << "Recording periodic statistics every " << m_stats_period + << " Ruby cycles" << endl; + + m_stats_period = period; + g_eventQueue_ptr->scheduleEvent(this, 1); +} + +void +Profiler::printConfig(ostream& out) const +{ + out << endl; + out << "Profiler Configuration" << endl; + out << "----------------------" << endl; + out << "periodic_stats_period: " << m_stats_period << endl; +} + +void +Profiler::print(ostream& out) const +{ + out << "[Profiler]"; +} + +void +Profiler::printStats(ostream& out, bool short_stats) +{ + out << endl; + if (short_stats) { + out << "SHORT "; + } + out << "Profiler Stats" << endl; + out << "--------------" << endl; + + time_t real_time_current = time(NULL); + double seconds = difftime(real_time_current, m_real_time_start_time); + double minutes = seconds / 60.0; + double hours = minutes / 60.0; + double days = hours / 24.0; + Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start; + + if (!short_stats) { + out << "Elapsed_time_in_seconds: " << seconds << endl; + out << "Elapsed_time_in_minutes: " << minutes << endl; + out << "Elapsed_time_in_hours: " << hours << endl; + out << "Elapsed_time_in_days: " << days << endl; + out << endl; + } + + // print the virtual runtimes as well + struct tms vtime; + times(&vtime); + seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0; + minutes = seconds / 60.0; + hours = minutes / 60.0; + days = hours / 24.0; + out << "Virtual_time_in_seconds: " << seconds << endl; + out << "Virtual_time_in_minutes: " << minutes << endl; + out << "Virtual_time_in_hours: " << hours << endl; + out << "Virtual_time_in_days: " << days << endl; + out << endl; + + out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl; + out << "Ruby_start_time: " << m_ruby_start << endl; + out << "Ruby_cycles: " << ruby_cycles << endl; + out << endl; + + if (!short_stats) { + out << "mbytes_resident: " << process_memory_resident() << endl; + out << "mbytes_total: " << process_memory_total() << endl; + if (process_memory_total() > 0) { + out << "resident_ratio: " + << process_memory_resident()/process_memory_total() << endl; + } + out << endl; + } + + Vector perProcCycleCount; + perProcCycleCount.setSize(m_num_of_sequencers); + + 
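    // Two hedged notes on the surrounding code, not changes made by the
    // patch: the virtual-runtime block above divides tms ticks by 100.0,
    // which assumes a 100 Hz tick rate; a portable variant would query
    // the system instead:
    //
    //     long ticks = sysconf(_SC_CLK_TCK);  // from <unistd.h>
    //     seconds = double(vtime.tms_utime + vtime.tms_stime) / ticks;
    //
    // And the +1 in the loop below biases every cycle count so a later
    // per-cycle ratio over perProcCycleCount[i] can never divide by zero.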
for (int i = 0; i < m_num_of_sequencers; i++) { + perProcCycleCount[i] = + g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; + // The +1 allows us to avoid division by zero + } + + out << "ruby_cycles_executed: " << perProcCycleCount << endl; + out << endl; - out << "Message Delayed Cycles" << endl; - out << "----------------------" << endl; - out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl; - out << "Total_nonPF_delay_cycles: " << m_delayedCyclesNonPFHistogram << endl; - for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) { - out << " virtual_network_" << i << "_delay_cycles: " << m_delayedCyclesVCHistograms[i] << endl; + + if (!short_stats) { + out << "Busy Controller Counts:" << endl; + for (int i = 0; i < MachineType_NUM; i++) { + int size = MachineType_base_count((MachineType)i); + for (int j = 0; j < size; j++) { + MachineID machID; + machID.type = (MachineType)i; + machID.num = j; + out << machID << ":" << m_busyControllerCount[i][j] << " "; + if ((j + 1) % 8 == 0) { + out << endl; + } + } + out << endl; + } + out << endl; + + out << "Busy Bank Count:" << m_busyBankCount << endl; + out << endl; + + out << "sequencer_requests_outstanding: " + << m_sequencer_requests << endl; + out << endl; } - printResourceUsage(out); - } + if (!short_stats) { + out << "All Non-Zero Cycle Demand Cache Accesses" << endl; + out << "----------------------------------------" << endl; + out << "miss_latency: " << m_allMissLatencyHistogram << endl; + for (int i = 0; i < m_missLatencyHistograms.size(); i++) { + if (m_missLatencyHistograms[i].size() > 0) { + out << "miss_latency_" << RubyRequestType(i) << ": " + << m_missLatencyHistograms[i] << endl; + } + } + for (int i = 0; i < m_machLatencyHistograms.size(); i++) { + if (m_machLatencyHistograms[i].size() > 0) { + out << "miss_latency_" << GenericMachineType(i) << ": " + << m_machLatencyHistograms[i] << endl; + } + } + out << endl; + + out << "All Non-Zero Cycle SW Prefetch Requests" << endl; + out << "------------------------------------" << endl; + out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl; + for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) { + if (m_SWPrefetchLatencyHistograms[i].size() > 0) { + out << "prefetch_latency_" << CacheRequestType(i) << ": " + << m_SWPrefetchLatencyHistograms[i] << endl; + } + } + for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) { + if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) { + out << "prefetch_latency_" << GenericMachineType(i) << ": " + << m_SWPrefetchMachLatencyHistograms[i] << endl; + } + } + out << "prefetch_latency_L2Miss:" + << m_SWPrefetchL2MissLatencyHistogram << endl; + + if (m_all_sharing_histogram.size() > 0) { + out << "all_sharing: " << m_all_sharing_histogram << endl; + out << "read_sharing: " << m_read_sharing_histogram << endl; + out << "write_sharing: " << m_write_sharing_histogram << endl; + + out << "all_sharing_percent: "; + m_all_sharing_histogram.printPercent(out); + out << endl; + + out << "read_sharing_percent: "; + m_read_sharing_histogram.printPercent(out); + out << endl; + + out << "write_sharing_percent: "; + m_write_sharing_histogram.printPercent(out); + out << endl; + + int64 total_miss = m_cache_to_cache + m_memory_to_cache; + out << "all_misses: " << total_miss << endl; + out << "cache_to_cache_misses: " << m_cache_to_cache << endl; + out << "memory_to_cache_misses: " << m_memory_to_cache << endl; + out << "cache_to_cache_percent: " + << 100.0 * (double(m_cache_to_cache) / 
double(total_miss)) + << endl; + out << "memory_to_cache_percent: " + << 100.0 * (double(m_memory_to_cache) / double(total_miss)) + << endl; + out << endl; + } + + if (m_outstanding_requests.size() > 0) { + out << "outstanding_requests: "; + m_outstanding_requests.printPercent(out); + out << endl; + out << endl; + } + } + + if (!short_stats) { + out << "Request vs. RubySystem State Profile" << endl; + out << "--------------------------------" << endl; + out << endl; + + Vector requestProfileKeys = m_requestProfileMap_ptr->keys(); + requestProfileKeys.sortVector(); + + for (int i = 0; i < requestProfileKeys.size(); i++) { + int temp_int = + m_requestProfileMap_ptr->lookup(requestProfileKeys[i]); + double percent = (100.0 * double(temp_int)) / double(m_requests); + while (requestProfileKeys[i] != "") { + out << setw(10) << string_split(requestProfileKeys[i], ':'); + } + out << setw(11) << temp_int; + out << setw(14) << percent << endl; + } + out << endl; + + out << "filter_action: " << m_filter_action_histogram << endl; + + if (!m_all_instructions) { + m_address_profiler_ptr->printStats(out); + } + + if (m_all_instructions) { + m_inst_profiler_ptr->printStats(out); + } + + out << endl; + out << "Message Delayed Cycles" << endl; + out << "----------------------" << endl; + out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl; + out << "Total_nonPF_delay_cycles: " + << m_delayedCyclesNonPFHistogram << endl; + for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) { + out << " virtual_network_" << i << "_delay_cycles: " + << m_delayedCyclesVCHistograms[i] << endl; + } + + printResourceUsage(out); + } } -void Profiler::printResourceUsage(ostream& out) const +void +Profiler::printResourceUsage(ostream& out) const { - out << endl; - out << "Resource Usage" << endl; - out << "--------------" << endl; + out << endl; + out << "Resource Usage" << endl; + out << "--------------" << endl; - integer_t pagesize = getpagesize(); // page size in bytes - out << "page_size: " << pagesize << endl; + integer_t pagesize = getpagesize(); // page size in bytes + out << "page_size: " << pagesize << endl; - rusage usage; - getrusage (RUSAGE_SELF, &usage); + rusage usage; + getrusage (RUSAGE_SELF, &usage); - out << "user_time: " << usage.ru_utime.tv_sec << endl; - out << "system_time: " << usage.ru_stime.tv_sec << endl; - out << "page_reclaims: " << usage.ru_minflt << endl; - out << "page_faults: " << usage.ru_majflt << endl; - out << "swaps: " << usage.ru_nswap << endl; - out << "block_inputs: " << usage.ru_inblock << endl; - out << "block_outputs: " << usage.ru_oublock << endl; + out << "user_time: " << usage.ru_utime.tv_sec << endl; + out << "system_time: " << usage.ru_stime.tv_sec << endl; + out << "page_reclaims: " << usage.ru_minflt << endl; + out << "page_faults: " << usage.ru_majflt << endl; + out << "swaps: " << usage.ru_nswap << endl; + out << "block_inputs: " << usage.ru_inblock << endl; + out << "block_outputs: " << usage.ru_oublock << endl; } -void Profiler::clearStats() +void +Profiler::clearStats() { - m_ruby_start = g_eventQueue_ptr->getTime(); + m_ruby_start = g_eventQueue_ptr->getTime(); - m_cycles_executed_at_start.setSize(m_num_of_sequencers); - for (int i=0; i < m_num_of_sequencers; i++) { - if (g_system_ptr == NULL) { - m_cycles_executed_at_start[i] = 0; + m_cycles_executed_at_start.setSize(m_num_of_sequencers); + for (int i = 0; i < m_num_of_sequencers; i++) { + if (g_system_ptr == NULL) { + m_cycles_executed_at_start[i] = 0; + } else { + m_cycles_executed_at_start[i] = 
g_system_ptr->getCycleCount(i); + } + } + + m_busyControllerCount.setSize(MachineType_NUM); // all machines + for (int i = 0; i < MachineType_NUM; i++) { + int size = MachineType_base_count((MachineType)i); + m_busyControllerCount[i].setSize(size); + for (int j = 0; j < size; j++) { + m_busyControllerCount[i][j] = 0; + } + } + m_busyBankCount = 0; + + m_delayedCyclesHistogram.clear(); + m_delayedCyclesNonPFHistogram.clear(); + int size = RubySystem::getNetwork()->getNumberOfVirtualNetworks(); + m_delayedCyclesVCHistograms.setSize(size); + for (int i = 0; i < size; i++) { + m_delayedCyclesVCHistograms[i].clear(); + } + + m_missLatencyHistograms.setSize(RubyRequestType_NUM); + for (int i = 0; i < m_missLatencyHistograms.size(); i++) { + m_missLatencyHistograms[i].clear(200); + } + m_machLatencyHistograms.setSize(GenericMachineType_NUM+1); + for (int i = 0; i < m_machLatencyHistograms.size(); i++) { + m_machLatencyHistograms[i].clear(200); + } + m_allMissLatencyHistogram.clear(200); + + m_SWPrefetchLatencyHistograms.setSize(CacheRequestType_NUM); + for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) { + m_SWPrefetchLatencyHistograms[i].clear(200); + } + m_SWPrefetchMachLatencyHistograms.setSize(GenericMachineType_NUM+1); + for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) { + m_SWPrefetchMachLatencyHistograms[i].clear(200); + } + m_allSWPrefetchLatencyHistogram.clear(200); + + m_sequencer_requests.clear(); + m_read_sharing_histogram.clear(); + m_write_sharing_histogram.clear(); + m_all_sharing_histogram.clear(); + m_cache_to_cache = 0; + m_memory_to_cache = 0; + + // clear HashMaps + m_requestProfileMap_ptr->clear(); + + // count requests profiled + m_requests = 0; + + m_outstanding_requests.clear(); + m_outstanding_persistent_requests.clear(); + + // Flush the prefetches through the system - used so that there + // are no outstanding requests after stats are cleared + //g_eventQueue_ptr->triggerAllEvents(); + + // update the start time + m_ruby_start = g_eventQueue_ptr->getTime(); +} + +void +Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id) +{ + if (msg.getType() != CacheRequestType_IFETCH) { + // Note: The following line should be commented out if you + // want to use the special profiling that is part of the GS320 + // protocol + + // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be + // profiled by the AddressProfiler + m_address_profiler_ptr-> + addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), + msg.getType(), msg.getAccessMode(), id, false); + } +} + +void +Profiler::profileSharing(const Address& addr, AccessType type, + NodeID requestor, const Set& sharers, + const Set& owner) +{ + Set set_contacted(owner); + if (type == AccessType_Write) { + set_contacted.addSet(sharers); + } + set_contacted.remove(requestor); + int number_contacted = set_contacted.count(); + + if (type == AccessType_Write) { + m_write_sharing_histogram.add(number_contacted); } else { - m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i); + m_read_sharing_histogram.add(number_contacted); } - } + m_all_sharing_histogram.add(number_contacted); - m_busyControllerCount.setSize(MachineType_NUM); // all machines - for(int i=0; i < MachineType_NUM; i++) { - m_busyControllerCount[i].setSize(MachineType_base_count((MachineType)i)); - for(int j=0; j < MachineType_base_count((MachineType)i); j++) { - m_busyControllerCount[i][j] = 0; + if (number_contacted == 0) { + m_memory_to_cache++; + } else { + m_cache_to_cache++; } - } - m_busyBankCount = 0; - - 
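    // Usage sketch for the delayed-cycles histograms initialized here,
    // on the assumption that the network side is the caller:
    //
    //     profiler->profileMsgDelay(vnet, delayCycles);
    //
    // profileMsgDelay() further down asserts vnet is in range, adds the
    // delay to the per-network histogram, and skips virtual network 0
    // when accumulating Total_nonPF_delay_cycles, which suggests that
    // network carries the prefetch traffic.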
m_delayedCyclesHistogram.clear(); - m_delayedCyclesNonPFHistogram.clear(); - m_delayedCyclesVCHistograms.setSize(RubySystem::getNetwork()->getNumberOfVirtualNetworks()); - for (int i = 0; i < RubySystem::getNetwork()->getNumberOfVirtualNetworks(); i++) { - m_delayedCyclesVCHistograms[i].clear(); - } - - m_missLatencyHistograms.setSize(RubyRequestType_NUM); - for(int i=0; iclear(); - - // count requests profiled - m_requests = 0; - - m_outstanding_requests.clear(); - m_outstanding_persistent_requests.clear(); - - // Flush the prefetches through the system - used so that there are no outstanding requests after stats are cleared - //g_eventQueue_ptr->triggerAllEvents(); - - // update the start time - m_ruby_start = g_eventQueue_ptr->getTime(); } -void Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id) +void +Profiler::profileMsgDelay(int virtualNetwork, int delayCycles) { - if (msg.getType() != CacheRequestType_IFETCH) { - - // Note: The following line should be commented out if you want to - // use the special profiling that is part of the GS320 protocol - - // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be profiled by the AddressProfiler - m_address_profiler_ptr->addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), msg.getType(), msg.getAccessMode(), id, false); - } -} - -void Profiler::profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner) -{ - Set set_contacted(owner); - if (type == AccessType_Write) { - set_contacted.addSet(sharers); - } - set_contacted.remove(requestor); - int number_contacted = set_contacted.count(); - - if (type == AccessType_Write) { - m_write_sharing_histogram.add(number_contacted); - } else { - m_read_sharing_histogram.add(number_contacted); - } - m_all_sharing_histogram.add(number_contacted); - - if (number_contacted == 0) { - m_memory_to_cache++; - } else { - m_cache_to_cache++; - } - -} - -void Profiler::profileMsgDelay(int virtualNetwork, int delayCycles) { - assert(virtualNetwork < m_delayedCyclesVCHistograms.size()); - m_delayedCyclesHistogram.add(delayCycles); - m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles); - if (virtualNetwork != 0) { - m_delayedCyclesNonPFHistogram.add(delayCycles); - } + assert(virtualNetwork < m_delayedCyclesVCHistograms.size()); + m_delayedCyclesHistogram.add(delayCycles); + m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles); + if (virtualNetwork != 0) { + m_delayedCyclesNonPFHistogram.add(delayCycles); + } } // profiles original cache requests including PUTs -void Profiler::profileRequest(const string& requestStr) +void +Profiler::profileRequest(const string& requestStr) { - m_requests++; + m_requests++; - if (m_requestProfileMap_ptr->exist(requestStr)) { - (m_requestProfileMap_ptr->lookup(requestStr))++; - } else { - m_requestProfileMap_ptr->add(requestStr, 1); - } + if (m_requestProfileMap_ptr->exist(requestStr)) { + (m_requestProfileMap_ptr->lookup(requestStr))++; + } else { + m_requestProfileMap_ptr->add(requestStr, 1); + } } -void Profiler::controllerBusy(MachineID machID) +void +Profiler::controllerBusy(MachineID machID) { - m_busyControllerCount[(int)machID.type][(int)machID.num]++; + m_busyControllerCount[(int)machID.type][(int)machID.num]++; } -void Profiler::profilePFWait(Time waitTime) +void +Profiler::profilePFWait(Time waitTime) { - m_prefetchWaitHistogram.add(waitTime); + m_prefetchWaitHistogram.add(waitTime); } -void Profiler::bankBusy() +void +Profiler::bankBusy() { - m_busyBankCount++; + m_busyBankCount++; 
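    // Worked example for profileSharing() above, with hypothetical
    // values: a write by requestor 2 with owner {0} and sharers {2, 3}
    // builds set_contacted = {0, 2, 3}, removes the requestor to get
    // {0, 3}, so number_contacted == 2; the write-sharing and
    // all-sharing histograms each record a 2, and m_cache_to_cache is
    // incremented because another cache, not memory, supplied the line.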
} // non-zero cycle demand request -void Profiler::missLatency(Time t, RubyRequestType type) +void +Profiler::missLatency(Time t, RubyRequestType type) { - m_allMissLatencyHistogram.add(t); - m_missLatencyHistograms[type].add(t); + m_allMissLatencyHistogram.add(t); + m_missLatencyHistograms[type].add(t); } // non-zero cycle prefetch request -void Profiler::swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach) +void +Profiler::swPrefetchLatency(Time t, CacheRequestType type, + GenericMachineType respondingMach) { - m_allSWPrefetchLatencyHistogram.add(t); - m_SWPrefetchLatencyHistograms[type].add(t); - m_SWPrefetchMachLatencyHistograms[respondingMach].add(t); - if(respondingMach == GenericMachineType_Directory || respondingMach == GenericMachineType_NUM) { - m_SWPrefetchL2MissLatencyHistogram.add(t); - } + m_allSWPrefetchLatencyHistogram.add(t); + m_SWPrefetchLatencyHistograms[type].add(t); + m_SWPrefetchMachLatencyHistograms[respondingMach].add(t); + if (respondingMach == GenericMachineType_Directory || + respondingMach == GenericMachineType_NUM) { + m_SWPrefetchL2MissLatencyHistogram.add(t); + } } -void Profiler::profileTransition(const string& component, NodeID version, Address addr, - const string& state, const string& event, - const string& next_state, const string& note) +void +Profiler::profileTransition(const string& component, NodeID version, + Address addr, const string& state, const string& event, + const string& next_state, const string& note) { - const int EVENT_SPACES = 20; - const int ID_SPACES = 3; - const int TIME_SPACES = 7; - const int COMP_SPACES = 10; - const int STATE_SPACES = 6; + const int EVENT_SPACES = 20; + const int ID_SPACES = 3; + const int TIME_SPACES = 7; + const int COMP_SPACES = 10; + const int STATE_SPACES = 6; - if ((g_debug_ptr->getDebugTime() > 0) && - (g_eventQueue_ptr->getTime() >= g_debug_ptr->getDebugTime())) { - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << version << " "; - (* debug_cout_ptr) << setw(COMP_SPACES) << component; - (* debug_cout_ptr) << setw(EVENT_SPACES) << event << " "; + if (g_debug_ptr->getDebugTime() <= 0 || + g_eventQueue_ptr->getTime() < g_debug_ptr->getDebugTime()) + return; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(STATE_SPACES) << state; - (* debug_cout_ptr) << ">"; - (* debug_cout_ptr).flags(ios::left); - (* debug_cout_ptr) << setw(STATE_SPACES) << next_state; + ostream &out = *debug_cout_ptr; + out.flags(ios::right); + out << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; + out << setw(ID_SPACES) << version << " "; + out << setw(COMP_SPACES) << component; + out << setw(EVENT_SPACES) << event << " "; - (* debug_cout_ptr) << " " << addr << " " << note; + out.flags(ios::right); + out << setw(STATE_SPACES) << state; + out << ">"; + out.flags(ios::left); + out << setw(STATE_SPACES) << next_state; - (* debug_cout_ptr) << endl; - } + out << " " << addr << " " << note; + + out << endl; } // Helper function -static double process_memory_total() +static double +process_memory_total() { - const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB, - ifstream proc_file; - proc_file.open("/proc/self/statm"); - int total_size_in_pages = 0; - int res_size_in_pages = 0; - proc_file >> total_size_in_pages; - proc_file >> res_size_in_pages; - return double(total_size_in_pages)*MULTIPLIER; // size in megabytes + // 4kB page size, 
1024*1024 bytes per MB, + const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0); + ifstream proc_file; + proc_file.open("/proc/self/statm"); + int total_size_in_pages = 0; + int res_size_in_pages = 0; + proc_file >> total_size_in_pages; + proc_file >> res_size_in_pages; + return double(total_size_in_pages) * MULTIPLIER; // size in megabytes } -static double process_memory_resident() +static double +process_memory_resident() { - const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB, - ifstream proc_file; - proc_file.open("/proc/self/statm"); - int total_size_in_pages = 0; - int res_size_in_pages = 0; - proc_file >> total_size_in_pages; - proc_file >> res_size_in_pages; - return double(res_size_in_pages)*MULTIPLIER; // size in megabytes + // 4kB page size, 1024*1024 bytes per MB, + const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0); + ifstream proc_file; + proc_file.open("/proc/self/statm"); + int total_size_in_pages = 0; + int res_size_in_pages = 0; + proc_file >> total_size_in_pages; + proc_file >> res_size_in_pages; + return double(res_size_in_pages) * MULTIPLIER; // size in megabytes } -void Profiler::rubyWatch(int id){ +void +Profiler::rubyWatch(int id) +{ uint64 tr = 0; Address watch_address = Address(tr); const int ID_SPACES = 3; const int TIME_SPACES = 7; - (* debug_cout_ptr).flags(ios::right); - (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; - (* debug_cout_ptr) << setw(ID_SPACES) << id << " " - << "RUBY WATCH " - << watch_address - << endl; + ostream &out = *debug_cout_ptr; - if(!m_watch_address_list_ptr->exist(watch_address)){ - m_watch_address_list_ptr->add(watch_address, 1); + out.flags(ios::right); + out << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; + out << setw(ID_SPACES) << id << " " + << "RUBY WATCH " << watch_address << endl; + + if (!m_watch_address_list_ptr->exist(watch_address)) { + m_watch_address_list_ptr->add(watch_address, 1); } } -bool Profiler::watchAddress(Address addr){ +bool +Profiler::watchAddress(Address addr) +{ if (m_watch_address_list_ptr->exist(addr)) - return true; + return true; else - return false; + return false; } Profiler * diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh index 3ae1f5e31..bf4bf8a50 100644 --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -42,35 +42,24 @@ ---------------------------------------------------------------------- */ -/* - * Profiler.hh - * - * Description: - * - * $Id$ - * - */ +#ifndef __MEM_RUBY_PROFILER_PROFILER_HH__ +#define __MEM_RUBY_PROFILER_PROFILER_HH__ -#ifndef PROFILER_H -#define PROFILER_H - -#include "mem/ruby/libruby.hh" - -#include "mem/ruby/common/Global.hh" -#include "mem/protocol/GenericMachineType.hh" -#include "mem/ruby/common/Histogram.hh" -#include "mem/ruby/common/Consumer.hh" #include "mem/protocol/AccessModeType.hh" #include "mem/protocol/AccessType.hh" -#include "mem/ruby/system/NodeID.hh" -#include "mem/ruby/system/MachineID.hh" +#include "mem/protocol/CacheRequestType.hh" +#include "mem/protocol/GenericMachineType.hh" +#include "mem/protocol/GenericRequestType.hh" #include "mem/protocol/PrefetchBit.hh" #include "mem/ruby/common/Address.hh" +#include "mem/ruby/common/Consumer.hh" +#include "mem/ruby/common/Global.hh" +#include "mem/ruby/common/Histogram.hh" #include "mem/ruby/common/Set.hh" -#include "mem/protocol/CacheRequestType.hh" -#include "mem/protocol/GenericRequestType.hh" +#include "mem/ruby/libruby.hh" +#include 
"mem/ruby/system/MachineID.hh" #include "mem/ruby/system/MemoryControl.hh" - +#include "mem/ruby/system/NodeID.hh" #include "params/RubyProfiler.hh" #include "sim/sim_object.hh" @@ -79,155 +68,165 @@ class AddressProfiler; template class Map; -class Profiler : public SimObject, public Consumer { -public: - // Constructors +class Profiler : public SimObject, public Consumer +{ + public: typedef RubyProfilerParams Params; - Profiler(const Params *); + Profiler(const Params *); + ~Profiler(); - // Destructor - ~Profiler(); + void wakeup(); - // Public Methods - void wakeup(); + void setPeriodicStatsFile(const string& filename); + void setPeriodicStatsInterval(integer_t period); - void setPeriodicStatsFile(const string& filename); - void setPeriodicStatsInterval(integer_t period); + void printStats(ostream& out, bool short_stats=false); + void printShortStats(ostream& out) { printStats(out, true); } + void printTraceStats(ostream& out) const; + void clearStats(); + void printConfig(ostream& out) const; + void printResourceUsage(ostream& out) const; - void printStats(ostream& out, bool short_stats=false); - void printShortStats(ostream& out) { printStats(out, true); } - void printTraceStats(ostream& out) const; - void clearStats(); - void printConfig(ostream& out) const; - void printResourceUsage(ostream& out) const; + AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } + AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } - AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } - AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } + void addAddressTraceSample(const CacheMsg& msg, NodeID id); - void addAddressTraceSample(const CacheMsg& msg, NodeID id); + void profileRequest(const string& requestStr); + void profileSharing(const Address& addr, AccessType type, + NodeID requestor, const Set& sharers, + const Set& owner); - void profileRequest(const string& requestStr); - void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner); + void profileMulticastRetry(const Address& addr, int count); - void profileMulticastRetry(const Address& addr, int count); + void profileFilterAction(int action); - void profileFilterAction(int action); + void profileConflictingRequests(const Address& addr); - void profileConflictingRequests(const Address& addr); - void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); } - void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); } - void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); } + void + profileOutstandingRequest(int outstanding) + { + m_outstanding_requests.add(outstanding); + } - void recordPrediction(bool wasGood, bool wasPredicted); + void + profileOutstandingPersistentRequest(int outstanding) + { + m_outstanding_persistent_requests.add(outstanding); + } - void startTransaction(int cpu); - void endTransaction(int cpu); - void profilePFWait(Time waitTime); + void + profileAverageLatencyEstimate(int latency) + { + m_average_latency_estimate.add(latency); + } - void controllerBusy(MachineID machID); - void bankBusy(); - void missLatency(Time t, RubyRequestType type); - void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach); - void sequencerRequests(int num) { m_sequencer_requests.add(num); } + void recordPrediction(bool wasGood, bool wasPredicted); - void 
profileTransition(const string& component, NodeID version, Address addr, - const string& state, const string& event, - const string& next_state, const string& note); - void profileMsgDelay(int virtualNetwork, int delayCycles); + void startTransaction(int cpu); + void endTransaction(int cpu); + void profilePFWait(Time waitTime); - void print(ostream& out) const; + void controllerBusy(MachineID machID); + void bankBusy(); + void missLatency(Time t, RubyRequestType type); + void swPrefetchLatency(Time t, CacheRequestType type, + GenericMachineType respondingMach); + void sequencerRequests(int num) { m_sequencer_requests.add(num); } - void rubyWatch(int proc); - bool watchAddress(Address addr); + void profileTransition(const string& component, NodeID version, + Address addr, const string& state, + const string& event, const string& next_state, + const string& note); + void profileMsgDelay(int virtualNetwork, int delayCycles); - // return Ruby's start time - Time getRubyStartTime(){ - return m_ruby_start; - } + void print(ostream& out) const; - //added by SS - bool getHotLines() { return m_hot_lines; } - bool getAllInstructions() { return m_all_instructions; } + void rubyWatch(int proc); + bool watchAddress(Address addr); -private: + // return Ruby's start time + Time + getRubyStartTime() + { + return m_ruby_start; + } - // Private copy constructor and assignment operator - Profiler(const Profiler& obj); - Profiler& operator=(const Profiler& obj); + // added by SS + bool getHotLines() { return m_hot_lines; } + bool getAllInstructions() { return m_all_instructions; } - // Data Members (m_ prefix) - AddressProfiler* m_address_profiler_ptr; - AddressProfiler* m_inst_profiler_ptr; + private: + // Private copy constructor and assignment operator + Profiler(const Profiler& obj); + Profiler& operator=(const Profiler& obj); - Vector m_instructions_executed_at_start; - Vector m_cycles_executed_at_start; + AddressProfiler* m_address_profiler_ptr; + AddressProfiler* m_inst_profiler_ptr; - ostream* m_periodic_output_file_ptr; - integer_t m_stats_period; + Vector m_instructions_executed_at_start; + Vector m_cycles_executed_at_start; - Time m_ruby_start; - time_t m_real_time_start_time; + ostream* m_periodic_output_file_ptr; + integer_t m_stats_period; - Vector < Vector < integer_t > > m_busyControllerCount; - integer_t m_busyBankCount; - Histogram m_multicast_retry_histogram; + Time m_ruby_start; + time_t m_real_time_start_time; - Histogram m_filter_action_histogram; - Histogram m_tbeProfile; + Vector > m_busyControllerCount; + integer_t m_busyBankCount; + Histogram m_multicast_retry_histogram; - Histogram m_sequencer_requests; - Histogram m_read_sharing_histogram; - Histogram m_write_sharing_histogram; - Histogram m_all_sharing_histogram; - int64 m_cache_to_cache; - int64 m_memory_to_cache; + Histogram m_filter_action_histogram; + Histogram m_tbeProfile; - Histogram m_prefetchWaitHistogram; + Histogram m_sequencer_requests; + Histogram m_read_sharing_histogram; + Histogram m_write_sharing_histogram; + Histogram m_all_sharing_histogram; + int64 m_cache_to_cache; + int64 m_memory_to_cache; - Vector m_missLatencyHistograms; - Vector m_machLatencyHistograms; - Histogram m_allMissLatencyHistogram; + Histogram m_prefetchWaitHistogram; - Histogram m_allSWPrefetchLatencyHistogram; - Histogram m_SWPrefetchL2MissLatencyHistogram; - Vector m_SWPrefetchLatencyHistograms; - Vector m_SWPrefetchMachLatencyHistograms; + Vector m_missLatencyHistograms; + Vector m_machLatencyHistograms; + Histogram 
m_allMissLatencyHistogram; - Histogram m_delayedCyclesHistogram; - Histogram m_delayedCyclesNonPFHistogram; - Vector m_delayedCyclesVCHistograms; + Histogram m_allSWPrefetchLatencyHistogram; + Histogram m_SWPrefetchL2MissLatencyHistogram; + Vector m_SWPrefetchLatencyHistograms; + Vector m_SWPrefetchMachLatencyHistograms; - Histogram m_outstanding_requests; - Histogram m_outstanding_persistent_requests; + Histogram m_delayedCyclesHistogram; + Histogram m_delayedCyclesNonPFHistogram; + Vector m_delayedCyclesVCHistograms; - Histogram m_average_latency_estimate; + Histogram m_outstanding_requests; + Histogram m_outstanding_persistent_requests; - Map* m_watch_address_list_ptr; - // counts all initiated cache request including PUTs - int m_requests; - Map * m_requestProfileMap_ptr; + Histogram m_average_latency_estimate; - //added by SS - bool m_hot_lines; - bool m_all_instructions; + Map* m_watch_address_list_ptr; + // counts all initiated cache request including PUTs + int m_requests; + Map * m_requestProfileMap_ptr; - int m_num_of_sequencers; + //added by SS + bool m_hot_lines; + bool m_all_instructions; + + int m_num_of_sequencers; }; -// Output operator declaration -ostream& operator<<(ostream& out, const Profiler& obj); - -// ******************* Definitions ******************* - -// Output operator definition -extern inline -ostream& operator<<(ostream& out, const Profiler& obj) +inline ostream& +operator<<(ostream& out, const Profiler& obj) { - obj.print(out); - out << flush; - return out; + obj.print(out); + out << flush; + return out; } -#endif //PROFILER_H +#endif // __MEM_RUBY_PROFILER_PROFILER_HH__ diff --git a/src/mem/ruby/profiler/StoreTrace.cc b/src/mem/ruby/profiler/StoreTrace.cc index 4d4e4798d..ce42560b6 100644 --- a/src/mem/ruby/profiler/StoreTrace.cc +++ b/src/mem/ruby/profiler/StoreTrace.cc @@ -1,4 +1,3 @@ - /* * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood * All rights reserved. @@ -27,132 +26,130 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/* - * $Id$ - * - */ - -#include "mem/ruby/profiler/StoreTrace.hh" #include "mem/ruby/eventqueue/RubyEventQueue.hh" +#include "mem/ruby/profiler/StoreTrace.hh" -bool StoreTrace::s_init = false; // Total number of store lifetimes of all lines -int64 StoreTrace::s_total_samples = 0; // Total number of store lifetimes of all lines +bool StoreTrace::s_init = false; // Total number of store lifetimes of + // all lines +int64 StoreTrace::s_total_samples = 0; // Total number of store + // lifetimes of all lines Histogram* StoreTrace::s_store_count_ptr = NULL; Histogram* StoreTrace::s_store_first_to_stolen_ptr = NULL; Histogram* StoreTrace::s_store_last_to_stolen_ptr = NULL; Histogram* StoreTrace::s_store_first_to_last_ptr = NULL; -StoreTrace::StoreTrace(const Address& addr) : - m_store_count(-1), m_store_first_to_stolen(-1), m_store_last_to_stolen(-1), m_store_first_to_last(-1) +StoreTrace::StoreTrace(const Address& addr) + : m_store_count(-1), m_store_first_to_stolen(-1), + m_store_last_to_stolen(-1), m_store_first_to_last(-1) { - StoreTrace::initSummary(); - m_addr = addr; - m_total_samples = 0; - m_last_writer = -1; // Really -1 isn't valid, so this will trigger the initilization code - m_stores_this_interval = 0; + StoreTrace::initSummary(); + m_addr = addr; + m_total_samples = 0; + + // Really -1 isn't valid, so this will trigger the initilization code + m_last_writer = -1; + m_stores_this_interval = 0; } StoreTrace::~StoreTrace() { } -void StoreTrace::print(ostream& out) const +void +StoreTrace::print(ostream& out) const { - out << m_addr; - out << " total_samples: " << m_total_samples << endl; - out << "store_count: " << m_store_count << endl; - out << "store_first_to_stolen: " << m_store_first_to_stolen << endl; - out << "store_last_to_stolen: " << m_store_last_to_stolen << endl; - out << "store_first_to_last: " << m_store_first_to_last << endl; + out << m_addr + << " total_samples: " << m_total_samples << endl + << "store_count: " << m_store_count << endl + << "store_first_to_stolen: " << m_store_first_to_stolen << endl + << "store_last_to_stolen: " << m_store_last_to_stolen << endl + << "store_first_to_last: " << m_store_first_to_last << endl; } -// Class method -void StoreTrace::initSummary() +void +StoreTrace::initSummary() { - if (!s_init) { + if (!s_init) { + s_total_samples = 0; + s_store_count_ptr = new Histogram(-1); + s_store_first_to_stolen_ptr = new Histogram(-1); + s_store_last_to_stolen_ptr = new Histogram(-1); + s_store_first_to_last_ptr = new Histogram(-1); + } + s_init = true; +} + +void +StoreTrace::printSummary(ostream& out) +{ + out << "total_samples: " << s_total_samples << endl; + out << "store_count: " << (*s_store_count_ptr) << endl; + out << "store_first_to_stolen: " << (*s_store_first_to_stolen_ptr) << endl; + out << "store_last_to_stolen: " << (*s_store_last_to_stolen_ptr) << endl; + out << "store_first_to_last: " << (*s_store_first_to_last_ptr) << endl; +} + +void +StoreTrace::clearSummary() +{ + StoreTrace::initSummary(); s_total_samples = 0; - s_store_count_ptr = new Histogram(-1); - s_store_first_to_stolen_ptr = new Histogram(-1); - s_store_last_to_stolen_ptr = new Histogram(-1); - s_store_first_to_last_ptr = new Histogram(-1); - } - s_init = true; + s_store_count_ptr->clear(); + s_store_first_to_stolen_ptr->clear(); + s_store_last_to_stolen_ptr->clear(); + s_store_first_to_last_ptr->clear(); } -// Class method -void StoreTrace::printSummary(ostream& out) +void +StoreTrace::store(NodeID node) { - out << "total_samples: " << s_total_samples << endl; - out 
<< "store_count: " << (*s_store_count_ptr) << endl; - out << "store_first_to_stolen: " << (*s_store_first_to_stolen_ptr) << endl; - out << "store_last_to_stolen: " << (*s_store_last_to_stolen_ptr) << endl; - out << "store_first_to_last: " << (*s_store_first_to_last_ptr) << endl; -} - -// Class method -void StoreTrace::clearSummary() -{ - StoreTrace::initSummary(); - s_total_samples = 0; - s_store_count_ptr->clear(); - s_store_first_to_stolen_ptr->clear(); - s_store_last_to_stolen_ptr->clear(); - s_store_first_to_last_ptr->clear(); -} - -void StoreTrace::store(NodeID node) -{ - Time current = g_eventQueue_ptr->getTime(); - - assert((m_last_writer == -1) || (m_last_writer == node)); - - m_last_writer = node; - if (m_last_writer == -1) { - assert(m_stores_this_interval == 0); - } - - if (m_stores_this_interval == 0) { - // A new proessor just wrote the line, so reset the stats - m_first_store = current; - } - - m_last_store = current; - m_stores_this_interval++; -} - -void StoreTrace::downgrade(NodeID node) -{ - if (node == m_last_writer) { Time current = g_eventQueue_ptr->getTime(); - assert(m_stores_this_interval != 0); - assert(m_last_store != 0); - assert(m_first_store != 0); - assert(m_last_writer != -1); - // Per line stats - m_store_first_to_stolen.add(current - m_first_store); - m_store_count.add(m_stores_this_interval); - m_store_last_to_stolen.add(current - m_last_store); - m_store_first_to_last.add(m_last_store - m_first_store); - m_total_samples++; + assert((m_last_writer == -1) || (m_last_writer == node)); - // Global stats - assert(s_store_first_to_stolen_ptr != NULL); - s_store_first_to_stolen_ptr->add(current - m_first_store); - s_store_count_ptr->add(m_stores_this_interval); - s_store_last_to_stolen_ptr->add(current - m_last_store); - s_store_first_to_last_ptr->add(m_last_store - m_first_store); - s_total_samples++; + m_last_writer = node; + if (m_last_writer == -1) { + assert(m_stores_this_interval == 0); + } - // Initilize for next go round - m_stores_this_interval = 0; - m_last_store = 0; - m_first_store = 0; - m_last_writer = -1; - } + if (m_stores_this_interval == 0) { + // A new proessor just wrote the line, so reset the stats + m_first_store = current; + } + + m_last_store = current; + m_stores_this_interval++; } -bool node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2) +void +StoreTrace::downgrade(NodeID node) { - return (n1->getTotal() > n2->getTotal()); + if (node == m_last_writer) { + Time current = g_eventQueue_ptr->getTime(); + assert(m_stores_this_interval != 0); + assert(m_last_store != 0); + assert(m_first_store != 0); + assert(m_last_writer != -1); + + // Per line stats + m_store_first_to_stolen.add(current - m_first_store); + m_store_count.add(m_stores_this_interval); + m_store_last_to_stolen.add(current - m_last_store); + m_store_first_to_last.add(m_last_store - m_first_store); + m_total_samples++; + + // Global stats + assert(s_store_first_to_stolen_ptr != NULL); + s_store_first_to_stolen_ptr->add(current - m_first_store); + s_store_count_ptr->add(m_stores_this_interval); + s_store_last_to_stolen_ptr->add(current - m_last_store); + s_store_first_to_last_ptr->add(m_last_store - m_first_store); + s_total_samples++; + + // Initilize for next go round + m_stores_this_interval = 0; + m_last_store = 0; + m_first_store = 0; + m_last_writer = -1; + } } diff --git a/src/mem/ruby/profiler/StoreTrace.hh b/src/mem/ruby/profiler/StoreTrace.hh index 5cdf7ce41..8bddfe6c7 100644 --- a/src/mem/ruby/profiler/StoreTrace.hh +++ b/src/mem/ruby/profiler/StoreTrace.hh 
diff --git a/src/mem/ruby/profiler/StoreTrace.hh b/src/mem/ruby/profiler/StoreTrace.hh
index 5cdf7ce41..8bddfe6c7 100644
--- a/src/mem/ruby/profiler/StoreTrace.hh
+++ b/src/mem/ruby/profiler/StoreTrace.hh
@@ -1,4 +1,3 @@
-
 /*
  * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
  * All rights reserved.
@@ -27,82 +26,63 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

-/*
- * $Id$
- *
- * Description:
- *
- */
+#ifndef __MEM_RUBY_PROFILER_STORETRACE_HH__
+#define __MEM_RUBY_PROFILER_STORETRACE_HH__

-#ifndef StoreTrace_H
-#define StoreTrace_H
-
-#include "mem/ruby/common/Global.hh"
 #include "mem/ruby/common/Address.hh"
+#include "mem/ruby/common/Global.hh"
 #include "mem/ruby/common/Histogram.hh"

-class StoreTrace {
-public:
-  // Constructors
-  StoreTrace() { }
-  explicit StoreTrace(const Address& addr);
+class StoreTrace
+{
+  public:
+    StoreTrace() { }
+    explicit StoreTrace(const Address& addr);
+    ~StoreTrace();

-  // Destructor
-  ~StoreTrace();
+    void store(NodeID node);
+    void downgrade(NodeID node);
+    int getTotal() const { return m_total_samples; }
+    static void initSummary();
+    static void printSummary(ostream& out);
+    static void clearSummary();

-  // Public Methods
-  void store(NodeID node);
-  void downgrade(NodeID node);
-  int getTotal() const { return m_total_samples; }
-  static void initSummary();
-  static void printSummary(ostream& out);
-  static void clearSummary();
+    void print(ostream& out) const;

-  void print(ostream& out) const;
-private:
-  // Private Methods
+  private:
+    static bool s_init;
+    static int64 s_total_samples; // Total number of store lifetimes
+                                  // of all lines
+    static Histogram* s_store_count_ptr;
+    static Histogram* s_store_first_to_stolen_ptr;
+    static Histogram* s_store_last_to_stolen_ptr;
+    static Histogram* s_store_first_to_last_ptr;

-  // Private copy constructor and assignment operator
-  // StoreTrace(const StoreTrace& obj);
-  // StoreTrace& operator=(const StoreTrace& obj);
+    Address m_addr;
+    NodeID m_last_writer;
+    Time m_first_store;
+    Time m_last_store;
+    int m_stores_this_interval;

-  // Class Members (s_ prefix)
-  static bool s_init;
-  static int64 s_total_samples; // Total number of store lifetimes of all lines
-  static Histogram* s_store_count_ptr;
-  static Histogram* s_store_first_to_stolen_ptr;
-  static Histogram* s_store_last_to_stolen_ptr;
-  static Histogram* s_store_first_to_last_ptr;
-
-  // Data Members (m_ prefix)
-
-  Address m_addr;
-  NodeID m_last_writer;
-  Time m_first_store;
-  Time m_last_store;
-  int m_stores_this_interval;
-
-  int64 m_total_samples; // Total number of store lifetimes of this line
-  Histogram m_store_count;
-  Histogram m_store_first_to_stolen;
-  Histogram m_store_last_to_stolen;
-  Histogram m_store_first_to_last;
+    int64 m_total_samples; // Total number of store lifetimes of this line
+    Histogram m_store_count;
+    Histogram m_store_first_to_stolen;
+    Histogram m_store_last_to_stolen;
+    Histogram m_store_first_to_last;
 };

-bool node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2);
-
-// Output operator declaration
-ostream& operator<<(ostream& out, const StoreTrace& obj);
-
-// ******************* Definitions *******************
-
-// Output operator definition
-extern inline
-ostream& operator<<(ostream& out, const StoreTrace& obj)
+inline bool
+node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2)
 {
-  obj.print(out);
-  out << flush;
-  return out;
+    return n1->getTotal() > n2->getTotal();
 }

-#endif //StoreTrace_H
+inline ostream&
+operator<<(ostream& out, const StoreTrace& obj)
+{
+    obj.print(out);
+    out << flush;
+    return out;
+}
+
+#endif // __MEM_RUBY_PROFILER_STORETRACE_HH__
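One subtlety preserved by the header cleanup above: node_less_then_eq() compares with ">", so despite its name it is a descending comparator, and sorting StoreTrace pointers with it puts the most heavily sampled lines first. A hypothetical Python equivalent of that ordering (illustrative only, not part of this patch):

    traces = [('0x40', 3), ('0x80', 17), ('0xc0', 9)]  # (line, total samples)
    traces.sort(key=lambda t: t[1], reverse=True)      # what comparing with '>' achieves
    print(traces)  # [('0x80', 17), ('0xc0', 9), ('0x40', 3)]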
diff --git a/util/regress b/util/regress
index 1d0b9049a..a74bd09c3 100755
--- a/util/regress
+++ b/util/regress
@@ -36,28 +36,29 @@ from subprocess import call
 progname = os.path.basename(sys.argv[0])

 optparser = optparse.OptionParser()
-optparser.add_option('-v', '--verbose', dest='verbose', action='store_true',
-                     default=False,
-                     help='echo commands before executing')
-optparser.add_option('--builds', dest='builds',
-                     default='ALPHA_SE,ALPHA_SE_MOESI_hammer,' \
-                     'ALPHA_SE_MESI_CMP_directory,' \
-                     'ALPHA_SE_MOESI_CMP_directory,' \
-                     'ALPHA_SE_MOESI_CMP_token,' \
-                     'ALPHA_FS,MIPS_SE,' \
-                     'POWER_SE,SPARC_SE,SPARC_FS,X86_SE,ARM_SE',
-                     help='comma-separated list of build targets to test '
-                     " (default: '%default')" )
-optparser.add_option('--variants', dest='variants',
-                     default='fast',
-                     help='comma-separated list of build variants to test '
-                     " (default: '%default')" )
-optparser.add_option('--scons-opts', dest='scons_opts', default='',
-                     help='scons options', metavar='OPTS')
-optparser.add_option('-j', '--jobs', type='int', default=1,
-                     help='number of parallel jobs to use')
-optparser.add_option('-k', '--keep-going', action='store_true',
-                     help='keep going after errors')
+add_option = optparser.add_option
+add_option('-v', '--verbose', dest='verbose', action='store_true',
+           default=False,
+           help='echo commands before executing')
+add_option('--builds', dest='builds',
+           default='ALPHA_SE,ALPHA_SE_MOESI_hammer,' \
+           'ALPHA_SE_MESI_CMP_directory,' \
+           'ALPHA_SE_MOESI_CMP_directory,' \
+           'ALPHA_SE_MOESI_CMP_token,' \
+           'ALPHA_FS,MIPS_SE,POWER_SE,SPARC_SE,SPARC_FS,X86_SE,ARM_SE',
+           help="comma-separated build targets to test (default: '%default')")
+add_option('--variants', dest='variants', default='fast',
+           help="comma-separated build variants to test (default: '%default')")
+add_option('--scons-opts', dest='scons_opts', default='', metavar='OPTS',
+           help='scons options')
+add_option('-j', '--jobs', type='int', default=1,
+           help='number of parallel jobs to use')
+add_option('-k', '--keep-going', action='store_true',
+           help='keep going after errors')
+add_option('-D', '--build-dir', default='',
+           help='build directory location')
+add_option('-n', "--no-exec", default=False, action='store_true',
+           help="don't actually invoke scons, just echo SCons command line")

 (options, tests) = optparser.parse_args()

@@ -66,6 +67,8 @@ optparser.add_option('-k', '--keep-going', action='store_true',
 builds = options.builds.split(',')
 variants = options.variants.split(',')

+options.build_dir = os.path.join(options.build_dir, 'build')
+
 # Call os.system() and raise exception if return status is non-zero
 def system(cmd):
     try:
@@ -91,11 +94,11 @@ def shellquote(s):

 if not tests:
     print "No tests specified, just building binaries."
-    targets = ['build/%s/m5.%s' % (build, variant)
+    targets = ['%s/%s/m5.%s' % (options.build_dir, build, variant)
                for build in builds
                for variant in variants]
 elif 'all' in tests:
-    targets = ['build/%s/tests/%s' % (build, variant)
+    targets = ['%s/%s/tests/%s' % (options.build_dir, build, variant)
                for build in builds
                for variant in variants]
 else:
@@ -103,17 +106,36 @@ else:
     # If we ever get a quick SPARC_FS test, this code should be removed
     if 'quick' in tests and 'SPARC_FS' in builds:
         builds.remove('SPARC_FS')
-    targets = ['build/%s/tests/%s/%s' % (build, variant, test)
+    targets = ['%s/%s/tests/%s/%s' % (options.build_dir, build, variant, test)
               for build in builds
               for variant in variants
               for test in tests]

+def cpu_count():
+    if 'bsd' in sys.platform or sys.platform == 'darwin':
+        try:
+            return int(os.popen('sysctl -n hw.ncpu').read())
+        except ValueError:
+            pass
+    else:
+        try:
+            return os.sysconf('SC_NPROCESSORS_ONLN')
+        except (ValueError, OSError, AttributeError):
+            pass
+
+    raise NotImplementedError('cannot determine number of cpus')
+
 scons_opts = options.scons_opts
 if options.jobs != 1:
+    if options.jobs == 0:
+        options.jobs = cpu_count()
     scons_opts += ' -j %d' % options.jobs
 if options.keep_going:
     scons_opts += ' -k'

-system('scons IGNORE_STYLE=True %s %s' % (scons_opts, ' '.join(targets)))
-
-sys.exit(0)
+cmd = 'scons IGNORE_STYLE=True %s %s' % (scons_opts, ' '.join(targets))
+if options.no_exec:
+    print cmd
+else:
+    system(cmd)
+    sys.exit(0)
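A note on the new --build-dir behavior: because os.path.join() ignores an empty leading component, the default of '' reduces to plain 'build', so targets land exactly where they did before the patch; a non-empty -D value relocates the whole tree. A small illustrative check (hypothetical paths, not part of this patch):

    import os

    # Default (-D not given): os.path.join('', 'build') == 'build'.
    build_dir = os.path.join('', 'build')
    print('%s/%s/m5.%s' % (build_dir, 'ALPHA_SE', 'fast'))
    # -> build/ALPHA_SE/m5.fast, the pre-patch layout

    # With -D /scratch/m5, everything moves under that directory.
    build_dir = os.path.join('/scratch/m5', 'build')
    print('%s/%s/m5.%s' % (build_dir, 'ALPHA_SE', 'fast'))
    # -> /scratch/m5/build/ALPHA_SE/m5.fast

Alongside this, -j 0 now auto-sizes the SCons job count via cpu_count(), and -n echoes the assembled scons command line without executing it.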