m5: merge inorder updates

Korey Sewell 2010-03-27 02:23:00 -04:00
commit 1c98bc5a56
25 changed files with 1662 additions and 1607 deletions

View file

@@ -275,7 +275,6 @@ def template StoreExecute {{
if (fault == NoFault) {
fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
memAccessFlags, NULL);
if (traceData) { traceData->setData(Mem); }
}
if (fault == NoFault) {
@@ -310,7 +309,6 @@ def template StoreCondExecute {{
if (fault == NoFault) {
fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
memAccessFlags, &write_result);
if (traceData) { traceData->setData(Mem); }
}
if (fault == NoFault) {
@@ -344,7 +342,6 @@ def template StoreInitiateAcc {{
if (fault == NoFault) {
fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
memAccessFlags, NULL);
if (traceData) { traceData->setData(Mem); }
}
return fault;
@@ -478,9 +475,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
mem_flags = makeList(mem_flags)
inst_flags = makeList(inst_flags)
# add hook to get effective addresses into execution trace output.
ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n'
# Some CPU models execute the memory operation as an atomic unit,
# while others want to separate them into an effective address
# computation and a memory access operation. As a result, we need
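The hunks above (and the matching ones in the other ISA description files below) remove the per-template traceData->setData(Mem) calls and the ea_code setAddr hook: this merge moves that bookkeeping into the CPU models themselves, as the CacheUnit, AtomicSimpleCPU, and TimingSimpleCPU hunks later in this commit show. A minimal self-contained sketch of the resulting pattern, with all names hypothetical (the real code uses gem5's Trace::InstRecord and Fault types):

#include <cstdint>
#include <iostream>

// Stand-in for the trace record; only the two hooks used here.
struct TraceRecord {
    void setAddr(uint64_t a) { addr = a; }
    void setData(uint64_t d) { data = d; }
    uint64_t addr = 0, data = 0;
};

struct SketchCPU {
    TraceRecord *traceData = nullptr;   // null when tracing is off

    // After this merge the CPU model's write() owns the trace hooks, so
    // the generated store templates no longer call setAddr()/setData().
    template <typename T>
    int write(T data, uint64_t addr, unsigned flags, uint64_t *res) {
        if (traceData) {
            traceData->setAddr(addr);
            traceData->setData(data);
        }
        // ... perform the actual memory access here ...
        return 0;   // stands in for NoFault
    }
};

int main() {
    SketchCPU cpu;
    TraceRecord rec;
    cpu.traceData = &rec;
    cpu.write<uint32_t>(0xbeef, 0x1000, 0, nullptr);
    std::cout << std::hex << rec.addr << " " << rec.data << std::endl;
    return 0;
}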

View file

@@ -172,7 +172,6 @@ def template StoreExecute {{
if (fault == NoFault) {
fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
memAccessFlags, NULL);
if (traceData) { traceData->setData(Mem); }
}
if (fault == NoFault) {
@@ -204,7 +203,6 @@ def template StoreInitiateAcc {{
if (fault == NoFault) {
fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
memAccessFlags, NULL);
if (traceData) { traceData->setData(Mem); }
}
// Need to write back any potential address register update

View file

@@ -305,7 +305,6 @@ def template StoreExecute {{
if (fault == NoFault) {
fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
memAccessFlags, NULL);
if (traceData) { traceData->setData(Mem); }
}
if (fault == NoFault) {
@@ -342,7 +341,6 @@ def template StoreFPExecute {{
if (fault == NoFault) {
fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
memAccessFlags, NULL);
if (traceData) { traceData->setData(Mem); }
}
if (fault == NoFault) {
@@ -377,7 +375,6 @@ def template StoreCondExecute {{
if (fault == NoFault) {
fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
memAccessFlags, &write_result);
if (traceData) { traceData->setData(Mem); }
}
if (fault == NoFault) {
@@ -411,7 +408,6 @@ def template StoreInitiateAcc {{
if (fault == NoFault) {
fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
memAccessFlags, NULL);
if (traceData) { traceData->setData(Mem); }
}
return fault;
@@ -435,8 +431,6 @@ def template StoreCompleteAcc {{
if (fault == NoFault) {
%(op_wb)s;
if (traceData) { traceData->setData(getMemData(xc, pkt)); }
}
return fault;
@@ -459,8 +453,6 @@ def template StoreCompleteAcc {{
if (fault == NoFault) {
%(op_wb)s;
if (traceData) { traceData->setData(getMemData(xc, pkt)); }
}
return fault;

View file

@@ -38,9 +38,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
mem_flags = makeList(mem_flags)
inst_flags = makeList(inst_flags)
# add hook to get effective addresses into execution trace output.
ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n'
# Some CPU models execute the memory operation as an atomic unit,
# while others want to separate them into an effective address
# computation and a memory access operation. As a result, we need

View file

@@ -166,7 +166,6 @@ def template StoreExecute {{
if (fault == NoFault) {
fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
memAccessFlags, NULL);
if (traceData) { traceData->setData(Mem); }
}
if (fault == NoFault) {
@@ -196,7 +195,6 @@ def template StoreInitiateAcc {{
if (fault == NoFault) {
fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
memAccessFlags, NULL);
if (traceData) { traceData->setData(Mem); }
}
// Need to write back any potential address register update

View file

@@ -97,9 +97,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
mem_flags = makeList(mem_flags)
inst_flags = makeList(inst_flags)
# add hook to get effective addresses into execution trace output.
ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n'
# Generate InstObjParams for the memory access.
iop = InstObjParams(name, Name, base_class,
{'ea_code': ea_code,

View file

@@ -443,6 +443,10 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
//The size of the data we're trying to read.
int dataSize = sizeof(T);
if (inst->traceData) {
inst->traceData->setAddr(addr);
}
if (inst->split2ndAccess) {
dataSize = inst->split2ndSize;
cache_req->splitAccess = true;
@@ -541,6 +545,11 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
//The size of the data we're trying to read.
int dataSize = sizeof(T);
if (inst->traceData) {
inst->traceData->setAddr(addr);
inst->traceData->setData(data);
}
if (inst->split2ndAccess) {
dataSize = inst->split2ndSize;
cache_req->splitAccess = true;

View file

@@ -351,10 +351,6 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
}
}
// This will need a new way to tell if it has a dcache attached.
if (req->isUncacheable())
recordEvent("Uncached Read");
//If there's a fault, return it
if (fault != NoFault) {
if (req->isPrefetch()) {
@@ -451,6 +447,7 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
if (traceData) {
traceData->setAddr(addr);
traceData->setData(data);
}
//The block size of our peer.
@@ -522,20 +519,10 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
}
}
// This will need a new way to tell if it's hooked up to a cache or not.
if (req->isUncacheable())
recordEvent("Uncached Write");
//If there's a fault or we don't need to access a second cache line,
//stop now.
if (fault != NoFault || secondAddr <= addr)
{
// If the write needs to have a fault on the access, consider
// calling changeStatus() and changing it to "bad addr write"
// or something.
if (traceData) {
traceData->setData(gtoh(data));
}
if (req->isLocked() && fault == NoFault) {
assert(locked);
locked = false;

View file

@@ -205,6 +205,27 @@ change_thread_state(ThreadID tid, int activate, int priority)
{
}
void
BaseSimpleCPU::prefetch(Addr addr, unsigned flags)
{
if (traceData) {
traceData->setAddr(addr);
}
// need to do this...
}
void
BaseSimpleCPU::writeHint(Addr addr, int size, unsigned flags)
{
if (traceData) {
traceData->setAddr(addr);
}
// need to do this...
}
Fault
BaseSimpleCPU::copySrcTranslate(Addr src)
{

View file

@@ -232,16 +232,8 @@ class BaseSimpleCPU : public BaseCPU
Addr getEA() { panic("BaseSimpleCPU::getEA() not implemented\n");
M5_DUMMY_RETURN}
void prefetch(Addr addr, unsigned flags)
{
// need to do this...
}
void writeHint(Addr addr, int size, unsigned flags)
{
// need to do this...
}
void prefetch(Addr addr, unsigned flags);
void writeHint(Addr addr, int size, unsigned flags);
Fault copySrcTranslate(Addr src);
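The header stubs above become plain declarations; their bodies move to the .cc file (the previous file's hunk) so they can record the address on the instruction's trace record. A single-file sketch of the shape, with hypothetical names (in the real change the declaration stays in the header and the definition lives in the .cc):

#include <iostream>

struct TraceRecord {
    void setAddr(unsigned long a) { std::cout << "addr " << std::hex << a << std::endl; }
};

struct SketchCPU {
    TraceRecord *traceData = nullptr;
    void prefetch(unsigned long addr, unsigned flags);   // declaration only
};

// Out-of-line definition: now has room for trace bookkeeping.
void
SketchCPU::prefetch(unsigned long addr, unsigned flags)
{
    if (traceData)
        traceData->setAddr(addr);
    // the prefetch itself is still unimplemented, as in the original
}

int main() {
    TraceRecord rec;
    SketchCPU cpu;
    cpu.traceData = &rec;
    cpu.prefetch(0x2000, 0);
    return 0;
}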

View file

@@ -426,16 +426,16 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
int data_size = sizeof(T);
BaseTLB::Mode mode = BaseTLB::Read;
if (traceData) {
traceData->setAddr(addr);
}
RequestPtr req = new Request(asid, addr, data_size,
flags, pc, _cpuId, tid);
Addr split_addr = roundDown(addr + data_size - 1, block_size);
assert(split_addr <= addr || split_addr - addr < block_size);
// This will need a new way to tell if it's hooked up to a cache or not.
if (req->isUncacheable())
recordEvent("Uncached Write");
_status = DTBWaitResponse;
if (split_addr > addr) {
RequestPtr req1, req2;
@@ -460,11 +460,6 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
thread->dtb->translateTiming(req, tc, translation, mode);
}
if (traceData) {
traceData->setData(data);
traceData->setAddr(addr);
}
return NoFault;
}
@@ -548,16 +543,17 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
int data_size = sizeof(T);
BaseTLB::Mode mode = BaseTLB::Write;
if (traceData) {
traceData->setAddr(addr);
traceData->setData(data);
}
RequestPtr req = new Request(asid, addr, data_size,
flags, pc, _cpuId, tid);
Addr split_addr = roundDown(addr + data_size - 1, block_size);
assert(split_addr <= addr || split_addr - addr < block_size);
// This will need a new way to tell if it's hooked up to a cache or not.
if (req->isUncacheable())
recordEvent("Uncached Write");
T *dataP = new T;
*dataP = TheISA::htog(data);
_status = DTBWaitResponse;
@@ -584,13 +580,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
thread->dtb->translateTiming(req, tc, translation, mode);
}
if (traceData) {
traceData->setAddr(req->getVaddr());
traceData->setData(data);
}
// If the write needs to have a fault on the access, consider calling
// changeStatus() and changing it to "bad addr write" or something.
// Translation faults will be returned via finishTranslation()
return NoFault;
}

View file

@@ -35,6 +35,16 @@
#include "sim/tlb.hh"
/**
* This class captures the state of an address translation. A translation
* can be split in two if the ISA supports it and the memory access crosses
* a page boundary. In this case, this class is shared by two data
* translations (below). Otherwise it is used by a single data translation
* class. When each part of the translation is finished, the finish
* function is called which will indicate whether the whole translation is
* completed or not. There are also functions for accessing parts of the
* translation state which deal with the possible split correctly.
*/
class WholeTranslationState
{
protected:
@@ -50,7 +60,10 @@ class WholeTranslationState
uint64_t *res;
BaseTLB::Mode mode;
/** Single translation state. */
/**
* Single translation state. We set the number of outstanding
* translations to one and indicate that it is not split.
*/
WholeTranslationState(RequestPtr _req, uint8_t *_data, uint64_t *_res,
BaseTLB::Mode _mode)
: outstanding(1), isSplit(false), mainReq(_req), sreqLow(NULL),
@@ -60,7 +73,11 @@ class WholeTranslationState
assert(mode == BaseTLB::Read || mode == BaseTLB::Write);
}
/** Split translation state. */
/**
* Split translation state. We copy all state into this class, set the
* number of outstanding translations to two and then mark this as a
* split translation.
*/
WholeTranslationState(RequestPtr _req, RequestPtr _sreqLow,
RequestPtr _sreqHigh, uint8_t *_data, uint64_t *_res,
BaseTLB::Mode _mode)
@@ -71,6 +88,13 @@ class WholeTranslationState
assert(mode == BaseTLB::Read || mode == BaseTLB::Write);
}
/**
* Finish part of a translation. If there is only one request then this
* translation is completed. If the request has been split in two then
* the outstanding count determines whether the translation is complete.
* In this case, flags from the split request are copied to the main
* request to make it easier to access them later on.
*/
bool
finish(Fault fault, int index)
{
@@ -89,6 +113,10 @@ class WholeTranslationState
return outstanding == 0;
}
/**
* Determine whether this translation produced a fault. Both parts of the
* translation must be checked if this is a split translation.
*/
Fault
getFault() const
{
@@ -102,36 +130,54 @@ class WholeTranslationState
return NoFault;
}
/** Remove all faults from the translation. */
void
setNoFault()
{
faults[0] = faults[1] = NoFault;
}
/**
* Check if this request is uncacheable. We only need to check the main
* request because the flags will have been copied here on a split
* translation.
*/
bool
isUncacheable() const
{
return mainReq->isUncacheable();
}
/**
* Check if this request is a prefetch. We only need to check the main
* request because the flags will have been copied here on a split
* translation.
*/
bool
isPrefetch() const
{
return mainReq->isPrefetch();
}
/** Get the physical address of this request. */
Addr
getPaddr() const
{
return mainReq->getPaddr();
}
/**
* Get the flags associated with this request. We only need to access
* the main request because the flags will have been copied here on a
* split translation.
*/
unsigned
getFlags()
{
return mainReq->getFlags();
}
/** Delete all requests that make up this translation. */
void
deleteReqs()
{
@@ -143,6 +189,16 @@ class WholeTranslationState
}
};
/**
* This class represents part of a data address translation. All state for
* the translation is held in WholeTranslationState (above). Therefore this
* class does not need to know whether the translation is split or not. The
* index variable determines this but is simply passed on to the state class.
* When this part of the translation is completed, finish is called. If the
* translation state class indicate that the whole translation is complete
* then the execution context is informed.
*/
template <class ExecContext>
class DataTranslation : public BaseTLB::Translation
{
@@ -163,6 +219,10 @@ class DataTranslation : public BaseTLB::Translation
{
}
/**
* Finish this part of the translation and indicate that the whole
* translation is complete if the state says so.
*/
void
finish(Fault fault, RequestPtr req, ThreadContext *tc,
BaseTLB::Mode mode)
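The new comments above describe the split-translation protocol: a WholeTranslationState starts with one outstanding part (or two when split across a page boundary), and each finish() call decrements the count until the whole translation completes. A self-contained sketch of just that counting logic, with stand-in names (the real class also carries the requests, data pointer, faults, and mode):

#include <cassert>
#include <iostream>

// Stand-in capturing only the outstanding-count logic described above.
struct SketchTranslationState {
    int outstanding;
    bool isSplit;

    explicit SketchTranslationState(bool split)
        : outstanding(split ? 2 : 1), isSplit(split) {}

    // Called as each part finishes; true means the whole translation
    // is now complete.
    bool finish() {
        assert(outstanding > 0);
        outstanding--;
        return outstanding == 0;
    }
};

int main() {
    SketchTranslationState split(true);
    std::cout << split.finish() << std::endl;   // 0: one half still pending
    std::cout << split.finish() << std::endl;   // 1: whole translation done
    return 0;
}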

View file

@@ -1,4 +1,3 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
@@ -27,13 +26,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* $Id$
*
*/
#include "mem/ruby/profiler/AccessTraceForAddress.hh"
#include "mem/ruby/common/Histogram.hh"
#include "mem/ruby/profiler/AccessTraceForAddress.hh"
AccessTraceForAddress::AccessTraceForAddress()
{
@@ -60,7 +54,8 @@ AccessTraceForAddress::~AccessTraceForAddress()
}
}
void AccessTraceForAddress::print(ostream& out) const
void
AccessTraceForAddress::print(ostream& out) const
{
out << m_addr;
@@ -79,7 +74,10 @@ void AccessTraceForAddress::print(ostream& out) const
}
}
void AccessTraceForAddress::update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, bool sharing_miss)
void
AccessTraceForAddress::update(CacheRequestType type,
AccessModeType access_mode, NodeID cpu,
bool sharing_miss)
{
m_touched_by.add(cpu);
m_total++;
@@ -102,7 +100,8 @@ void AccessTraceForAddress::update(CacheRequestType type, AccessModeType access_
}
}
int AccessTraceForAddress::getTotal() const
int
AccessTraceForAddress::getTotal() const
{
if (m_histogram_ptr == NULL) {
return m_total;
@@ -111,7 +110,8 @@ int AccessTraceForAddress::getTotal() const
}
}
void AccessTraceForAddress::addSample(int value)
void
AccessTraceForAddress::addSample(int value)
{
assert(m_total == 0);
if (m_histogram_ptr == NULL) {
@@ -119,8 +119,3 @@ void AccessTraceForAddress::addSample(int value)
}
m_histogram_ptr->add(value);
}
bool node_less_then_eq(const AccessTraceForAddress* n1, const AccessTraceForAddress* n2)
{
return (n1->getTotal() > n2->getTotal());
}

View file

@@ -1,4 +1,3 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
@@ -27,36 +26,27 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* $Id$
*
* Description:
*
*/
#ifndef __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__
#define __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__
#ifndef ACCESSTRACEFORADDRESS_H
#define ACCESSTRACEFORADDRESS_H
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/protocol/CacheRequestType.hh"
#include "mem/protocol/AccessModeType.hh"
#include "mem/ruby/system/NodeID.hh"
#include "mem/protocol/CacheRequestType.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/common/Set.hh"
#include "mem/ruby/system/NodeID.hh"
class Histogram;
class AccessTraceForAddress {
public:
// Constructors
class AccessTraceForAddress
{
public:
AccessTraceForAddress();
explicit AccessTraceForAddress(const Address& addr);
// Destructor
~AccessTraceForAddress();
// Public Methods
void update(CacheRequestType type, AccessModeType access_mode, NodeID cpu, bool sharing_miss);
void update(CacheRequestType type, AccessModeType access_mode, NodeID cpu,
bool sharing_miss);
int getTotal() const;
int getSharing() const { return m_sharing; }
int getTouchedBy() const { return m_touched_by.count(); }
@@ -64,15 +54,8 @@ public:
void addSample(int value);
void print(ostream& out) const;
private:
// Private Methods
// Private copy constructor and assignment operator
// AccessTraceForAddress(const AccessTraceForAddress& obj);
// AccessTraceForAddress& operator=(const AccessTraceForAddress& obj);
// Data Members (m_ prefix)
private:
Address m_addr;
uint64 m_loads;
uint64 m_stores;
@@ -84,20 +67,19 @@ private:
Histogram* m_histogram_ptr;
};
bool node_less_then_eq(const AccessTraceForAddress* n1, const AccessTraceForAddress* n2);
inline bool
node_less_then_eq(const AccessTraceForAddress* n1,
const AccessTraceForAddress* n2)
{
return n1->getTotal() > n2->getTotal();
}
// Output operator declaration
ostream& operator<<(ostream& out, const AccessTraceForAddress& obj);
// ******************* Definitions *******************
// Output operator definition
extern inline
ostream& operator<<(ostream& out, const AccessTraceForAddress& obj)
inline ostream&
operator<<(ostream& out, const AccessTraceForAddress& obj)
{
obj.print(out);
out << flush;
return out;
}
#endif //ACCESSTRACEFORADDRESS_H
#endif // __MEM_RUBY_PROFILER_ACCESSTRACEFORADDRESS_HH__
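The guard rename above follows the path-derived convention applied throughout these headers. For a hypothetical new header at mem/ruby/profiler/NewProfiler.hh, the same pattern would be:

#ifndef __MEM_RUBY_PROFILER_NEWPROFILER_HH__
#define __MEM_RUBY_PROFILER_NEWPROFILER_HH__

// ... declarations ...

#endif // __MEM_RUBY_PROFILER_NEWPROFILER_HH__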

View file

@@ -1,4 +1,3 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
@@ -27,39 +26,108 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* AddressProfiler.cc
*
* Description: See AddressProfiler.hh
*
* $Id$
*
*/
#include "mem/ruby/profiler/AddressProfiler.hh"
#include "mem/gems_common/Map.hh"
#include "mem/gems_common/PrioHeap.hh"
#include "mem/protocol/CacheMsg.hh"
#include "mem/ruby/profiler/AccessTraceForAddress.hh"
#include "mem/gems_common/PrioHeap.hh"
#include "mem/gems_common/Map.hh"
#include "mem/ruby/system/System.hh"
#include "mem/ruby/profiler/AddressProfiler.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"
typedef AddressProfiler::AddressMap AddressMap;
// Helper functions
static AccessTraceForAddress& lookupTraceForAddress(const Address& addr,
Map<Address,
AccessTraceForAddress>* record_map);
AccessTraceForAddress&
lookupTraceForAddress(const Address& addr, AddressMap* record_map)
{
if (!record_map->exist(addr)) {
record_map->add(addr, AccessTraceForAddress(addr));
}
return record_map->lookup(addr);
}
static void printSorted(ostream& out,
int num_of_sequencers,
const Map<Address, AccessTraceForAddress>* record_map,
string description);
void
printSorted(ostream& out, int num_of_sequencers, const AddressMap* record_map,
string description)
{
const int records_printed = 100;
uint64 misses = 0;
PrioHeap<AccessTraceForAddress*> heap;
Vector<Address> keys = record_map->keys();
for (int i = 0; i < keys.size(); i++) {
AccessTraceForAddress* record = &(record_map->lookup(keys[i]));
misses += record->getTotal();
heap.insert(record);
}
out << "Total_entries_" << description << ": " << keys.size() << endl;
if (g_system_ptr->getProfiler()->getAllInstructions())
out << "Total_Instructions_" << description << ": " << misses << endl;
else
out << "Total_data_misses_" << description << ": " << misses << endl;
out << "total | load store atomic | user supervisor | sharing | touched-by"
<< endl;
Histogram remaining_records(1, 100);
Histogram all_records(1, 100);
Histogram remaining_records_log(-1);
Histogram all_records_log(-1);
// Allows us to track how many lines where touched by n processors
Vector<int64> m_touched_vec;
Vector<int64> m_touched_weighted_vec;
m_touched_vec.setSize(num_of_sequencers+1);
m_touched_weighted_vec.setSize(num_of_sequencers+1);
for (int i = 0; i < m_touched_vec.size(); i++) {
m_touched_vec[i] = 0;
m_touched_weighted_vec[i] = 0;
}
int counter = 0;
while (heap.size() > 0 && counter < records_printed) {
AccessTraceForAddress* record = heap.extractMin();
double percent = 100.0 * (record->getTotal() / double(misses));
out << description << " | " << percent << " % " << *record << endl;
all_records.add(record->getTotal());
all_records_log.add(record->getTotal());
counter++;
m_touched_vec[record->getTouchedBy()]++;
m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal();
}
while (heap.size() > 0) {
AccessTraceForAddress* record = heap.extractMin();
all_records.add(record->getTotal());
remaining_records.add(record->getTotal());
all_records_log.add(record->getTotal());
remaining_records_log.add(record->getTotal());
m_touched_vec[record->getTouchedBy()]++;
m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal();
}
out << endl;
out << "all_records_" << description << ": "
<< all_records << endl
<< "all_records_log_" << description << ": "
<< all_records_log << endl
<< "remaining_records_" << description << ": "
<< remaining_records << endl
<< "remaining_records_log_" << description << ": "
<< remaining_records_log << endl
<< "touched_by_" << description << ": "
<< m_touched_vec << endl
<< "touched_by_weighted_" << description << ": "
<< m_touched_weighted_vec << endl
<< endl;
}
AddressProfiler::AddressProfiler(int num_of_sequencers)
{
m_dataAccessTrace = new Map<Address, AccessTraceForAddress>;
m_macroBlockAccessTrace = new Map<Address, AccessTraceForAddress>;
m_programCounterAccessTrace = new Map<Address, AccessTraceForAddress>;
m_retryProfileMap = new Map<Address, AccessTraceForAddress>;
m_dataAccessTrace = new AddressMap;
m_macroBlockAccessTrace = new AddressMap;
m_programCounterAccessTrace = new AddressMap;
m_retryProfileMap = new AddressMap;
m_num_of_sequencers = num_of_sequencers;
clearStats();
}
@@ -72,14 +140,20 @@ AddressProfiler::~AddressProfiler()
delete m_retryProfileMap;
}
void AddressProfiler::setHotLines(bool hot_lines){
void
AddressProfiler::setHotLines(bool hot_lines)
{
m_hot_lines = hot_lines;
}
void AddressProfiler::setAllInstructions(bool all_instructions){
void
AddressProfiler::setAllInstructions(bool all_instructions)
{
m_all_instructions = all_instructions;
}
void AddressProfiler::printStats(ostream& out) const
void
AddressProfiler::printStats(ostream& out) const
{
if (m_hot_lines) {
out << endl;
@@ -95,26 +169,30 @@ void AddressProfiler::printStats(ostream& out) const
out << "Hot Data Blocks" << endl;
out << "---------------" << endl;
out << endl;
printSorted(out, m_num_of_sequencers, m_dataAccessTrace, "block_address");
printSorted(out, m_num_of_sequencers, m_dataAccessTrace,
"block_address");
out << endl;
out << "Hot MacroData Blocks" << endl;
out << "--------------------" << endl;
out << endl;
printSorted(out, m_num_of_sequencers, m_macroBlockAccessTrace, "macroblock_address");
printSorted(out, m_num_of_sequencers, m_macroBlockAccessTrace,
"macroblock_address");
out << "Hot Instructions" << endl;
out << "----------------" << endl;
out << endl;
printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, "pc_address");
printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace,
"pc_address");
}
if (m_all_instructions){
if (m_all_instructions) {
out << endl;
out << "All Instructions Profile:" << endl;
out << "-------------------------" << endl;
out << endl;
printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace, "pc_address");
printSorted(out, m_num_of_sequencers, m_programCounterAccessTrace,
"pc_address");
out << endl;
}
@@ -130,13 +208,14 @@ void AddressProfiler::printStats(ostream& out) const
m_retryProfileHisto.printPercent(out);
out << endl;
printSorted(out, m_num_of_sequencers, m_retryProfileMap, "block_address");
printSorted(out, m_num_of_sequencers, m_retryProfileMap,
"block_address");
out << endl;
}
}
void AddressProfiler::clearStats()
void
AddressProfiler::clearStats()
{
// Clear the maps
m_sharing_miss_counter = 0;
@@ -151,7 +230,10 @@ void AddressProfiler::clearStats()
m_gets_sharing_histogram.clear();
}
void AddressProfiler::profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor)
void
AddressProfiler::profileGetX(const Address& datablock, const Address& PC,
const Set& owner, const Set& sharers,
NodeID requestor)
{
Set indirection_set;
indirection_set.addSet(sharers);
@@ -162,10 +244,14 @@ void AddressProfiler::profileGetX(const Address& datablock, const Address& PC, c
m_getx_sharing_histogram.add(num_indirections);
bool indirection_miss = (num_indirections > 0);
addTraceSample(datablock, PC, CacheRequestType_ST, AccessModeType(0), requestor, indirection_miss);
addTraceSample(datablock, PC, CacheRequestType_ST, AccessModeType(0),
requestor, indirection_miss);
}
void AddressProfiler::profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor)
void
AddressProfiler::profileGetS(const Address& datablock, const Address& PC,
const Set& owner, const Set& sharers,
NodeID requestor)
{
Set indirection_set;
indirection_set.addSet(owner);
@@ -175,10 +261,15 @@ void AddressProfiler::profileGetS(const Address& datablock, const Address& PC, c
m_gets_sharing_histogram.add(num_indirections);
bool indirection_miss = (num_indirections > 0);
addTraceSample(datablock, PC, CacheRequestType_LD, AccessModeType(0), requestor, indirection_miss);
addTraceSample(datablock, PC, CacheRequestType_LD, AccessModeType(0),
requestor, indirection_miss);
}
void AddressProfiler::addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss)
void
AddressProfiler::addTraceSample(Address data_addr, Address pc_addr,
CacheRequestType type,
AccessModeType access_mode, NodeID id,
bool sharing_miss)
{
if (m_all_instructions) {
if (sharing_miss) {
@@ -187,24 +278,33 @@ void AddressProfiler::addTraceSample(Address data_addr, Address pc_addr, CacheRe
// record data address trace info
data_addr.makeLineAddress();
lookupTraceForAddress(data_addr, m_dataAccessTrace).update(type, access_mode, id, sharing_miss);
lookupTraceForAddress(data_addr, m_dataAccessTrace).
update(type, access_mode, id, sharing_miss);
// record macro data address trace info
Address macro_addr(data_addr.maskLowOrderBits(10)); // 6 for datablock, 4 to make it 16x more coarse
lookupTraceForAddress(macro_addr, m_macroBlockAccessTrace).update(type, access_mode, id, sharing_miss);
// 6 for datablock, 4 to make it 16x more coarse
Address macro_addr(data_addr.maskLowOrderBits(10));
lookupTraceForAddress(macro_addr, m_macroBlockAccessTrace).
update(type, access_mode, id, sharing_miss);
// record program counter address trace info
lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).update(type, access_mode, id, sharing_miss);
lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).
update(type, access_mode, id, sharing_miss);
}
if (m_all_instructions) {
// This code is used if the address profiler is an all-instructions profiler
// record program counter address trace info
lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).update(type, access_mode, id, sharing_miss);
// This code is used if the address profiler is an
// all-instructions profiler record program counter address
// trace info
lookupTraceForAddress(pc_addr, m_programCounterAccessTrace).
update(type, access_mode, id, sharing_miss);
}
}
void AddressProfiler::profileRetry(const Address& data_addr, AccessType type, int count)
void
AddressProfiler::profileRetry(const Address& data_addr, AccessType type,
int count)
{
m_retryProfileHisto.add(count);
if (type == AccessType_Read) {
@@ -216,83 +316,3 @@ void AddressProfiler::profileRetry(const Address& data_addr, AccessType type, in
lookupTraceForAddress(data_addr, m_retryProfileMap).addSample(count);
}
}
// ***** Normal Functions ******
static void printSorted(ostream& out,
int num_of_sequencers,
const Map<Address, AccessTraceForAddress>* record_map,
string description)
{
const int records_printed = 100;
uint64 misses = 0;
PrioHeap<AccessTraceForAddress*> heap;
Vector<Address> keys = record_map->keys();
for(int i=0; i<keys.size(); i++){
AccessTraceForAddress* record = &(record_map->lookup(keys[i]));
misses += record->getTotal();
heap.insert(record);
}
out << "Total_entries_" << description << ": " << keys.size() << endl;
if (g_system_ptr->getProfiler()->getAllInstructions())
out << "Total_Instructions_" << description << ": " << misses << endl;
else
out << "Total_data_misses_" << description << ": " << misses << endl;
out << "total | load store atomic | user supervisor | sharing | touched-by" << endl;
Histogram remaining_records(1, 100);
Histogram all_records(1, 100);
Histogram remaining_records_log(-1);
Histogram all_records_log(-1);
// Allows us to track how many lines where touched by n processors
Vector<int64> m_touched_vec;
Vector<int64> m_touched_weighted_vec;
m_touched_vec.setSize(num_of_sequencers+1);
m_touched_weighted_vec.setSize(num_of_sequencers+1);
for (int i=0; i<m_touched_vec.size(); i++) {
m_touched_vec[i] = 0;
m_touched_weighted_vec[i] = 0;
}
int counter = 0;
while((heap.size() > 0) && (counter < records_printed)) {
AccessTraceForAddress* record = heap.extractMin();
double percent = 100.0*(record->getTotal()/double(misses));
out << description << " | " << percent << " % " << *record << endl;
all_records.add(record->getTotal());
all_records_log.add(record->getTotal());
counter++;
m_touched_vec[record->getTouchedBy()]++;
m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal();
}
while(heap.size() > 0) {
AccessTraceForAddress* record = heap.extractMin();
all_records.add(record->getTotal());
remaining_records.add(record->getTotal());
all_records_log.add(record->getTotal());
remaining_records_log.add(record->getTotal());
m_touched_vec[record->getTouchedBy()]++;
m_touched_weighted_vec[record->getTouchedBy()] += record->getTotal();
}
out << endl;
out << "all_records_" << description << ": " << all_records << endl;
out << "all_records_log_" << description << ": " << all_records_log << endl;
out << "remaining_records_" << description << ": " << remaining_records << endl;
out << "remaining_records_log_" << description << ": " << remaining_records_log << endl;
out << "touched_by_" << description << ": " << m_touched_vec << endl;
out << "touched_by_weighted_" << description << ": " << m_touched_weighted_vec << endl;
out << endl;
}
static AccessTraceForAddress& lookupTraceForAddress(const Address& addr, Map<Address, AccessTraceForAddress>* record_map)
{
if(record_map->exist(addr) == false){
record_map->add(addr, AccessTraceForAddress(addr));
}
return record_map->lookup(addr);
}
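printSorted, shown rewritten above, keeps one record per address in a priority heap, prints the hottest hundred entries, and then drains the remainder into histograms only. A self-contained sketch of that two-phase drain using std::priority_queue (PrioHeap is gem5-specific; the node_less_then_eq comparator above is deliberately inverted so extractMin() yields the record with the largest total, which a default max-heap gives directly):

#include <initializer_list>
#include <iostream>
#include <queue>

int main() {
    // Per-address access counts; the real code stores
    // AccessTraceForAddress pointers ordered by their totals.
    std::priority_queue<int> heap;
    for (int c : {5, 42, 7, 19, 3, 28})
        heap.push(c);

    const int records_printed = 2;   // 100 in the real code

    // Phase 1: print only the hottest entries.
    for (int i = 0; i < records_printed && !heap.empty(); i++) {
        std::cout << "hot: " << heap.top() << std::endl;
        heap.pop();
    }

    // Phase 2: the rest is accumulated (into histograms in the real
    // code) without being printed individually.
    long remaining = 0;
    while (!heap.empty()) {
        remaining += heap.top();
        heap.pop();
    }
    std::cout << "remaining total: " << remaining << std::endl;
    return 0;
}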

View file

@@ -1,4 +1,3 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
@@ -27,89 +26,77 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* AddressProfiler.hh
*
* Description:
*
* $Id$
*
*/
#ifndef __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__
#define __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__
#ifndef ADDRESSPROFILER_H
#define ADDRESSPROFILER_H
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/system/NodeID.hh"
#include "mem/ruby/common/Histogram.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/protocol/CacheMsg.hh"
#include "mem/protocol/AccessType.hh"
#include "mem/protocol/CacheMsg.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/common/Histogram.hh"
#include "mem/ruby/system/NodeID.hh"
class AccessTraceForAddress;
class Set;
template <class KEY_TYPE, class VALUE_TYPE> class Map;
class AddressProfiler {
public:
// Constructors
AddressProfiler(int num_of_sequencers);
class AddressProfiler
{
public:
typedef Map<Address, AccessTraceForAddress> AddressMap;
// Destructor
public:
AddressProfiler(int num_of_sequencers);
~AddressProfiler();
// Public Methods
void printStats(ostream& out) const;
void clearStats();
void addTraceSample(Address data_addr, Address pc_addr, CacheRequestType type, AccessModeType access_mode, NodeID id, bool sharing_miss);
void addTraceSample(Address data_addr, Address pc_addr,
CacheRequestType type, AccessModeType access_mode,
NodeID id, bool sharing_miss);
void profileRetry(const Address& data_addr, AccessType type, int count);
void profileGetX(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor);
void profileGetS(const Address& datablock, const Address& PC, const Set& owner, const Set& sharers, NodeID requestor);
void profileGetX(const Address& datablock, const Address& PC,
const Set& owner, const Set& sharers, NodeID requestor);
void profileGetS(const Address& datablock, const Address& PC,
const Set& owner, const Set& sharers, NodeID requestor);
void print(ostream& out) const;
//added by SS
void setHotLines(bool hot_lines);
void setAllInstructions(bool all_instructions);
private:
// Private Methods
private:
// Private copy constructor and assignment operator
AddressProfiler(const AddressProfiler& obj);
AddressProfiler& operator=(const AddressProfiler& obj);
// Data Members (m_ prefix)
int64 m_sharing_miss_counter;
Map<Address, AccessTraceForAddress>* m_dataAccessTrace;
Map<Address, AccessTraceForAddress>* m_macroBlockAccessTrace;
Map<Address, AccessTraceForAddress>* m_programCounterAccessTrace;
Map<Address, AccessTraceForAddress>* m_retryProfileMap;
AddressMap* m_dataAccessTrace;
AddressMap* m_macroBlockAccessTrace;
AddressMap* m_programCounterAccessTrace;
AddressMap* m_retryProfileMap;
Histogram m_retryProfileHisto;
Histogram m_retryProfileHistoWrite;
Histogram m_retryProfileHistoRead;
Histogram m_getx_sharing_histogram;
Histogram m_gets_sharing_histogram;
//added by SS
//added by SS
bool m_hot_lines;
bool m_all_instructions;
int m_num_of_sequencers;
};
// Output operator declaration
ostream& operator<<(ostream& out, const AddressProfiler& obj);
// ******************* Definitions *******************
// Output operator definition
extern inline
ostream& operator<<(ostream& out, const AddressProfiler& obj)
inline ostream&
operator<<(ostream& out, const AddressProfiler& obj)
{
obj.print(out);
out << flush;
return out;
}
#endif //ADDRESSPROFILER_H
#endif // __MEM_RUBY_PROFILER_ADDRESSPROFILER_HH__

View file

@@ -1,4 +1,3 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
@@ -27,21 +26,12 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* CacheProfiler.C
*
* Description: See CacheProfiler.hh
*
* $Id$
*
*/
#include "mem/ruby/profiler/CacheProfiler.hh"
#include "mem/ruby/profiler/AccessTraceForAddress.hh"
#include "mem/gems_common/PrioHeap.hh"
#include "mem/ruby/system/System.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/gems_common/Vector.hh"
#include "mem/ruby/profiler/AccessTraceForAddress.hh"
#include "mem/ruby/profiler/CacheProfiler.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"
CacheProfiler::CacheProfiler(const string& description)
{
@@ -57,7 +47,8 @@ CacheProfiler::~CacheProfiler()
delete m_requestTypeVec_ptr;
}
void CacheProfiler::printStats(ostream& out) const
void
CacheProfiler::printStats(ostream& out) const
{
out << "Cache Stats: " << m_description << endl;
string description = " " + m_description;
@@ -71,39 +62,45 @@ void CacheProfiler::printStats(ostream& out) const
int requests = 0;
for(int i=0; i<int(CacheRequestType_NUM); i++) {
for (int i = 0; i < int(CacheRequestType_NUM); i++) {
requests += m_requestTypeVec_ptr->ref(i);
}
assert(m_misses == requests);
if (requests > 0) {
for(int i=0; i<int(CacheRequestType_NUM); i++){
for (int i = 0; i < int(CacheRequestType_NUM); i++) {
if (m_requestTypeVec_ptr->ref(i) > 0) {
out << description << "_request_type_" << CacheRequestType_to_string(CacheRequestType(i)) << ": "
<< (100.0 * double((m_requestTypeVec_ptr->ref(i)))) / double(requests)
out << description << "_request_type_"
<< CacheRequestType_to_string(CacheRequestType(i))
<< ": "
<< 100.0 * (double)m_requestTypeVec_ptr->ref(i) /
(double)requests
<< "%" << endl;
}
}
out << endl;
for(int i=0; i<AccessModeType_NUM; i++){
for (int i = 0; i < AccessModeType_NUM; i++){
if (m_accessModeTypeHistogram[i] > 0) {
out << description << "_access_mode_type_" << (AccessModeType) i << ": " << m_accessModeTypeHistogram[i]
<< " " << (100.0 * m_accessModeTypeHistogram[i]) / requests << "%" << endl;
out << description << "_access_mode_type_"
<< (AccessModeType) i << ": "
<< m_accessModeTypeHistogram[i] << " "
<< 100.0 * m_accessModeTypeHistogram[i] / requests
<< "%" << endl;
}
}
}
out << description << "_request_size: " << m_requestSize << endl;
out << endl;
}
void CacheProfiler::clearStats()
void
CacheProfiler::clearStats()
{
for(int i=0; i<int(CacheRequestType_NUM); i++) {
for (int i = 0; i < int(CacheRequestType_NUM); i++) {
m_requestTypeVec_ptr->ref(i) = 0;
}
m_requestSize.clear();
@@ -112,12 +109,15 @@ void CacheProfiler::clearStats()
m_prefetches = 0;
m_sw_prefetches = 0;
m_hw_prefetches = 0;
for(int i=0; i<AccessModeType_NUM; i++){
for (int i = 0; i < AccessModeType_NUM; i++) {
m_accessModeTypeHistogram[i] = 0;
}
}
void CacheProfiler::addStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit)
void
CacheProfiler::addStatSample(CacheRequestType requestType,
AccessModeType type, int msgSize,
PrefetchBit pfBit)
{
m_misses++;
@@ -130,7 +130,8 @@ void CacheProfiler::addStatSample(CacheRequestType requestType, AccessModeType t
} else if (pfBit == PrefetchBit_Yes) {
m_prefetches++;
m_sw_prefetches++;
} else { // must be L1_HW || L2_HW prefetch
} else {
// must be L1_HW || L2_HW prefetch
m_prefetches++;
m_hw_prefetches++;
}

View file

@@ -1,4 +1,3 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
@@ -27,53 +26,40 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* CacheProfiler.hh
*
* Description:
*
* $Id$
*
*/
#ifndef CACHEPROFILER_H
#define CACHEPROFILER_H
#ifndef __MEM_RUBY_PROFILER_CACHEPROFILER_HH__
#define __MEM_RUBY_PROFILER_CACHEPROFILER_HH__
#include <iostream>
#include <string>
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/system/NodeID.hh"
#include "mem/ruby/common/Histogram.hh"
#include "mem/protocol/AccessModeType.hh"
#include "mem/protocol/PrefetchBit.hh"
#include "mem/protocol/CacheRequestType.hh"
#include "mem/protocol/PrefetchBit.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/common/Histogram.hh"
#include "mem/ruby/system/NodeID.hh"
template <class TYPE> class Vector;
class CacheProfiler {
public:
// Constructors
class CacheProfiler
{
public:
CacheProfiler(const std::string& description);
// Destructor
~CacheProfiler();
// Public Methods
void printStats(std::ostream& out) const;
void clearStats();
void addStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit);
void addStatSample(CacheRequestType requestType, AccessModeType type,
int msgSize, PrefetchBit pfBit);
void print(std::ostream& out) const;
private:
// Private Methods
private:
// Private copy constructor and assignment operator
CacheProfiler(const CacheProfiler& obj);
CacheProfiler& operator=(const CacheProfiler& obj);
// Data Members (m_ prefix)
std::string m_description;
Histogram m_requestSize;
int64 m_misses;
@@ -83,21 +69,15 @@ private:
int64 m_hw_prefetches;
int64 m_accessModeTypeHistogram[AccessModeType_NUM];
Vector < int >* m_requestTypeVec_ptr;
Vector <int>* m_requestTypeVec_ptr;
};
// Output operator declaration
std::ostream& operator<<(std::ostream& out, const CacheProfiler& obj);
// ******************* Definitions *******************
// Output operator definition
extern inline
std::ostream& operator<<(std::ostream& out, const CacheProfiler& obj)
inline std::ostream&
operator<<(std::ostream& out, const CacheProfiler& obj)
{
obj.print(out);
out << std::flush;
return out;
}
#endif //CACHEPROFILER_H
#endif // __MEM_RUBY_PROFILER_CACHEPROFILER_HH__

View file

@@ -1,4 +1,3 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
@@ -32,19 +31,14 @@
using namespace std;
MemCntrlProfiler::MemCntrlProfiler(const string& description,
int banks_per_rank,
int ranks_per_dimm,
int dimms_per_channel)
int banks_per_rank, int ranks_per_dimm, int dimms_per_channel)
{
m_description = description;
m_banks_per_rank = banks_per_rank;
m_ranks_per_dimm = ranks_per_dimm;
m_dimms_per_channel = dimms_per_channel;
int totalBanks = banks_per_rank *
ranks_per_dimm *
dimms_per_channel;
int totalBanks = banks_per_rank * ranks_per_dimm * dimms_per_channel;
m_memBankCount.setSize(totalBanks);
clearStats();
@@ -54,50 +48,65 @@ MemCntrlProfiler::~MemCntrlProfiler()
{
}
void MemCntrlProfiler::printStats(ostream& out) const
void
MemCntrlProfiler::printStats(ostream& out) const
{
if (m_memReq || m_memRefresh) { // if there's a memory controller at all
if (!m_memReq && !m_memRefresh) {
out << "Memory Controller: " << m_description
<< " no stats recorded." << endl
<< endl
<< endl;
return;
}
// if there's a memory controller at all
uint64 total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles;
double stallsPerReq = total_stalls * 1.0 / m_memReq;
out << "Memory controller: " << m_description << ":" << endl;
out << " memory_total_requests: " << m_memReq << endl; // does not include refreshes
// does not include refreshes
out << " memory_total_requests: " << m_memReq << endl;
out << " memory_reads: " << m_memRead << endl;
out << " memory_writes: " << m_memWrite << endl;
out << " memory_refreshes: " << m_memRefresh << endl;
out << " memory_total_request_delays: " << total_stalls << endl;
out << " memory_delays_per_request: " << stallsPerReq << endl;
out << " memory_delays_in_input_queue: " << m_memInputQ << endl;
out << " memory_delays_behind_head_of_bank_queue: " << m_memBankQ << endl;
out << " memory_delays_stalled_at_head_of_bank_queue: " << m_memWaitCycles << endl;
// Note: The following "memory stalls" entries are a breakdown of the
// cycles which already showed up in m_memWaitCycles. The order is
// significant; it is the priority of attributing the cycles.
// For example, bank_busy is before arbitration because if the bank was
// busy, we didn't even check arbitration.
// Note: "not old enough" means that since we grouped waiting heads-of-queues
// into batches to avoid starvation, a request in a newer batch
// didn't try to arbitrate yet because there are older requests waiting.
out << " memory_delays_behind_head_of_bank_queue: "
<< m_memBankQ << endl;
out << " memory_delays_stalled_at_head_of_bank_queue: "
<< m_memWaitCycles << endl;
// Note: The following "memory stalls" entries are a breakdown of
// the cycles which already showed up in m_memWaitCycles. The
// order is significant; it is the priority of attributing the
// cycles. For example, bank_busy is before arbitration because
// if the bank was busy, we didn't even check arbitration.
// Note: "not old enough" means that since we grouped waiting
// heads-of-queues into batches to avoid starvation, a request in
// a newer batch didn't try to arbitrate yet because there are
// older requests waiting.
out << " memory_stalls_for_bank_busy: " << m_memBankBusy << endl;
out << " memory_stalls_for_random_busy: " << m_memRandBusy << endl;
out << " memory_stalls_for_anti_starvation: " << m_memNotOld << endl;
out << " memory_stalls_for_arbitration: " << m_memArbWait << endl;
out << " memory_stalls_for_bus: " << m_memBusBusy << endl;
out << " memory_stalls_for_tfaw: " << m_memTfawBusy << endl;
out << " memory_stalls_for_read_write_turnaround: " << m_memReadWriteBusy << endl;
out << " memory_stalls_for_read_read_turnaround: " << m_memDataBusBusy << endl;
out << " memory_stalls_for_read_write_turnaround: "
<< m_memReadWriteBusy << endl;
out << " memory_stalls_for_read_read_turnaround: "
<< m_memDataBusBusy << endl;
out << " accesses_per_bank: ";
for (int bank=0; bank < m_memBankCount.size(); bank++) {
for (int bank = 0; bank < m_memBankCount.size(); bank++) {
out << m_memBankCount[bank] << " ";
}
} else {
out << "Memory Controller: " << m_description
<< " no stats recorded." << endl;
}
out << endl;
out << endl;
}
void MemCntrlProfiler::clearStats()
void
MemCntrlProfiler::clearStats()
{
m_memReq = 0;
m_memBankBusy = 0;
@@ -115,71 +124,99 @@ void MemCntrlProfiler::clearStats()
m_memRandBusy = 0;
m_memNotOld = 0;
for (int bank=0;
bank < m_memBankCount.size();
bank++) {
for (int bank = 0; bank < m_memBankCount.size(); bank++) {
m_memBankCount[bank] = 0;
}
}
void MemCntrlProfiler::profileMemReq(int bank) {
void
MemCntrlProfiler::profileMemReq(int bank)
{
m_memReq++;
m_memBankCount[bank]++;
}
void MemCntrlProfiler::profileMemBankBusy() {
void
MemCntrlProfiler::profileMemBankBusy()
{
m_memBankBusy++;
}
void MemCntrlProfiler::profileMemBusBusy() {
void
MemCntrlProfiler::profileMemBusBusy()
{
m_memBusBusy++;
}
void MemCntrlProfiler::profileMemReadWriteBusy() {
void
MemCntrlProfiler::profileMemReadWriteBusy()
{
m_memReadWriteBusy++;
}
void MemCntrlProfiler::profileMemDataBusBusy() {
void
MemCntrlProfiler::profileMemDataBusBusy()
{
m_memDataBusBusy++;
}
void MemCntrlProfiler::profileMemTfawBusy() {
void
MemCntrlProfiler::profileMemTfawBusy()
{
m_memTfawBusy++;
}
void MemCntrlProfiler::profileMemRefresh() {
void
MemCntrlProfiler::profileMemRefresh()
{
m_memRefresh++;
}
void MemCntrlProfiler::profileMemRead() {
void
MemCntrlProfiler::profileMemRead()
{
m_memRead++;
}
void MemCntrlProfiler::profileMemWrite() {
void
MemCntrlProfiler::profileMemWrite()
{
m_memWrite++;
}
void MemCntrlProfiler::profileMemWaitCycles(int cycles) {
void
MemCntrlProfiler::profileMemWaitCycles(int cycles)
{
m_memWaitCycles += cycles;
}
void MemCntrlProfiler::profileMemInputQ(int cycles) {
void
MemCntrlProfiler::profileMemInputQ(int cycles)
{
m_memInputQ += cycles;
}
void MemCntrlProfiler::profileMemBankQ(int cycles) {
void
MemCntrlProfiler::profileMemBankQ(int cycles)
{
m_memBankQ += cycles;
}
void MemCntrlProfiler::profileMemArbWait(int cycles) {
void
MemCntrlProfiler::profileMemArbWait(int cycles)
{
m_memArbWait += cycles;
}
void MemCntrlProfiler::profileMemRandBusy() {
void
MemCntrlProfiler::profileMemRandBusy()
{
m_memRandBusy++;
}
void MemCntrlProfiler::profileMemNotOld() {
void
MemCntrlProfiler::profileMemNotOld()
{
m_memNotOld++;
}

View file

@@ -1,4 +1,3 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
@@ -27,17 +26,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* MemCntrlProfiler.hh
*
* Description:
*
* $Id$
*
*/
#ifndef MEM_CNTRL_PROFILER_H
#define MEM_CNTRL_PROFILER_H
#ifndef __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__
#define __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__
#include <iostream>
#include <string>
@@ -47,18 +37,13 @@
template <class TYPE> class Vector;
class MemCntrlProfiler {
public:
// Constructors
MemCntrlProfiler(const std::string& description,
int banks_per_rank,
int ranks_per_dimm,
int dimms_per_channel);
// Destructor
class MemCntrlProfiler
{
public:
MemCntrlProfiler(const std::string& description, int banks_per_rank,
int ranks_per_dimm, int dimms_per_channel);
~MemCntrlProfiler();
// Public Methods
void printStats(std::ostream& out) const;
void clearStats();
@@ -79,14 +64,12 @@ public:
void profileMemNotOld();
void print(std::ostream& out) const;
private:
// Private Methods
private:
// Private copy constructor and assignment operator
MemCntrlProfiler(const MemCntrlProfiler& obj);
MemCntrlProfiler& operator=(const MemCntrlProfiler& obj);
// Data Members (m_ prefix)
std::string m_description;
uint64 m_memReq;
uint64 m_memBankBusy;
@@ -109,18 +92,12 @@ private:
int m_dimms_per_channel;
};
// Output operator declaration
std::ostream& operator<<(std::ostream& out, const MemCntrlProfiler& obj);
// ******************* Definitions *******************
// Output operator definition
extern inline
std::ostream& operator<<(std::ostream& out, const MemCntrlProfiler& obj)
inline std::ostream&
operator<<(std::ostream& out, const MemCntrlProfiler& obj)
{
obj.print(out);
out << std::flush;
return out;
}
#endif //MEM_CNTRL_PROFILER_H
#endif // __MEM_RUBY_PROFILER_MEMCNTRLPROFILER_HH__

View file

@@ -42,34 +42,24 @@
----------------------------------------------------------------------
*/
/*
* Profiler.cc
*
* Description: See Profiler.hh
*
* $Id$
*
*/
// Allows use of times() library call, which determines virtual runtime
#include <sys/resource.h>
#include <sys/times.h>
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/profiler/AddressProfiler.hh"
#include "mem/ruby/system/System.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/gems_common/PrioHeap.hh"
#include "mem/protocol/CacheMsg.hh"
#include "mem/protocol/Protocol.hh"
#include "mem/gems_common/util.hh"
#include "mem/gems_common/Map.hh"
#include "mem/ruby/common/Debug.hh"
#include "mem/gems_common/PrioHeap.hh"
#include "mem/gems_common/util.hh"
#include "mem/protocol/CacheMsg.hh"
#include "mem/protocol/MachineType.hh"
#include "mem/protocol/Protocol.hh"
#include "mem/ruby/common/Debug.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/AddressProfiler.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"
#include "mem/ruby/system/System.hh"
extern std::ostream * debug_cout_ptr;
extern std::ostream* debug_cout_ptr;
static double process_memory_total();
static double process_memory_resident();
@@ -95,13 +85,13 @@ Profiler::Profiler(const Params *p)
m_all_instructions = false;
m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
m_address_profiler_ptr -> setHotLines(m_hot_lines);
m_address_profiler_ptr -> setAllInstructions(m_all_instructions);
m_address_profiler_ptr->setHotLines(m_hot_lines);
m_address_profiler_ptr->setAllInstructions(m_all_instructions);
if (m_all_instructions) {
m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
m_inst_profiler_ptr -> setHotLines(m_hot_lines);
m_inst_profiler_ptr -> setAllInstructions(m_all_instructions);
m_inst_profiler_ptr->setHotLines(m_hot_lines);
m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
}
}
@@ -114,51 +104,45 @@ Profiler::~Profiler()
delete m_requestProfileMap_ptr;
}
void Profiler::wakeup()
void
Profiler::wakeup()
{
// FIXME - avoid the repeated code
Vector<integer_t> perProcCycleCount;
perProcCycleCount.setSize(m_num_of_sequencers);
for(int i=0; i < m_num_of_sequencers; i++) {
perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
for (int i = 0; i < m_num_of_sequencers; i++) {
perProcCycleCount[i] =
g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
// The +1 allows us to avoid division by zero
}
(*m_periodic_output_file_ptr) << "ruby_cycles: "
<< g_eventQueue_ptr->getTime()-m_ruby_start
<< endl;
ostream &out = *m_periodic_output_file_ptr;
(*m_periodic_output_file_ptr) << "mbytes_resident: "
<< process_memory_resident()
<< endl;
(*m_periodic_output_file_ptr) << "mbytes_total: "
<< process_memory_total()
<< endl;
out << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl
<< "mbytes_resident: " << process_memory_resident() << endl
<< "mbytes_total: " << process_memory_total() << endl;
if (process_memory_total() > 0) {
(*m_periodic_output_file_ptr) << "resident_ratio: "
<< process_memory_resident()/process_memory_total()
<< endl;
out << "resident_ratio: "
<< process_memory_resident() / process_memory_total() << endl;
}
(*m_periodic_output_file_ptr) << "miss_latency: "
<< m_allMissLatencyHistogram
<< endl;
out << "miss_latency: " << m_allMissLatencyHistogram << endl;
*m_periodic_output_file_ptr << endl;
out << endl;
if (m_all_instructions) {
m_inst_profiler_ptr->printStats(*m_periodic_output_file_ptr);
m_inst_profiler_ptr->printStats(out);
}
//g_system_ptr->getNetwork()->printStats(*m_periodic_output_file_ptr);
//g_system_ptr->getNetwork()->printStats(out);
g_eventQueue_ptr->scheduleEvent(this, m_stats_period);
}
void Profiler::setPeriodicStatsFile(const string& filename)
void
Profiler::setPeriodicStatsFile(const string& filename)
{
cout << "Recording periodic statistics to file '" << filename << "' every "
<< m_stats_period << " Ruby cycles" << endl;
@@ -171,7 +155,8 @@ void Profiler::setPeriodicStatsFile(const string& filename)
g_eventQueue_ptr->scheduleEvent(this, 1);
}
void Profiler::setPeriodicStatsInterval(integer_t period)
void
Profiler::setPeriodicStatsInterval(integer_t period)
{
cout << "Recording periodic statistics every " << m_stats_period
<< " Ruby cycles" << endl;
@@ -180,7 +165,8 @@ void Profiler::setPeriodicStatsInterval(integer_t period)
g_eventQueue_ptr->scheduleEvent(this, 1);
}
void Profiler::printConfig(ostream& out) const
void
Profiler::printConfig(ostream& out) const
{
out << endl;
out << "Profiler Configuration" << endl;
@@ -188,12 +174,14 @@ void Profiler::printConfig(ostream& out) const
out << "periodic_stats_period: " << m_stats_period << endl;
}
void Profiler::print(ostream& out) const
void
Profiler::print(ostream& out) const
{
out << "[Profiler]";
}
void Profiler::printStats(ostream& out, bool short_stats)
void
Profiler::printStats(ostream& out, bool short_stats)
{
out << endl;
if (short_stats) {
@@ -204,9 +192,9 @@ void Profiler::printStats(ostream& out, bool short_stats)
time_t real_time_current = time(NULL);
double seconds = difftime(real_time_current, m_real_time_start_time);
double minutes = seconds/60.0;
double hours = minutes/60.0;
double days = hours/24.0;
double minutes = seconds / 60.0;
double hours = minutes / 60.0;
double days = hours / 24.0;
Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start;
if (!short_stats) {
@@ -243,14 +231,14 @@ void Profiler::printStats(ostream& out, bool short_stats)
<< process_memory_resident()/process_memory_total() << endl;
}
out << endl;
}
Vector<integer_t> perProcCycleCount;
perProcCycleCount.setSize(m_num_of_sequencers);
for(int i=0; i < m_num_of_sequencers; i++) {
perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
for (int i = 0; i < m_num_of_sequencers; i++) {
perProcCycleCount[i] =
g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
// The +1 allows us to avoid division by zero
}
@@ -260,13 +248,14 @@ void Profiler::printStats(ostream& out, bool short_stats)
if (!short_stats) {
out << "Busy Controller Counts:" << endl;
for(int i=0; i < MachineType_NUM; i++) {
for(int j=0; j < MachineType_base_count((MachineType)i); j++) {
for (int i = 0; i < MachineType_NUM; i++) {
int size = MachineType_base_count((MachineType)i);
for (int j = 0; j < size; j++) {
MachineID machID;
machID.type = (MachineType)i;
machID.num = j;
out << machID << ":" << m_busyControllerCount[i][j] << " ";
if ((j+1)%8 == 0) {
if ((j + 1) % 8 == 0) {
out << endl;
}
}
@@ -277,7 +266,8 @@ void Profiler::printStats(ostream& out, bool short_stats)
out << "Busy Bank Count:" << m_busyBankCount << endl;
out << endl;
out << "sequencer_requests_outstanding: " << m_sequencer_requests << endl;
out << "sequencer_requests_outstanding: "
<< m_sequencer_requests << endl;
out << endl;
}
@@ -285,14 +275,16 @@ void Profiler::printStats(ostream& out, bool short_stats)
out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
out << "----------------------------------------" << endl;
out << "miss_latency: " << m_allMissLatencyHistogram << endl;
for(int i=0; i<m_missLatencyHistograms.size(); i++) {
for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
if (m_missLatencyHistograms[i].size() > 0) {
out << "miss_latency_" << RubyRequestType(i) << ": " << m_missLatencyHistograms[i] << endl;
out << "miss_latency_" << RubyRequestType(i) << ": "
<< m_missLatencyHistograms[i] << endl;
}
}
for(int i=0; i<m_machLatencyHistograms.size(); i++) {
for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
if (m_machLatencyHistograms[i].size() > 0) {
out << "miss_latency_" << GenericMachineType(i) << ": " << m_machLatencyHistograms[i] << endl;
out << "miss_latency_" << GenericMachineType(i) << ": "
<< m_machLatencyHistograms[i] << endl;
}
}
@@ -301,38 +293,55 @@ void Profiler::printStats(ostream& out, bool short_stats)
out << "All Non-Zero Cycle SW Prefetch Requests" << endl;
out << "------------------------------------" << endl;
out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl;
for(int i=0; i<m_SWPrefetchLatencyHistograms.size(); i++) {
for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
if (m_SWPrefetchLatencyHistograms[i].size() > 0) {
out << "prefetch_latency_" << CacheRequestType(i) << ": " << m_SWPrefetchLatencyHistograms[i] << endl;
out << "prefetch_latency_" << CacheRequestType(i) << ": "
<< m_SWPrefetchLatencyHistograms[i] << endl;
}
}
for(int i=0; i<m_SWPrefetchMachLatencyHistograms.size(); i++) {
for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) {
out << "prefetch_latency_" << GenericMachineType(i) << ": " << m_SWPrefetchMachLatencyHistograms[i] << endl;
out << "prefetch_latency_" << GenericMachineType(i) << ": "
<< m_SWPrefetchMachLatencyHistograms[i] << endl;
}
}
out << "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram << endl;
out << "prefetch_latency_L2Miss:"
<< m_SWPrefetchL2MissLatencyHistogram << endl;
if (m_all_sharing_histogram.size() > 0) {
out << "all_sharing: " << m_all_sharing_histogram << endl;
out << "read_sharing: " << m_read_sharing_histogram << endl;
out << "write_sharing: " << m_write_sharing_histogram << endl;
out << "all_sharing_percent: "; m_all_sharing_histogram.printPercent(out); out << endl;
out << "read_sharing_percent: "; m_read_sharing_histogram.printPercent(out); out << endl;
out << "write_sharing_percent: "; m_write_sharing_histogram.printPercent(out); out << endl;
out << "all_sharing_percent: ";
m_all_sharing_histogram.printPercent(out);
out << endl;
out << "read_sharing_percent: ";
m_read_sharing_histogram.printPercent(out);
out << endl;
out << "write_sharing_percent: ";
m_write_sharing_histogram.printPercent(out);
out << endl;
int64 total_miss = m_cache_to_cache + m_memory_to_cache;
out << "all_misses: " << total_miss << endl;
out << "cache_to_cache_misses: " << m_cache_to_cache << endl;
out << "memory_to_cache_misses: " << m_memory_to_cache << endl;
out << "cache_to_cache_percent: " << 100.0 * (double(m_cache_to_cache) / double(total_miss)) << endl;
out << "memory_to_cache_percent: " << 100.0 * (double(m_memory_to_cache) / double(total_miss)) << endl;
out << "cache_to_cache_percent: "
<< 100.0 * (double(m_cache_to_cache) / double(total_miss))
<< endl;
out << "memory_to_cache_percent: "
<< 100.0 * (double(m_memory_to_cache) / double(total_miss))
<< endl;
out << endl;
}
if (m_outstanding_requests.size() > 0) {
out << "outstanding_requests: "; m_outstanding_requests.printPercent(out); out << endl;
out << "outstanding_requests: ";
m_outstanding_requests.printPercent(out);
out << endl;
out << endl;
}
}
@@ -345,9 +354,10 @@ void Profiler::printStats(ostream& out, bool short_stats)
Vector<string> requestProfileKeys = m_requestProfileMap_ptr->keys();
requestProfileKeys.sortVector();
for(int i=0; i<requestProfileKeys.size(); i++) {
int temp_int = m_requestProfileMap_ptr->lookup(requestProfileKeys[i]);
double percent = (100.0*double(temp_int))/double(m_requests);
for (int i = 0; i < requestProfileKeys.size(); i++) {
int temp_int =
m_requestProfileMap_ptr->lookup(requestProfileKeys[i]);
double percent = (100.0 * double(temp_int)) / double(m_requests);
while (requestProfileKeys[i] != "") {
out << setw(10) << string_split(requestProfileKeys[i], ':');
}
@@ -370,17 +380,19 @@ void Profiler::printStats(ostream& out, bool short_stats)
out << "Message Delayed Cycles" << endl;
out << "----------------------" << endl;
out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl;
out << "Total_nonPF_delay_cycles: " << m_delayedCyclesNonPFHistogram << endl;
out << "Total_nonPF_delay_cycles: "
<< m_delayedCyclesNonPFHistogram << endl;
for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) {
out << " virtual_network_" << i << "_delay_cycles: " << m_delayedCyclesVCHistograms[i] << endl;
out << " virtual_network_" << i << "_delay_cycles: "
<< m_delayedCyclesVCHistograms[i] << endl;
}
printResourceUsage(out);
}
}
void Profiler::printResourceUsage(ostream& out) const
void
Profiler::printResourceUsage(ostream& out) const
{
out << endl;
out << "Resource Usage" << endl;
@@ -401,12 +413,13 @@ void Profiler::printResourceUsage(ostream& out) const
out << "block_outputs: " << usage.ru_oublock << endl;
}
void Profiler::clearStats()
void
Profiler::clearStats()
{
m_ruby_start = g_eventQueue_ptr->getTime();
m_cycles_executed_at_start.setSize(m_num_of_sequencers);
for (int i=0; i < m_num_of_sequencers; i++) {
for (int i = 0; i < m_num_of_sequencers; i++) {
if (g_system_ptr == NULL) {
m_cycles_executed_at_start[i] = 0;
} else {
@@ -415,9 +428,10 @@ void Profiler::clearStats()
}
m_busyControllerCount.setSize(MachineType_NUM); // all machines
for(int i=0; i < MachineType_NUM; i++) {
m_busyControllerCount[i].setSize(MachineType_base_count((MachineType)i));
for(int j=0; j < MachineType_base_count((MachineType)i); j++) {
for (int i = 0; i < MachineType_NUM; i++) {
int size = MachineType_base_count((MachineType)i);
m_busyControllerCount[i].setSize(size);
for (int j = 0; j < size; j++) {
m_busyControllerCount[i][j] = 0;
}
}
@@ -425,27 +439,28 @@ void Profiler::clearStats()
m_delayedCyclesHistogram.clear();
m_delayedCyclesNonPFHistogram.clear();
m_delayedCyclesVCHistograms.setSize(RubySystem::getNetwork()->getNumberOfVirtualNetworks());
for (int i = 0; i < RubySystem::getNetwork()->getNumberOfVirtualNetworks(); i++) {
int size = RubySystem::getNetwork()->getNumberOfVirtualNetworks();
m_delayedCyclesVCHistograms.setSize(size);
for (int i = 0; i < size; i++) {
m_delayedCyclesVCHistograms[i].clear();
}
m_missLatencyHistograms.setSize(RubyRequestType_NUM);
for(int i=0; i<m_missLatencyHistograms.size(); i++) {
for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
m_missLatencyHistograms[i].clear(200);
}
m_machLatencyHistograms.setSize(GenericMachineType_NUM+1);
for(int i=0; i<m_machLatencyHistograms.size(); i++) {
for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
m_machLatencyHistograms[i].clear(200);
}
m_allMissLatencyHistogram.clear(200);
m_SWPrefetchLatencyHistograms.setSize(CacheRequestType_NUM);
for(int i=0; i<m_SWPrefetchLatencyHistograms.size(); i++) {
for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
m_SWPrefetchLatencyHistograms[i].clear(200);
}
m_SWPrefetchMachLatencyHistograms.setSize(GenericMachineType_NUM+1);
for(int i=0; i<m_SWPrefetchMachLatencyHistograms.size(); i++) {
for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
m_SWPrefetchMachLatencyHistograms[i].clear(200);
}
m_allSWPrefetchLatencyHistogram.clear(200);
@@ -466,26 +481,34 @@ void Profiler::clearStats()
m_outstanding_requests.clear();
m_outstanding_persistent_requests.clear();
// Flush the prefetches through the system - used so that there are no outstanding requests after stats are cleared
// Flush the prefetches through the system - used so that there
// are no outstanding requests after stats are cleared
//g_eventQueue_ptr->triggerAllEvents();
// update the start time
m_ruby_start = g_eventQueue_ptr->getTime();
}
void Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id)
void
Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id)
{
if (msg.getType() != CacheRequestType_IFETCH) {
// Note: The following line should be commented out if you
// want to use the special profiling that is part of the GS320
// protocol
// Note: The following line should be commented out if you want to
// use the special profiling that is part of the GS320 protocol
// NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be profiled by the AddressProfiler
m_address_profiler_ptr->addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), msg.getType(), msg.getAccessMode(), id, false);
// NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
// profiled by the AddressProfiler
m_address_profiler_ptr->
addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
msg.getType(), msg.getAccessMode(), id, false);
}
}
void Profiler::profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner)
void
Profiler::profileSharing(const Address& addr, AccessType type,
NodeID requestor, const Set& sharers,
const Set& owner)
{
Set set_contacted(owner);
if (type == AccessType_Write) {
@@ -506,10 +529,11 @@ void Profiler::profileSharing(const Address& addr, AccessType type, NodeID reque
} else {
m_cache_to_cache++;
}
}
void Profiler::profileMsgDelay(int virtualNetwork, int delayCycles) {
void
Profiler::profileMsgDelay(int virtualNetwork, int delayCycles)
{
assert(virtualNetwork < m_delayedCyclesVCHistograms.size());
m_delayedCyclesHistogram.add(delayCycles);
m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles);
@@ -519,7 +543,8 @@ void Profiler::profileMsgDelay(int virtualNetwork, int delayCycles) {
}
// profiles original cache requests including PUTs
void Profiler::profileRequest(const string& requestStr)
void
Profiler::profileRequest(const string& requestStr)
{
m_requests++;
@@ -530,41 +555,49 @@ void Profiler::profileRequest(const string& requestStr)
}
}
void Profiler::controllerBusy(MachineID machID)
void
Profiler::controllerBusy(MachineID machID)
{
m_busyControllerCount[(int)machID.type][(int)machID.num]++;
}
void Profiler::profilePFWait(Time waitTime)
void
Profiler::profilePFWait(Time waitTime)
{
m_prefetchWaitHistogram.add(waitTime);
}
void Profiler::bankBusy()
void
Profiler::bankBusy()
{
m_busyBankCount++;
}
// non-zero cycle demand request
void Profiler::missLatency(Time t, RubyRequestType type)
void
Profiler::missLatency(Time t, RubyRequestType type)
{
m_allMissLatencyHistogram.add(t);
m_missLatencyHistograms[type].add(t);
}
// non-zero cycle prefetch request
void Profiler::swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach)
void
Profiler::swPrefetchLatency(Time t, CacheRequestType type,
GenericMachineType respondingMach)
{
m_allSWPrefetchLatencyHistogram.add(t);
m_SWPrefetchLatencyHistograms[type].add(t);
m_SWPrefetchMachLatencyHistograms[respondingMach].add(t);
if(respondingMach == GenericMachineType_Directory || respondingMach == GenericMachineType_NUM) {
if (respondingMach == GenericMachineType_Directory ||
respondingMach == GenericMachineType_NUM) {
m_SWPrefetchL2MissLatencyHistogram.add(t);
}
}
void Profiler::profileTransition(const string& component, NodeID version, Address addr,
const string& state, const string& event,
void
Profiler::profileTransition(const string& component, NodeID version,
Address addr, const string& state, const string& event,
const string& next_state, const string& note)
{
const int EVENT_SPACES = 20;
@@ -573,70 +606,80 @@ void Profiler::profileTransition(const string& component, NodeID version, Addres
const int COMP_SPACES = 10;
const int STATE_SPACES = 6;
if ((g_debug_ptr->getDebugTime() > 0) &&
(g_eventQueue_ptr->getTime() >= g_debug_ptr->getDebugTime())) {
(* debug_cout_ptr).flags(ios::right);
(* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " ";
(* debug_cout_ptr) << setw(ID_SPACES) << version << " ";
(* debug_cout_ptr) << setw(COMP_SPACES) << component;
(* debug_cout_ptr) << setw(EVENT_SPACES) << event << " ";
if (g_debug_ptr->getDebugTime() <= 0 ||
g_eventQueue_ptr->getTime() < g_debug_ptr->getDebugTime())
return;
(* debug_cout_ptr).flags(ios::right);
(* debug_cout_ptr) << setw(STATE_SPACES) << state;
(* debug_cout_ptr) << ">";
(* debug_cout_ptr).flags(ios::left);
(* debug_cout_ptr) << setw(STATE_SPACES) << next_state;
ostream &out = *debug_cout_ptr;
out.flags(ios::right);
out << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " ";
out << setw(ID_SPACES) << version << " ";
out << setw(COMP_SPACES) << component;
out << setw(EVENT_SPACES) << event << " ";
(* debug_cout_ptr) << " " << addr << " " << note;
out.flags(ios::right);
out << setw(STATE_SPACES) << state;
out << ">";
out.flags(ios::left);
out << setw(STATE_SPACES) << next_state;
(* debug_cout_ptr) << endl;
}
out << " " << addr << " " << note;
out << endl;
}
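The rewrite above trades one large nested conditional for an early return. The same guard-clause pattern in miniature (function and parameters hypothetical):

#include <iostream>

static void
maybeTrace(std::ostream& out, bool enabled, const char* msg)
{
    // Bail out when tracing is off; the common path stays unindented.
    if (!enabled)
        return;
    out << msg << std::endl;
}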
// Helper function
static double process_memory_total()
static double
process_memory_total()
{
const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB,
// 4kB page size, 1024*1024 bytes per MB
const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
ifstream proc_file;
proc_file.open("/proc/self/statm");
int total_size_in_pages = 0;
int res_size_in_pages = 0;
proc_file >> total_size_in_pages;
proc_file >> res_size_in_pages;
return double(total_size_in_pages)*MULTIPLIER; // size in megabytes
return double(total_size_in_pages) * MULTIPLIER; // size in megabytes
}
static double process_memory_resident()
static double
process_memory_resident()
{
const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB,
// 4kB page size, 1024*1024 bytes per MB
const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
ifstream proc_file;
proc_file.open("/proc/self/statm");
int total_size_in_pages = 0;
int res_size_in_pages = 0;
proc_file >> total_size_in_pages;
proc_file >> res_size_in_pages;
return double(res_size_in_pages)*MULTIPLIER; // size in megabytes
return double(res_size_in_pages) * MULTIPLIER; // size in megabytes
}
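Both helpers above read the first two fields of /proc/self/statm, which per proc(5) are the total and resident sizes in pages. A combined sketch, with getpagesize() standing in for the hard-coded 4 kB page size; this variant is illustrative, not part of the commit:

#include <fstream>
#include <unistd.h>

// Reads (total, resident) process size in megabytes; both stay zero
// if /proc/self/statm is unavailable (e.g. on non-Linux hosts).
static void
process_memory_mb(double& total_mb, double& resident_mb)
{
    const double page_mb = double(getpagesize()) / (1024.0 * 1024.0);
    std::ifstream proc_file("/proc/self/statm");
    long total_pages = 0;
    long resident_pages = 0;
    proc_file >> total_pages >> resident_pages;
    total_mb = double(total_pages) * page_mb;
    resident_mb = double(resident_pages) * page_mb;
}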
void Profiler::rubyWatch(int id){
void
Profiler::rubyWatch(int id)
{
uint64 tr = 0;
Address watch_address = Address(tr);
const int ID_SPACES = 3;
const int TIME_SPACES = 7;
(* debug_cout_ptr).flags(ios::right);
(* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " ";
(* debug_cout_ptr) << setw(ID_SPACES) << id << " "
<< "RUBY WATCH "
<< watch_address
<< endl;
ostream &out = *debug_cout_ptr;
if(!m_watch_address_list_ptr->exist(watch_address)){
out.flags(ios::right);
out << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " ";
out << setw(ID_SPACES) << id << " "
<< "RUBY WATCH " << watch_address << endl;
if (!m_watch_address_list_ptr->exist(watch_address)) {
m_watch_address_list_ptr->add(watch_address, 1);
}
}
bool Profiler::watchAddress(Address addr){
bool
Profiler::watchAddress(Address addr)
{
if (m_watch_address_list_ptr->exist(addr))
return true;
else

View file

@@ -42,35 +42,24 @@
----------------------------------------------------------------------
*/
/*
* Profiler.hh
*
* Description:
*
* $Id$
*
*/
#ifndef __MEM_RUBY_PROFILER_PROFILER_HH__
#define __MEM_RUBY_PROFILER_PROFILER_HH__
#ifndef PROFILER_H
#define PROFILER_H
#include "mem/ruby/libruby.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/protocol/GenericMachineType.hh"
#include "mem/ruby/common/Histogram.hh"
#include "mem/ruby/common/Consumer.hh"
#include "mem/protocol/AccessModeType.hh"
#include "mem/protocol/AccessType.hh"
#include "mem/ruby/system/NodeID.hh"
#include "mem/ruby/system/MachineID.hh"
#include "mem/protocol/CacheRequestType.hh"
#include "mem/protocol/GenericMachineType.hh"
#include "mem/protocol/GenericRequestType.hh"
#include "mem/protocol/PrefetchBit.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Consumer.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/common/Histogram.hh"
#include "mem/ruby/common/Set.hh"
#include "mem/protocol/CacheRequestType.hh"
#include "mem/protocol/GenericRequestType.hh"
#include "mem/ruby/libruby.hh"
#include "mem/ruby/system/MachineID.hh"
#include "mem/ruby/system/MemoryControl.hh"
#include "mem/ruby/system/NodeID.hh"
#include "params/RubyProfiler.hh"
#include "sim/sim_object.hh"
@@ -79,16 +68,13 @@ class AddressProfiler;
template <class KEY_TYPE, class VALUE_TYPE> class Map;
class Profiler : public SimObject, public Consumer {
public:
// Constructors
class Profiler : public SimObject, public Consumer
{
public:
typedef RubyProfilerParams Params;
Profiler(const Params *);
// Destructor
~Profiler();
// Public Methods
void wakeup();
void setPeriodicStatsFile(const string& filename);
@@ -107,16 +93,33 @@ public:
void addAddressTraceSample(const CacheMsg& msg, NodeID id);
void profileRequest(const string& requestStr);
void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner);
void profileSharing(const Address& addr, AccessType type,
NodeID requestor, const Set& sharers,
const Set& owner);
void profileMulticastRetry(const Address& addr, int count);
void profileFilterAction(int action);
void profileConflictingRequests(const Address& addr);
void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); }
void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); }
void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); }
void
profileOutstandingRequest(int outstanding)
{
m_outstanding_requests.add(outstanding);
}
void
profileOutstandingPersistentRequest(int outstanding)
{
m_outstanding_persistent_requests.add(outstanding);
}
void
profileAverageLatencyEstimate(int latency)
{
m_average_latency_estimate.add(latency);
}
void recordPrediction(bool wasGood, bool wasPredicted);
@@ -127,12 +130,14 @@ public:
void controllerBusy(MachineID machID);
void bankBusy();
void missLatency(Time t, RubyRequestType type);
void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach);
void swPrefetchLatency(Time t, CacheRequestType type,
GenericMachineType respondingMach);
void sequencerRequests(int num) { m_sequencer_requests.add(num); }
void profileTransition(const string& component, NodeID version, Address addr,
const string& state, const string& event,
const string& next_state, const string& note);
void profileTransition(const string& component, NodeID version,
Address addr, const string& state,
const string& event, const string& next_state,
const string& note);
void profileMsgDelay(int virtualNetwork, int delayCycles);
void print(ostream& out) const;
@@ -141,21 +146,21 @@ public:
bool watchAddress(Address addr);
// return Ruby's start time
Time getRubyStartTime(){
Time
getRubyStartTime()
{
return m_ruby_start;
}
//added by SS
// added by SS
bool getHotLines() { return m_hot_lines; }
bool getAllInstructions() { return m_all_instructions; }
private:
private:
// Private copy constructor and assignment operator
Profiler(const Profiler& obj);
Profiler& operator=(const Profiler& obj);
// Data Members (m_ prefix)
AddressProfiler* m_address_profiler_ptr;
AddressProfiler* m_inst_profiler_ptr;
@@ -168,7 +173,7 @@ private:
Time m_ruby_start;
time_t m_real_time_start_time;
Vector < Vector < integer_t > > m_busyControllerCount;
Vector <Vector<integer_t> > m_busyControllerCount;
integer_t m_busyBankCount;
Histogram m_multicast_retry_histogram;
@@ -214,20 +219,14 @@ private:
int m_num_of_sequencers;
};
// Output operator declaration
ostream& operator<<(ostream& out, const Profiler& obj);
// ******************* Definitions *******************
// Output operator definition
extern inline
ostream& operator<<(ostream& out, const Profiler& obj)
inline ostream&
operator<<(ostream& out, const Profiler& obj)
{
obj.print(out);
out << flush;
return out;
}
#endif //PROFILER_H
#endif // __MEM_RUBY_PROFILER_PROFILER_HH__
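The operator stays inline in the header (replacing the old extern inline form) so every translation unit sees a single definition. The same pattern in miniature, with a hypothetical Printable class:

#include <iostream>

class Printable
{
  public:
    void print(std::ostream& out) const { out << "printable"; }
};

inline std::ostream&
operator<<(std::ostream& out, const Printable& obj)
{
    obj.print(out);      // delegate to the class's own print()
    out << std::flush;   // match the flush-on-print convention above
    return out;
}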

View file

@@ -1,4 +1,3 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
@@ -27,28 +26,28 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* $Id$
*
*/
#include "mem/ruby/profiler/StoreTrace.hh"
#include "mem/ruby/eventqueue/RubyEventQueue.hh"
#include "mem/ruby/profiler/StoreTrace.hh"
bool StoreTrace::s_init = false; // Total number of store lifetimes of all lines
int64 StoreTrace::s_total_samples = 0; // Total number of store lifetimes of all lines
bool StoreTrace::s_init = false; // Have the summary histograms been initialized?
int64 StoreTrace::s_total_samples = 0; // Total number of store
// lifetimes of all lines
Histogram* StoreTrace::s_store_count_ptr = NULL;
Histogram* StoreTrace::s_store_first_to_stolen_ptr = NULL;
Histogram* StoreTrace::s_store_last_to_stolen_ptr = NULL;
Histogram* StoreTrace::s_store_first_to_last_ptr = NULL;
StoreTrace::StoreTrace(const Address& addr) :
m_store_count(-1), m_store_first_to_stolen(-1), m_store_last_to_stolen(-1), m_store_first_to_last(-1)
StoreTrace::StoreTrace(const Address& addr)
: m_store_count(-1), m_store_first_to_stolen(-1),
m_store_last_to_stolen(-1), m_store_first_to_last(-1)
{
StoreTrace::initSummary();
m_addr = addr;
m_total_samples = 0;
m_last_writer = -1; // Really -1 isn't valid, so this will trigger the initialization code
// Really -1 isn't valid, so this will trigger the initialization code
m_last_writer = -1;
m_stores_this_interval = 0;
}
@@ -56,18 +55,19 @@ StoreTrace::~StoreTrace()
{
}
void StoreTrace::print(ostream& out) const
void
StoreTrace::print(ostream& out) const
{
out << m_addr;
out << " total_samples: " << m_total_samples << endl;
out << "store_count: " << m_store_count << endl;
out << "store_first_to_stolen: " << m_store_first_to_stolen << endl;
out << "store_last_to_stolen: " << m_store_last_to_stolen << endl;
out << "store_first_to_last: " << m_store_first_to_last << endl;
out << m_addr
<< " total_samples: " << m_total_samples << endl
<< "store_count: " << m_store_count << endl
<< "store_first_to_stolen: " << m_store_first_to_stolen << endl
<< "store_last_to_stolen: " << m_store_last_to_stolen << endl
<< "store_first_to_last: " << m_store_first_to_last << endl;
}
// Class method
void StoreTrace::initSummary()
void
StoreTrace::initSummary()
{
if (!s_init) {
s_total_samples = 0;
@@ -79,8 +79,8 @@ void StoreTrace::initSummary()
s_init = true;
}
// Class method
void StoreTrace::printSummary(ostream& out)
void
StoreTrace::printSummary(ostream& out)
{
out << "total_samples: " << s_total_samples << endl;
out << "store_count: " << (*s_store_count_ptr) << endl;
@@ -89,8 +89,8 @@ void StoreTrace::printSummary(ostream& out)
out << "store_first_to_last: " << (*s_store_first_to_last_ptr) << endl;
}
// Class method
void StoreTrace::clearSummary()
void
StoreTrace::clearSummary()
{
StoreTrace::initSummary();
s_total_samples = 0;
@@ -100,7 +100,8 @@ void StoreTrace::clearSummary()
s_store_first_to_last_ptr->clear();
}
void StoreTrace::store(NodeID node)
void
StoreTrace::store(NodeID node)
{
Time current = g_eventQueue_ptr->getTime();
@@ -120,7 +121,8 @@ void StoreTrace::store(NodeID node)
m_stores_this_interval++;
}
void StoreTrace::downgrade(NodeID node)
void
StoreTrace::downgrade(NodeID node)
{
if (node == m_last_writer) {
Time current = g_eventQueue_ptr->getTime();
@@ -151,8 +153,3 @@ void StoreTrace::downgrade(NodeID node)
m_last_writer = -1;
}
}
bool node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2)
{
return (n1->getTotal() > n2->getTotal());
}

View file

@@ -1,4 +1,3 @@
/*
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
* All rights reserved.
@@ -27,30 +26,20 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* $Id$
*
* Description:
*
*/
#ifndef __MEM_RUBY_PROFILER_STORETRACE_HH__
#define __MEM_RUBY_PROFILER_STORETRACE_HH__
#ifndef StoreTrace_H
#define StoreTrace_H
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/common/Histogram.hh"
class StoreTrace {
public:
// Constructors
class StoreTrace
{
public:
StoreTrace() { }
explicit StoreTrace(const Address& addr);
// Destructor
~StoreTrace();
// Public Methods
void store(NodeID node);
void downgrade(NodeID node);
int getTotal() const { return m_total_samples; }
@@ -59,23 +48,16 @@ public:
static void clearSummary();
void print(ostream& out) const;
private:
// Private Methods
// Private copy constructor and assignment operator
// StoreTrace(const StoreTrace& obj);
// StoreTrace& operator=(const StoreTrace& obj);
// Class Members (s_ prefix)
private:
static bool s_init;
static int64 s_total_samples; // Total number of store lifetimes of all lines
static int64 s_total_samples; // Total number of store lifetimes
// of all lines
static Histogram* s_store_count_ptr;
static Histogram* s_store_first_to_stolen_ptr;
static Histogram* s_store_last_to_stolen_ptr;
static Histogram* s_store_first_to_last_ptr;
// Data Members (m_ prefix)
Address m_addr;
NodeID m_last_writer;
Time m_first_store;
@@ -89,20 +71,18 @@ private:
Histogram m_store_first_to_last;
};
bool node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2);
inline bool
node_less_then_eq(const StoreTrace* n1, const StoreTrace* n2)
{
return n1->getTotal() > n2->getTotal();
}
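Despite its less-than-or-equal name, the comparator orders by descending total, so a sort puts the hottest lines first. A hypothetical usage sketch (sortHottestFirst is not part of the commit):

#include <algorithm>
#include <vector>

static void
sortHottestFirst(std::vector<const StoreTrace*>& traces)
{
    // node_less_then_eq(a, b) is true when a has the larger total,
    // so std::sort yields a descending-by-total ordering.
    std::sort(traces.begin(), traces.end(), node_less_then_eq);
}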
// Output operator declaration
ostream& operator<<(ostream& out, const StoreTrace& obj);
// ******************* Definitions *******************
// Output operator definition
extern inline
ostream& operator<<(ostream& out, const StoreTrace& obj)
inline ostream&
operator<<(ostream& out, const StoreTrace& obj)
{
obj.print(out);
out << flush;
return out;
}
#endif //StoreTrace_H
#endif // __MEM_RUBY_PROFILER_STORETRACE_HH__

View file

@@ -36,28 +36,29 @@ from subprocess import call
progname = os.path.basename(sys.argv[0])
optparser = optparse.OptionParser()
optparser.add_option('-v', '--verbose', dest='verbose', action='store_true',
add_option = optparser.add_option
add_option('-v', '--verbose', dest='verbose', action='store_true',
default=False,
help='echo commands before executing')
optparser.add_option('--builds', dest='builds',
add_option('--builds', dest='builds',
default='ALPHA_SE,ALPHA_SE_MOESI_hammer,' \
'ALPHA_SE_MESI_CMP_directory,' \
'ALPHA_SE_MOESI_CMP_directory,' \
'ALPHA_SE_MOESI_CMP_token,' \
'ALPHA_FS,MIPS_SE,' \
'POWER_SE,SPARC_SE,SPARC_FS,X86_SE,ARM_SE',
help='comma-separated list of build targets to test '
" (default: '%default')" )
optparser.add_option('--variants', dest='variants',
default='fast',
help='comma-separated list of build variants to test '
" (default: '%default')" )
optparser.add_option('--scons-opts', dest='scons_opts', default='',
help='scons options', metavar='OPTS')
optparser.add_option('-j', '--jobs', type='int', default=1,
'ALPHA_FS,MIPS_SE,POWER_SE,SPARC_SE,SPARC_FS,X86_SE,ARM_SE',
help="comma-separated build targets to test (default: '%default')")
add_option('--variants', dest='variants', default='fast',
help="comma-separated build variants to test (default: '%default')")
add_option('--scons-opts', dest='scons_opts', default='', metavar='OPTS',
help='scons options')
add_option('-j', '--jobs', type='int', default=1,
help='number of parallel jobs to use')
optparser.add_option('-k', '--keep-going', action='store_true',
add_option('-k', '--keep-going', action='store_true',
help='keep going after errors')
add_option('-D', '--build-dir', default='',
help='build directory location')
add_option('-n', "--no-exec", default=False, action='store_true',
help="don't actually invoke scons, just echo SCons command line")
(options, tests) = optparser.parse_args()
@@ -66,6 +67,8 @@ optparser.add_option('-k', '--keep-going', action='store_true',
builds = options.builds.split(',')
variants = options.variants.split(',')
options.build_dir = os.path.join(options.build_dir, 'build')
# Call os.system() and raise exception if return status is non-zero
def system(cmd):
try:
@@ -91,11 +94,11 @@ def shellquote(s):
if not tests:
print "No tests specified, just building binaries."
targets = ['build/%s/m5.%s' % (build, variant)
targets = ['%s/%s/m5.%s' % (options.build_dir, build, variant)
for build in builds
for variant in variants]
elif 'all' in tests:
targets = ['build/%s/tests/%s' % (build, variant)
targets = ['%s/%s/tests/%s' % (options.build_dir, build, variant)
for build in builds
for variant in variants]
else:
@@ -103,17 +106,36 @@ else:
# If we ever get a quick SPARC_FS test, this code should be removed
if 'quick' in tests and 'SPARC_FS' in builds:
builds.remove('SPARC_FS')
targets = ['build/%s/tests/%s/%s' % (build, variant, test)
targets = ['%s/%s/tests/%s/%s' % (options.build_dir, build, variant, test)
for build in builds
for variant in variants
for test in tests]
def cpu_count():
if 'bsd' in sys.platform or sys.platform == 'darwin':
try:
return int(os.popen('sysctl -n hw.ncpu').read())
except ValueError:
pass
else:
try:
return os.sysconf('SC_NPROCESSORS_ONLN')
except (ValueError, OSError, AttributeError):
pass
raise NotImplementedError('cannot determine number of cpus')
scons_opts = options.scons_opts
if options.jobs != 1:
if options.jobs == 0:
options.jobs = cpu_count()
scons_opts += ' -j %d' % options.jobs
if options.keep_going:
scons_opts += ' -k'
system('scons IGNORE_STYLE=True %s %s' % (scons_opts, ' '.join(targets)))
sys.exit(0)
cmd = 'scons IGNORE_STYLE=True %s %s' % (scons_opts, ' '.join(targets))
if options.no_exec:
print cmd
else:
system(cmd)
sys.exit(0)