diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 141ecfd8e..e42a92ba1 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -133,6 +133,6 @@ m5.instantiate(root)
 if options.maxtick:
     exit_event = m5.simulate(options.maxtick)
 else:
-    exit_event = m5.simulate()
+    exit_event = m5.simulate(10000000000000)
 
 print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause()
diff --git a/configs/splash2/run.py b/configs/splash2/run.py
index 93b166d77..7d56cb830 100644
--- a/configs/splash2/run.py
+++ b/configs/splash2/run.py
@@ -262,7 +262,7 @@ m5.instantiate(root)
 if options.maxtick:
     exit_event = m5.simulate(options.maxtick)
 else:
-    exit_event = m5.simulate()
+    exit_event = m5.simulate(1000000000000)
 
 print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause()
 
diff --git a/src/SConscript b/src/SConscript
index 44bcb5320..385047f7f 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -129,12 +129,13 @@ base_sources = Split('''
 
         mem/cache/cache_builder.cc
 
+        python/swig/main_wrap.cc
+
 	sim/builder.cc
 	sim/debug.cc
 	sim/eventq.cc
 	sim/faults.cc
 	sim/main.cc
-        python/swig/cc_main_wrap.cc
 	sim/param.cc
 	sim/root.cc
 	sim/serialize.cc
@@ -316,16 +317,17 @@ else:
 
 makeEnv('debug', '.do',
         CCFLAGS = Split('%s -O0' % debug_flag),
-        CPPDEFINES = 'DEBUG')
+        CPPDEFINES = ['DEBUG', 'TRACING_ON=1'])
 
 # Optimized binary
 makeEnv('opt', '.o',
-        CCFLAGS = Split('-g -O3'))
+        CCFLAGS = Split('-g -O3'),
+        CPPDEFINES = ['TRACING_ON=1'])
 
 # "Fast" binary
 makeEnv('fast', '.fo', strip = True,
         CCFLAGS = Split('-O3'),
-        CPPDEFINES = 'NDEBUG')
+        CPPDEFINES = ['NDEBUG', 'TRACING_ON=0'])
 
 # Profiled binary
 makeEnv('prof', '.po',
diff --git a/src/base/trace.hh b/src/base/trace.hh
index 8df5dd893..9b053990c 100644
--- a/src/base/trace.hh
+++ b/src/base/trace.hh
@@ -39,14 +39,6 @@
 #include "sim/host.hh"
 #include "sim/root.hh"
 
-#ifndef TRACING_ON
-#ifndef NDEBUG
-#define TRACING_ON	1
-#else
-#define TRACING_ON	0
-#endif
-#endif
-
 #include "base/traceflags.hh"
 
 namespace Trace {
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 9257778ef..788f77e3a 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -155,6 +155,10 @@ class BaseCPU : public MemObject
         int cpu_id;
 #if FULL_SYSTEM
         Tick profile;
+
+        bool do_statistics_insts;
+        bool do_checkpoint_insts;
+        bool do_quiesce;
 #endif
         Tick progress_interval;
         BaseCPU *checker;
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc
index be8ad8de6..09ccc7f65 100644
--- a/src/cpu/o3/alpha/cpu_builder.cc
+++ b/src/cpu/o3/alpha/cpu_builder.cc
@@ -57,6 +57,10 @@ Param<int> cpu_id;
 SimObjectParam<AlphaISA::ITB *> itb;
 SimObjectParam<AlphaISA::DTB *> dtb;
 Param<Tick> profile;
+
+Param<bool> do_quiesce;
+Param<bool> do_checkpoint_insts;
+Param<bool> do_statistics_insts;
 #else
 SimObjectVectorParam<Process *> workload;
 #endif // FULL_SYSTEM
@@ -163,6 +167,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
     INIT_PARAM(itb, "Instruction translation buffer"),
     INIT_PARAM(dtb, "Data translation buffer"),
     INIT_PARAM(profile, ""),
+
+    INIT_PARAM(do_quiesce, ""),
+    INIT_PARAM(do_checkpoint_insts, ""),
+    INIT_PARAM(do_statistics_insts, ""),
 #else
     INIT_PARAM(workload, "Processes to run"),
 #endif // FULL_SYSTEM
@@ -306,6 +314,10 @@ CREATE_SIM_OBJECT(DerivO3CPU)
     params->itb = itb;
     params->dtb = dtb;
     params->profile = profile;
+
+    params->do_quiesce = do_quiesce;
+    params->do_checkpoint_insts = do_checkpoint_insts;
+    params->do_statistics_insts = do_statistics_insts;
 #else
     params->workload = workload;
 #endif // FULL_SYSTEM
diff --git a/src/cpu/ozone/cpu_builder.cc b/src/cpu/ozone/cpu_builder.cc
index 39337dbff..155f0ce09 100644
--- a/src/cpu/ozone/cpu_builder.cc
+++ b/src/cpu/ozone/cpu_builder.cc
@@ -64,6 +64,10 @@ Param<int> cpu_id;
 SimObjectParam<TheISA::ITB *> itb;
 SimObjectParam<TheISA::DTB *> dtb;
 Param<Tick> profile;
+
+Param<bool> do_quiesce;
+Param<bool> do_checkpoint_insts;
+Param<bool> do_statistics_insts;
 #else
 SimObjectVectorParam<Process *> workload;
 //SimObjectParam<PageTable *> page_table;
@@ -184,6 +188,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
     INIT_PARAM(itb, "Instruction translation buffer"),
     INIT_PARAM(dtb, "Data translation buffer"),
     INIT_PARAM(profile, ""),
+    INIT_PARAM(do_quiesce, ""),
+    INIT_PARAM(do_checkpoint_insts, ""),
+    INIT_PARAM(do_statistics_insts, ""),
 #else
     INIT_PARAM(workload, "Processes to run"),
 //    INIT_PARAM(page_table, "Page table"),
@@ -341,6 +348,9 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
     params->itb = itb;
     params->dtb = dtb;
     params->profile = profile;
+    params->do_quiesce = do_quiesce;
+    params->do_checkpoint_insts = do_checkpoint_insts;
+    params->do_statistics_insts = do_statistics_insts;
 #else
     params->workload = workload;
 //    params->pTable = page_table;
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 4f68cfd6f..e9679cc7c 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -500,6 +500,10 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
     SimObjectParam<TheISA::ITB *> itb;
     SimObjectParam<TheISA::DTB *> dtb;
     Param<Tick> profile;
+
+    Param<bool> do_quiesce;
+    Param<bool> do_checkpoint_insts;
+    Param<bool> do_statistics_insts;
 #else
     SimObjectParam<Process *> workload;
 #endif // FULL_SYSTEM
@@ -532,6 +536,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU)
     INIT_PARAM(itb, "Instruction TLB"),
     INIT_PARAM(dtb, "Data TLB"),
     INIT_PARAM(profile, ""),
+    INIT_PARAM(do_quiesce, ""),
+    INIT_PARAM(do_checkpoint_insts, ""),
+    INIT_PARAM(do_statistics_insts, ""),
 #else
     INIT_PARAM(workload, "processes to run"),
 #endif // FULL_SYSTEM
@@ -569,6 +576,9 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU)
     params->itb = itb;
     params->dtb = dtb;
     params->profile = profile;
+    params->do_quiesce = do_quiesce;
+    params->do_checkpoint_insts = do_checkpoint_insts;
+    params->do_statistics_insts = do_statistics_insts;
 #else
     params->process = workload;
 #endif
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index abf316095..db2c940c0 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -665,6 +665,10 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU)
     SimObjectParam<TheISA::ITB *> itb;
     SimObjectParam<TheISA::DTB *> dtb;
     Param<Tick> profile;
+
+    Param<bool> do_quiesce;
+    Param<bool> do_checkpoint_insts;
+    Param<bool> do_statistics_insts;
 #else
     SimObjectParam<Process *> workload;
 #endif // FULL_SYSTEM
@@ -697,6 +701,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU)
     INIT_PARAM(itb, "Instruction TLB"),
     INIT_PARAM(dtb, "Data TLB"),
     INIT_PARAM(profile, ""),
+    INIT_PARAM(do_quiesce, ""),
+    INIT_PARAM(do_checkpoint_insts, ""),
+    INIT_PARAM(do_statistics_insts, ""),
 #else
     INIT_PARAM(workload, "processes to run"),
 #endif // FULL_SYSTEM
@@ -732,6 +739,9 @@ CREATE_SIM_OBJECT(TimingSimpleCPU)
     params->itb = itb;
     params->dtb = dtb;
     params->profile = profile;
+    params->do_quiesce = do_quiesce;
+    params->do_checkpoint_insts = do_checkpoint_insts;
+    params->do_statistics_insts = do_statistics_insts;
 #else
     params->process = workload;
 #endif
diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index c26d7782b..c16cb6945 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -102,21 +102,56 @@ BaseCache::CachePort::recvAtomic(PacketPtr pkt)
     return cache->doAtomicAccess(pkt, isCpuSide);
 }
 
-void
-BaseCache::CachePort::recvFunctional(PacketPtr pkt)
+bool
+BaseCache::CachePort::checkFunctional(PacketPtr pkt)
 {
     //Check storage here first
     list<PacketPtr>::iterator i = drainList.begin();
-    list<PacketPtr>::iterator end = drainList.end();
-    for (; i != end; ++i) {
+    list<PacketPtr>::iterator iend = drainList.end();
+    bool notDone = true;
+    while (i != iend && notDone) {
         PacketPtr target = *i;
         // If the target contains data, and it overlaps the
         // probed request, need to update data
         if (target->intersect(pkt)) {
-            fixPacket(pkt, target);
+            DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a drain\n",
+                    pkt->cmdString(), pkt->getAddr() & ~(cache->getBlockSize() - 1));
+            notDone = fixPacket(pkt, target);
         }
+        i++;
     }
-    cache->doFunctionalAccess(pkt, isCpuSide);
+    //Also check the response not yet ready to be on the list
+    std::list<std::pair<Tick,PacketPtr> >::iterator j = transmitList.begin();
+    std::list<std::pair<Tick,PacketPtr> >::iterator jend = transmitList.end();
+
+    while (j != jend && notDone) {
+        PacketPtr target = j->second;
+        // If the target contains data, and it overlaps the
+        // probed request, need to update data
+        if (target->intersect(pkt)) {
+            DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a response\n",
+                    pkt->cmdString(), pkt->getAddr() & ~(cache->getBlockSize() - 1));
+            notDone = fixDelayedResponsePacket(pkt, target);
+        }
+        j++;
+    }
+    return notDone;
+}
+
+void
+BaseCache::CachePort::recvFunctional(PacketPtr pkt)
+{
+    // Go to the cache only if the queued packets did not satisfy pkt
+    if (checkFunctional(pkt))
+        cache->doFunctionalAccess(pkt, isCpuSide);
+}
+
+void
+BaseCache::CachePort::checkAndSendFunctional(PacketPtr pkt)
+{
+    // Call sendFunctional only if the queued packets did not satisfy pkt
+    if (checkFunctional(pkt))
+        sendFunctional(pkt);
 }
 
 void
@@ -135,7 +170,7 @@ BaseCache::CachePort::recvRetry()
                 isCpuSide && cache->doSlaveRequest()) {
 
                 DPRINTF(CachePort, "%s has more responses/requests\n", name());
-                BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this);
+                BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this, false);
                 reqCpu->schedule(curTick + 1);
             }
             waitingOnRetry = false;
@@ -176,7 +211,7 @@ BaseCache::CachePort::recvRetry()
         {
             DPRINTF(CachePort, "%s has more requests\n", name());
             //Still more to issue, rerequest in 1 cycle
-            BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this);
+            BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this, false);
             reqCpu->schedule(curTick + 1);
         }
     }
@@ -194,7 +229,7 @@ BaseCache::CachePort::recvRetry()
         {
             DPRINTF(CachePort, "%s has more requests\n", name());
             //Still more to issue, rerequest in 1 cycle
-            BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this);
+            BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(this, false);
             reqCpu->schedule(curTick + 1);
         }
     }
@@ -226,23 +261,19 @@ BaseCache::CachePort::clearBlocked()
     }
 }
 
-BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort)
-    : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort)
+BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort, bool _newResponse)
+    : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort),
+      newResponse(_newResponse)
 {
-    this->setFlags(AutoDelete);
+    if (!newResponse)
+        this->setFlags(AutoDelete);
     pkt = NULL;
 }
 
-BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort, PacketPtr _pkt)
-    : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort), pkt(_pkt)
-{
-    this->setFlags(AutoDelete);
-}
-
 void
 BaseCache::CacheEvent::process()
 {
-    if (!pkt)
+    if (!newResponse)
     {
         if (cachePort->waitingOnRetry) return;
        //We have some responses to drain first
@@ -322,8 +353,16 @@ BaseCache::CacheEvent::process()
         }
         return;
     }
-    //Response
-    //Know the packet to send
+    //Else it's a response
+    assert(cachePort->transmitList.size());
+    assert(cachePort->transmitList.front().first <= curTick);
+    pkt = cachePort->transmitList.front().second;
+    cachePort->transmitList.pop_front();
+    if (!cachePort->transmitList.empty()) {
+        Tick time = cachePort->transmitList.front().first;
+        schedule(time <= curTick ? curTick+1 : time);
+    }
+
     if (pkt->flags & NACKED_LINE)
         pkt->result = Packet::Nacked;
     else
@@ -343,7 +382,7 @@ BaseCache::CacheEvent::process()
     }
 
     // Check if we're done draining once this list is empty
-    if (cachePort->drainList.empty())
+    if (cachePort->drainList.empty() && cachePort->transmitList.empty())
         cachePort->cache->checkDrain();
 }
 
@@ -358,8 +397,10 @@ BaseCache::getPort(const std::string &if_name, int idx)
 {
     if (if_name == "")
     {
-        if(cpuSidePort == NULL)
+        if(cpuSidePort == NULL) {
             cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true);
+            sendEvent = new CacheEvent(cpuSidePort, true);
+        }
         return cpuSidePort;
     }
     else if (if_name == "functional")
@@ -368,8 +409,10 @@ BaseCache::getPort(const std::string &if_name, int idx)
     }
     else if (if_name == "cpu_side")
     {
-        if(cpuSidePort == NULL)
+        if(cpuSidePort == NULL) {
             cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true);
+            sendEvent = new CacheEvent(cpuSidePort, true);
+        }
         return cpuSidePort;
     }
     else if (if_name == "mem_side")
@@ -377,6 +420,7 @@ BaseCache::getPort(const std::string &if_name, int idx)
         if (memSidePort != NULL)
             panic("Already have a mem side for this cache\n");
         memSidePort = new CachePort(name() + "-mem_side_port", this, false);
+        memSendEvent = new CacheEvent(memSidePort, true);
         return memSidePort;
     }
     else panic("Port name %s unrecognized\n", if_name);
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index ea7544fbb..584c2d5df 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -105,7 +105,11 @@ class BaseCache : public MemObject
 
         void clearBlocked();
 
-        bool canDrain() { return drainList.empty(); }
+        bool checkFunctional(PacketPtr pkt);
+
+        void checkAndSendFunctional(PacketPtr pkt);
+
+        bool canDrain() { return drainList.empty() && transmitList.empty(); }
 
         bool blocked;
 
@@ -117,15 +121,16 @@ class BaseCache : public MemObject
 
         std::list<PacketPtr> drainList;
 
+        std::list<std::pair<Tick,PacketPtr> > transmitList;
     };
 
     struct CacheEvent : public Event
     {
         CachePort *cachePort;
         PacketPtr pkt;
+        bool newResponse;
 
-        CacheEvent(CachePort *_cachePort);
-        CacheEvent(CachePort *_cachePort, PacketPtr _pkt);
+        CacheEvent(CachePort *_cachePort, bool response);
         void process();
         const char *description();
     };
@@ -133,6 +138,9 @@ class BaseCache : public MemObject
   public: //Made public so coherence can get at it.
     CachePort *cpuSidePort;
 
+    CacheEvent *sendEvent;
+    CacheEvent *memSendEvent;
+
   protected:
     CachePort *memSidePort;
 
@@ -353,6 +361,12 @@ class BaseCache : public MemObject
         snoopRangesSent = false;
     }
 
+    ~BaseCache()
+    {
+        delete sendEvent;     // allocated in getPort() for the cpu side port
+        delete memSendEvent;  // allocated in getPort() for the mem side port
+    }
+
     virtual void init();
 
     /**
@@ -467,7 +481,8 @@ class BaseCache : public MemObject
     {
         if (!doMasterRequest() && !memSidePort->waitingOnRetry)
         {
-            BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(memSidePort);
+            BaseCache::CacheEvent * reqCpu =
+                new BaseCache::CacheEvent(memSidePort, false);
             reqCpu->schedule(time);
         }
         uint8_t flag = 1<<cause;
@@ -503,7 +518,8 @@ class BaseCache : public MemObject
     {
         if (!doSlaveRequest() && !cpuSidePort->waitingOnRetry)
         {
-            BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(cpuSidePort);
+            BaseCache::CacheEvent * reqCpu =
+                new BaseCache::CacheEvent(cpuSidePort, false);
             reqCpu->schedule(time);
         }
         uint8_t flag = 1<<cause;
@@ -528,9 +544,44 @@ class BaseCache : public MemObject
      */
     void respond(PacketPtr pkt, Tick time)
     {
+        assert(time >= curTick);
         if (pkt->needsResponse()) {
-            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+/*            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
             reqCpu->schedule(time);
+*/
+            if (cpuSidePort->transmitList.empty()) {
+                assert(!sendEvent->scheduled());
+                sendEvent->schedule(time);
+                cpuSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
+                                                    (time,pkt));
+                return;
+            }
+
+            // something is on the list and this belongs at the end
+            if (time >= cpuSidePort->transmitList.back().first) {
+                cpuSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
+                                                    (time,pkt));
+                return;
+            }
+            // Something is on the list and this belongs somewhere else
+            std::list<std::pair<Tick,PacketPtr> >::iterator i =
+                cpuSidePort->transmitList.begin();
+            std::list<std::pair<Tick,PacketPtr> >::iterator end =
+                cpuSidePort->transmitList.end();
+            bool done = false;
+
+            while (i != end && !done) {
+                if (time < i->first) {
+                    if (i == cpuSidePort->transmitList.begin()) {
+                        //Inserting at beginning, reschedule
+                        sendEvent->reschedule(time);
+                    }
+                    cpuSidePort->transmitList.insert(i,std::pair<Tick,PacketPtr>
+                                                     (time,pkt));
+                    done = true;
+                }
+                i++;
+            }
         }
         else {
             if (pkt->cmd != Packet::UpgradeReq)
@@ -548,12 +599,48 @@ class BaseCache : public MemObject
      */
     void respondToMiss(PacketPtr pkt, Tick time)
     {
+        assert(time >= curTick);
         if (!pkt->req->isUncacheable()) {
-            missLatency[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += time - pkt->time;
+            missLatency[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                time - pkt->time;
         }
         if (pkt->needsResponse()) {
-            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+/*            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
             reqCpu->schedule(time);
+*/
+            if (cpuSidePort->transmitList.empty()) {
+                assert(!sendEvent->scheduled());
+                sendEvent->schedule(time);
+                cpuSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
+                                                    (time,pkt));
+                return;
+            }
+
+            // something is on the list and this belongs at the end
+            if (time >= cpuSidePort->transmitList.back().first) {
+                cpuSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
+                                                    (time,pkt));
+                return;
+            }
+            // Something is on the list and this belongs somewhere else
+            std::list<std::pair<Tick,PacketPtr> >::iterator i =
+                cpuSidePort->transmitList.begin();
+            std::list<std::pair<Tick,PacketPtr> >::iterator end =
+                cpuSidePort->transmitList.end();
+            bool done = false;
+
+            while (i != end && !done) {
+                if (time < i->first) {
+                    if (i == cpuSidePort->transmitList.begin()) {
+                        //Inserting at beginning, reschedule
+                        sendEvent->reschedule(time);
+                    }
+                    cpuSidePort->transmitList.insert(i,std::pair<Tick,PacketPtr>
+                                                     (time,pkt));
+                    done = true;
+                }
+                i++;
+            }
         }
         else {
             if (pkt->cmd != Packet::UpgradeReq)
@@ -570,9 +657,43 @@ class BaseCache : public MemObject
      */
     void respondToSnoop(PacketPtr pkt, Tick time)
     {
+        assert(time >= curTick);
         assert (pkt->needsResponse());
-        CacheEvent *reqMem = new CacheEvent(memSidePort, pkt);
+/*        CacheEvent *reqMem = new CacheEvent(memSidePort, pkt);
         reqMem->schedule(time);
+*/
+        if (memSidePort->transmitList.empty()) {
+            assert(!memSendEvent->scheduled());
+            memSendEvent->schedule(time);
+            memSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
+                                                (time,pkt));
+            return;
+        }
+
+        // something is on the list and this belongs at the end
+        if (time >= memSidePort->transmitList.back().first) {
+            memSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
+                                                (time,pkt));
+            return;
+        }
+        // Something is on the list and this belongs somewhere else
+        std::list<std::pair<Tick,PacketPtr> >::iterator i =
+            memSidePort->transmitList.begin();
+        std::list<std::pair<Tick,PacketPtr> >::iterator end =
+            memSidePort->transmitList.end();
+        bool done = false;
+
+        while (i != end && !done) {
+            if (time < i->first) {
+                if (i == memSidePort->transmitList.begin()) {
+                    //Inserting at beginning, reschedule
+                    memSendEvent->reschedule(time);
+                }
+                memSidePort->transmitList.insert(i,std::pair<Tick,PacketPtr>(time,pkt));
+                done = true;
+            }
+            i++;
+        }
     }
 
     /**
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 9bb72e85c..df59b0a4f 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -53,6 +53,8 @@
 
 #include "sim/sim_exit.hh" // for SimExitEvent
 
+bool SIGNAL_NACK_HACK;
+
 template<class TagStore, class Buffering, class Coherence>
 bool
 Cache<TagStore,Buffering,Coherence>::
@@ -242,6 +244,11 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
         missQueue->handleMiss(pkt, size, curTick + hitLatency);
     }
 
+    if (pkt->cmd == Packet::Writeback) {
+        //Need to clean up the packet on a writeback miss, but leave the request
+        delete pkt;
+    }
+
     return true;
 }
 
@@ -265,6 +272,7 @@ Cache<TagStore,Buffering,Coherence>::getPacket()
 
     assert(!doMasterRequest() || missQueue->havePending());
     assert(!pkt || pkt->time <= curTick);
+    SIGNAL_NACK_HACK = false;
     return pkt;
 }
 
@@ -273,16 +281,15 @@ void
 Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr,
                                                 bool success)
 {
-    if (success && !(pkt && (pkt->flags & NACKED_LINE))) {
-        if (!mshr->pkt->needsResponse()
-            && !(mshr->pkt->cmd == Packet::UpgradeReq)
-            && (pkt && (pkt->flags & SATISFIED))) {
-            //Writeback, clean up the non copy version of the packet
-            delete pkt;
-        }
+    if (success && !(SIGNAL_NACK_HACK)) {
+        //Remember if it was an upgrade because writeback MSHR's are removed
+        //in Mark in Service
+        bool upgrade = (mshr->pkt && mshr->pkt->cmd == Packet::UpgradeReq);
+
         missQueue->markInService(mshr->pkt, mshr);
+
         //Temp Hack for UPGRADES
-        if (mshr->pkt && mshr->pkt->cmd == Packet::UpgradeReq) {
+        if (upgrade) {
             assert(pkt);  //Upgrades need to be fixed
             pkt->flags &= ~CACHE_LINE_FILL;
             BlkType *blk = tags->findBlock(pkt);
@@ -300,6 +307,7 @@ Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr,
         }
     } else if (pkt && !pkt->req->isUncacheable()) {
         pkt->flags &= ~NACKED_LINE;
+        SIGNAL_NACK_HACK = false;
         pkt->flags &= ~SATISFIED;
         pkt->flags &= ~SNOOP_COMMIT;
 
@@ -333,6 +341,8 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(PacketPtr &pkt)
         DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
 
         if (pkt->isCacheFill() && !pkt->isNoAllocate()) {
+            DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
+                    pkt->getAddr());
             blk = tags->findBlock(pkt);
             CacheBlk::State old_state = (blk) ? blk->status : 0;
             PacketList writebacks;
@@ -402,6 +412,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(PacketPtr &pkt)
                     assert(!(pkt->flags & SATISFIED));
                     pkt->flags |= SATISFIED;
                     pkt->flags |= NACKED_LINE;
+                    SIGNAL_NACK_HACK = true;
                     ///@todo NACK's from other levels
                     //warn("NACKs from devices not connected to the same bus "
                     //"not implemented\n");
@@ -474,6 +485,13 @@ Cache<TagStore,Buffering,Coherence>::snoop(PacketPtr &pkt)
     }
     CacheBlk::State new_state;
     bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
+
+    if (blk && mshr && !mshr->inService && new_state == 0) {
+            //There was an outstanding write to a shared block; it presumably now
+            //needs a ReadEx, not an upgrade, so clear NO_ALLOCATE in the MSHR pkt
+            mshr->pkt->flags &= ~NO_ALLOCATE;
+    }
+
     if (satisfy) {
         DPRINTF(Cache, "Cache snooped a %s request for addr %x and "
                 "now supplying data, new state is %i\n",
@@ -486,6 +504,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(PacketPtr &pkt)
     if (blk)
         DPRINTF(Cache, "Cache snooped a %s request for addr %x, "
                 "new state is %i\n", pkt->cmdString(), blk_addr, new_state);
+
     tags->handleSnoop(blk, new_state);
 }
 
@@ -534,9 +553,9 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update,
         }
     }
 
-    if (!update && (pkt->isWrite() || (otherSidePort == cpuSidePort))) {
+    if (!update && (otherSidePort == cpuSidePort)) {
         // Still need to change data in all locations.
-        otherSidePort->sendFunctional(pkt);
+        otherSidePort->checkAndSendFunctional(pkt);
         if (pkt->isRead() && pkt->result == Packet::Success)
             return 0;
     }
@@ -560,30 +579,33 @@ Cache<TagStore,Buffering,Coherence>::probe(PacketPtr &pkt, bool update,
     missQueue->findWrites(blk_addr, writes);
 
     if (!update) {
+        bool notDone = !(pkt->flags & SATISFIED); //Hit in cache (was a block)
         // Check for data in MSHR and writebuffer.
         if (mshr) {
             MSHR::TargetList *targets = mshr->getTargetList();
             MSHR::TargetList::iterator i = targets->begin();
             MSHR::TargetList::iterator end = targets->end();
-            for (; i != end; ++i) {
+            for (; i != end && notDone; ++i) {
                 PacketPtr target = *i;
                 // If the target contains data, and it overlaps the
                 // probed request, need to update data
                 if (target->intersect(pkt)) {
-                    fixPacket(pkt, target);
+                    DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a MSHR\n",
+                            pkt->cmdString(), blk_addr);
+                    notDone = fixPacket(pkt, target);
                 }
             }
         }
-        for (int i = 0; i < writes.size(); ++i) {
+        for (int i = 0; i < writes.size() && notDone; ++i) {
             PacketPtr write = writes[i]->pkt;
             if (write->intersect(pkt)) {
-                fixPacket(pkt, write);
+                DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a writeback\n",
+                        pkt->cmdString(), blk_addr);
+                notDone = fixPacket(pkt, write);
             }
         }
-        if (pkt->isRead()
-            && pkt->result != Packet::Success
-            && otherSidePort == memSidePort) {
-            otherSidePort->sendFunctional(pkt);
+        if (notDone && otherSidePort == memSidePort) {
+            otherSidePort->checkAndSendFunctional(pkt);
             assert(pkt->result == Packet::Success);
         }
         return 0;
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index d3a7a7933..6cb62429d 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -198,11 +198,6 @@ MSHRQueue::markInService(MSHR* mshr)
     //assert(mshr == pendingList.front());
     if (!mshr->pkt->needsResponse() && !(mshr->pkt->cmd == Packet::UpgradeReq)) {
         assert(mshr->getNumTargets() == 0);
-        if ((mshr->pkt->flags & SATISFIED) && (mshr->pkt->cmd == Packet::Writeback)) {
-            //Writeback hit, so delete it
-            //otherwise the consumer will delete it
-            delete mshr->pkt->req;
-        }
         deallocate(mshr);
         return;
     }
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index a342af634..e2faf4527 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -143,6 +143,24 @@ Packet::intersect(PacketPtr p)
     return !(s1 > e2 || e1 < s2);
 }
 
+bool
+fixDelayedResponsePacket(PacketPtr func, PacketPtr timing)
+{
+    // A queued read/write response has not had its HasData bit flipped yet,
+    // so flip it around the fixPacket call; other commands are left alone.
+    const bool flip = timing->isRead() || timing->isWrite();
+    if (flip)
+        timing->toggleData();
+
+    bool result = fixPacket(func, timing);
+
+    // Put the command back the way the queue expects it
+    if (flip)
+        timing->toggleData();
+
+    return result;
+}
+
 bool
 fixPacket(PacketPtr func, PacketPtr timing)
 {
@@ -168,6 +186,7 @@ fixPacket(PacketPtr func, PacketPtr timing)
             memcpy(func->getPtr<uint8_t>(), timing->getPtr<uint8_t>() +
                     funcStart - timingStart, func->getSize());
             func->result = Packet::Success;
+            func->flags |= SATISFIED;
             return false;
         } else {
             // In this case the timing packet only partially satisfies the
@@ -182,11 +201,11 @@ fixPacket(PacketPtr func, PacketPtr timing)
         if (funcStart >= timingStart) {
             memcpy(timing->getPtr<uint8_t>() + (funcStart - timingStart),
                    func->getPtr<uint8_t>(),
-                   std::min(funcEnd, timingEnd) - funcStart);
+                   (std::min(funcEnd, timingEnd) - funcStart) + 1);
         } else { // timingStart > funcStart
             memcpy(timing->getPtr<uint8_t>(),
                    func->getPtr<uint8_t>() + (timingStart - funcStart),
-                   std::min(funcEnd, timingEnd) - timingStart);
+                   (std::min(funcEnd, timingEnd) - timingStart) + 1);
         }
         // we always want to keep going with a write
         return true;
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index cb97dd036..2bc51bf12 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -344,6 +344,13 @@ class Packet
         srcValid = false;
     }
 
+
+    /** Flip the HasData bit of cmd; calling it twice restores cmd. */
+    void toggleData() {
+        const int flipped = (int)cmd ^ HasData;
+        cmd = (Command)flipped;
+    }
+
     /**
      * Take a request packet and modify it in place to be suitable for
      * returning as a response to that request.
@@ -448,7 +455,6 @@ class Packet
     bool intersect(PacketPtr p);
 };
 
-
 /** This function given a functional packet and a timing packet either satisfies
  * the timing packet, or updates the timing packet to reflect the updated state
  * in the timing packet. It returns if the functional packet should continue to
@@ -456,6 +462,12 @@ class Packet
  */
 bool fixPacket(PacketPtr func, PacketPtr timing);
 
+/** Wrapper for fixPacket that toggles the HasData bit of a read/write timing
+ * packet around the call. It is used when a response is waiting in the caches but
+ * hasn't been marked as a response yet (so fixPacket sees the correct HasData).
+ */
+bool fixDelayedResponsePacket(PacketPtr func, PacketPtr timing);
+
 std::ostream & operator<<(std::ostream &o, const Packet &p);
 
 #endif //__MEM_PACKET_HH
diff --git a/src/mem/physical.cc b/src/mem/physical.cc
index 39eb63108..94f60ad80 100644
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -288,6 +288,21 @@ PhysicalMemory::MemoryPort::recvAtomic(PacketPtr pkt)
 void
 PhysicalMemory::MemoryPort::recvFunctional(PacketPtr pkt)
 {
+    //Since we are overriding the function, the check of queued packets done by
+    //SimpleTimingPort::recvFunctional() has to be repeated here.
+    std::list<std::pair<Tick,PacketPtr> >::iterator i = transmitList.begin();
+    std::list<std::pair<Tick,PacketPtr> >::iterator end = transmitList.end();
+    bool notDone = true;
+
+    while (i != end && notDone) {
+        PacketPtr target = i->second;
+        // If the target contains data, and it overlaps the
+        // probed request, need to update data
+        if (target->intersect(pkt))
+            notDone = fixPacket(pkt, target);
+        i++;
+    }
+
     // Default implementation of SimpleTimingPort::recvFunctional()
     // calls recvAtomic() and throws away the latency; we can save a
     // little here by just not calculating the latency.
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index 086d91279..c43c9aac0 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -35,14 +35,14 @@ SimpleTimingPort::recvFunctional(PacketPtr pkt)
 {
     std::list<std::pair<Tick,PacketPtr> >::iterator i = transmitList.begin();
     std::list<std::pair<Tick,PacketPtr> >::iterator end = transmitList.end();
-    bool done = false;
+    bool notDone = true;
 
-    while (i != end && !done) {
+    while (i != end && notDone) {
         PacketPtr target = i->second;
         // If the target contains data, and it overlaps the
         // probed request, need to update data
         if (target->intersect(pkt))
-            done = fixPacket(pkt, target);
+            notDone = fixPacket(pkt, target);
 
         i++;
     }
@@ -118,8 +118,14 @@ SimpleTimingPort::sendTiming(PacketPtr pkt, Tick time)
     bool done = false;
 
     while (i != end && !done) {
-        if (time+curTick < i->first)
+        if (time+curTick < i->first) {
+            if (i == transmitList.begin()) {
+                //Inserting at beginning, reschedule
+                sendEvent.reschedule(time+curTick);
+            }
             transmitList.insert(i,std::pair<Tick,PacketPtr>(time+curTick,pkt));
+            done = true;
+        }
         i++;
     }
 }
diff --git a/src/python/SConscript b/src/python/SConscript
index c9e713199..5c351c32a 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -98,12 +98,12 @@ pyzip_files.append('m5/defines.py')
 pyzip_files.append('m5/info.py')
 pyzip_files.append(join(env['ROOT'], 'util/pbs/jobfile.py'))
 
-env.Command(['swig/cc_main_wrap.cc', 'm5/cc_main.py'],
-            'swig/cc_main.i',
+env.Command(['swig/main_wrap.cc', 'm5/internal/main.py'],
+            'swig/main.i',
             '$SWIG $SWIGFLAGS -outdir ${TARGETS[1].dir} '
             '-o ${TARGETS[0]} $SOURCES')
 
-pyzip_dep_files.append('m5/cc_main.py')
+pyzip_dep_files.append('m5/internal/main.py')
 
 # Action function to build the zip archive.  Uses the PyZipFile module
 # included in the standard Python library.
diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py
index 18b3fff55..934358298 100644
--- a/src/python/m5/SimObject.py
+++ b/src/python/m5/SimObject.py
@@ -695,7 +695,7 @@ class SimObject(object):
     def getCCObject(self):
         if not self._ccObject:
             self._ccObject = -1 # flag to catch cycles in recursion
-            self._ccObject = cc_main.createSimObject(self.path())
+            self._ccObject = internal.main.createSimObject(self.path())
         elif self._ccObject == -1:
             raise RuntimeError, "%s: recursive call to getCCObject()" \
                   % self.path()
@@ -730,13 +730,13 @@ class SimObject(object):
             # i don't know if there's a better way to do this - calling
             # setMemoryMode directly from self._ccObject results in calling
             # SimObject::setMemoryMode, not the System::setMemoryMode
-            system_ptr = cc_main.convertToSystemPtr(self._ccObject)
+            system_ptr = internal.main.convertToSystemPtr(self._ccObject)
             system_ptr.setMemoryMode(mode)
         for child in self._children.itervalues():
             child.changeTiming(mode)
 
     def takeOverFrom(self, old_cpu):
-        cpu_ptr = cc_main.convertToBaseCPUPtr(old_cpu._ccObject)
+        cpu_ptr = internal.main.convertToBaseCPUPtr(old_cpu._ccObject)
         self._ccObject.takeOverFrom(cpu_ptr)
 
     # generate output file for 'dot' to display as a pretty graph.
@@ -795,8 +795,7 @@ def resolveSimObject(name):
 # short to avoid polluting other namespaces.
 __all__ = ['SimObject', 'ParamContext']
 
-
 # see comment on imports at end of __init__.py.
 import proxy
-import cc_main
+import internal
 import m5
diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py
index 579562b38..f39cc670a 100644
--- a/src/python/m5/__init__.py
+++ b/src/python/m5/__init__.py
@@ -30,11 +30,11 @@
 import atexit, os, sys
 
 # import the SWIG-wrapped main C++ functions
-import cc_main
+import internal
 # import a few SWIG-wrapped items (those that are likely to be used
 # directly by user scripts) completely into this module for
 # convenience
-from cc_main import simulate, SimLoopExitEvent
+from internal.main import simulate, SimLoopExitEvent
 
 # import the m5 compile options
 import defines
@@ -85,10 +85,10 @@ def instantiate(root):
     root.print_ini()
     sys.stdout.close() # close config.ini
     sys.stdout = sys.__stdout__ # restore to original
-    cc_main.loadIniFile(resolveSimObject)  # load config.ini into C++
+    internal.main.loadIniFile(resolveSimObject)  # load config.ini into C++
     root.createCCObject()
     root.connectPorts()
-    cc_main.finalInit()
+    internal.main.finalInit()
     noDot = True # temporary until we fix dot
     if not noDot:
        dot = pydot.Dot()
@@ -102,10 +102,10 @@ def instantiate(root):
 
 # Export curTick to user script.
 def curTick():
-    return cc_main.cvar.curTick
+    return internal.main.cvar.curTick
 
 # register our C++ exit callback function with Python
-atexit.register(cc_main.doExitCleanup)
+atexit.register(internal.main.doExitCleanup)
 
 # This loops until all objects have been fully drained.
 def doDrain(root):
@@ -119,7 +119,7 @@ def doDrain(root):
 # be drained.
 def drain(root):
     all_drained = False
-    drain_event = cc_main.createCountedDrain()
+    drain_event = internal.main.createCountedDrain()
     unready_objects = root.startDrain(drain_event, True)
     # If we've got some objects that can't drain immediately, then simulate
     if unready_objects > 0:
@@ -127,7 +127,7 @@ def drain(root):
         simulate()
     else:
         all_drained = True
-    cc_main.cleanupCountedDrain(drain_event)
+    internal.main.cleanupCountedDrain(drain_event)
     return all_drained
 
 def resume(root):
@@ -138,12 +138,12 @@ def checkpoint(root, dir):
         raise TypeError, "Object is not a root object. Checkpoint must be called on a root object."
     doDrain(root)
     print "Writing checkpoint"
-    cc_main.serializeAll(dir)
+    internal.main.serializeAll(dir)
     resume(root)
 
 def restoreCheckpoint(root, dir):
     print "Restoring from checkpoint"
-    cc_main.unserializeAll(dir)
+    internal.main.unserializeAll(dir)
     resume(root)
 
 def changeToAtomic(system):
@@ -152,7 +152,7 @@ def changeToAtomic(system):
         "called on a root object."
     doDrain(system)
     print "Changing memory mode to atomic"
-    system.changeTiming(cc_main.SimObject.Atomic)
+    system.changeTiming(internal.main.SimObject.Atomic)
 
 def changeToTiming(system):
     if not isinstance(system, objects.Root) and not isinstance(system, objects.System):
@@ -160,7 +160,7 @@ def changeToTiming(system):
         "called on a root object."
     doDrain(system)
     print "Changing memory mode to timing"
-    system.changeTiming(cc_main.SimObject.Timing)
+    system.changeTiming(internal.main.SimObject.Timing)
 
 def switchCpus(cpuList):
     print "switching cpus"
@@ -180,7 +180,7 @@ def switchCpus(cpuList):
             raise TypeError, "%s is not of type BaseCPU" % cpu
 
     # Drain all of the individual CPUs
-    drain_event = cc_main.createCountedDrain()
+    drain_event = internal.main.createCountedDrain()
     unready_cpus = 0
     for old_cpu in old_cpus:
         unready_cpus += old_cpu.startDrain(drain_event, False)
@@ -188,7 +188,7 @@ def switchCpus(cpuList):
     if unready_cpus > 0:
         drain_event.setCount(unready_cpus)
         simulate()
-    cc_main.cleanupCountedDrain(drain_event)
+    internal.main.cleanupCountedDrain(drain_event)
     # Now all of the CPUs are ready to be switched out
     for old_cpu in old_cpus:
         old_cpu._ccObject.switchOut()
diff --git a/src/python/m5/main.py b/src/python/m5/main.py
index ef37f62ac..1e224c0cf 100644
--- a/src/python/m5/main.py
+++ b/src/python/m5/main.py
@@ -211,7 +211,7 @@ def parse_args():
     return opts,args
 
 def main():
-    import cc_main
+    import internal
 
     parse_args()
 
@@ -249,7 +249,7 @@ def main():
         print "M5 Simulator System"
         print brief_copyright
         print
-        print "M5 compiled %s" % cc_main.cvar.compileDate;
+        print "M5 compiled %s" % internal.main.cvar.compileDate;
         print "M5 started %s" % datetime.now().ctime()
         print "M5 executing on %s" % socket.gethostname()
         print "command line:",
@@ -264,7 +264,7 @@ def main():
         usage(2)
 
     # tell C++ about output directory
-    cc_main.setOutputDir(options.outdir)
+    internal.main.setOutputDir(options.outdir)
 
     # update the system path with elements from the -p option
     sys.path[0:0] = options.path
diff --git a/src/python/m5/objects/BaseCPU.py b/src/python/m5/objects/BaseCPU.py
index b6e05627d..2f702a4bf 100644
--- a/src/python/m5/objects/BaseCPU.py
+++ b/src/python/m5/objects/BaseCPU.py
@@ -15,6 +15,12 @@ class BaseCPU(SimObject):
     cpu_id = Param.Int("CPU identifier")
 
     if build_env['FULL_SYSTEM']:
+        do_quiesce = Param.Bool(True, "enable quiesce instructions")
+        do_checkpoint_insts = Param.Bool(True,
+            "enable checkpoint pseudo instructions")
+        do_statistics_insts = Param.Bool(True,
+            "enable statistics pseudo instructions")
+
         if build_env['TARGET_ISA'] == 'sparc':
             dtb = Param.SparcDTB(SparcDTB(), "Data TLB")
             itb = Param.SparcITB(SparcITB(), "Instruction TLB")
diff --git a/src/python/m5/params.py b/src/python/m5/params.py
index 4b5953bcb..9e5f985c3 100644
--- a/src/python/m5/params.py
+++ b/src/python/m5/params.py
@@ -830,8 +830,9 @@ class PortRef(object):
         if self.ccConnected: # already done this
             return
         peer = self.peer
-        cc_main.connectPorts(self.simobj.getCCObject(), self.name, self.index,
-                             peer.simobj.getCCObject(), peer.name, peer.index)
+        internal.main.connectPorts(self.simobj.getCCObject(), self.name,
+                                   self.index, peer.simobj.getCCObject(),
+                                   peer.name, peer.index)
         self.ccConnected = True
         peer.ccConnected = True
 
@@ -970,4 +971,4 @@ __all__ = ['Param', 'VectorParam',
 from SimObject import isSimObject, isSimObjectSequence, isSimObjectClass
 import proxy
 import objects
-import cc_main
+import internal
diff --git a/src/sim/main.cc b/src/sim/main.cc
index 5b44102a8..6037283a4 100644
--- a/src/sim/main.cc
+++ b/src/sim/main.cc
@@ -117,7 +117,9 @@ abortHandler(int sigtype)
 #endif
 }
 
-extern "C" { void init_cc_main(); }
+extern "C" {
+void init_main();
+}
 
 int
 main(int argc, char **argv)
@@ -155,8 +157,8 @@ main(int argc, char **argv)
     Py_Initialize();
     PySys_SetArgv(argc, argv);
 
-    // initialize SWIG 'cc_main' module
-    init_cc_main();
+    // initialize SWIG 'm5.internal.main' module
+    init_main();
 
     PyRun_SimpleString("import m5.main");
     PyRun_SimpleString("m5.main.main()");
diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc
index 66036def1..4a8c0eb66 100644
--- a/src/sim/pseudo_inst.cc
+++ b/src/sim/pseudo_inst.cc
@@ -40,7 +40,6 @@
 #include "cpu/thread_context.hh"
 #include "cpu/quiesce_event.hh"
 #include "arch/kernel_stats.hh"
-#include "sim/param.hh"
 #include "sim/pseudo_inst.hh"
 #include "sim/serialize.hh"
 #include "sim/sim_exit.hh"
@@ -57,10 +56,6 @@ using namespace TheISA;
 
 namespace AlphaPseudo
 {
-    bool doStatisticsInsts;
-    bool doCheckpointInsts;
-    bool doQuiesce;
-
     void
     arm(ThreadContext *tc)
     {
@@ -71,7 +66,7 @@ namespace AlphaPseudo
     void
     quiesce(ThreadContext *tc)
     {
-        if (!doQuiesce)
+        if (!tc->getCpuPtr()->params->do_quiesce)
             return;
 
         DPRINTF(Quiesce, "%s: quiesce()\n", tc->getCpuPtr()->name());
@@ -84,7 +79,7 @@ namespace AlphaPseudo
     void
     quiesceNs(ThreadContext *tc, uint64_t ns)
     {
-        if (!doQuiesce || ns == 0)
+        if (!tc->getCpuPtr()->params->do_quiesce || ns == 0)
             return;
 
         EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent();
@@ -107,7 +102,7 @@ namespace AlphaPseudo
     void
     quiesceCycles(ThreadContext *tc, uint64_t cycles)
     {
-        if (!doQuiesce || cycles == 0)
+        if (!tc->getCpuPtr()->params->do_quiesce || cycles == 0)
             return;
 
         EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent();
@@ -197,7 +192,7 @@ namespace AlphaPseudo
     void
     resetstats(ThreadContext *tc, Tick delay, Tick period)
     {
-        if (!doStatisticsInsts)
+        if (!tc->getCpuPtr()->params->do_statistics_insts)
             return;
 
 
@@ -211,7 +206,7 @@ namespace AlphaPseudo
     void
     dumpstats(ThreadContext *tc, Tick delay, Tick period)
     {
-        if (!doStatisticsInsts)
+        if (!tc->getCpuPtr()->params->do_statistics_insts)
             return;
 
 
@@ -252,7 +247,7 @@ namespace AlphaPseudo
     void
     dumpresetstats(ThreadContext *tc, Tick delay, Tick period)
     {
-        if (!doStatisticsInsts)
+        if (!tc->getCpuPtr()->params->do_statistics_insts)
             return;
 
 
@@ -266,7 +261,7 @@ namespace AlphaPseudo
     void
     m5checkpoint(ThreadContext *tc, Tick delay, Tick period)
     {
-        if (!doCheckpointInsts)
+        if (!tc->getCpuPtr()->params->do_checkpoint_insts)
             return;
 
         Tick when = curTick + delay * Clock::Int::ns;
@@ -278,7 +273,7 @@ namespace AlphaPseudo
     uint64_t
     readfile(ThreadContext *tc, Addr vaddr, uint64_t len, uint64_t offset)
     {
-        const string &file = tc->getCpuPtr()->system->params()->readfile;
+        const string &file = tc->getSystemPtr()->params()->readfile;
         if (file.empty()) {
             return ULL(0);
         }
@@ -310,33 +305,6 @@ namespace AlphaPseudo
         return result;
     }
 
-    class Context : public ParamContext
-    {
-      public:
-        Context(const string &section) : ParamContext(section) {}
-        void checkParams();
-    };
-
-    Context context("pseudo_inst");
-
-    Param<bool> __quiesce(&context, "quiesce",
-                          "enable quiesce instructions",
-                          true);
-    Param<bool> __statistics(&context, "statistics",
-                             "enable statistics pseudo instructions",
-                             true);
-    Param<bool> __checkpoint(&context, "checkpoint",
-                             "enable checkpoint pseudo instructions",
-                             true);
-
-    void
-    Context::checkParams()
-    {
-        doQuiesce = __quiesce;
-        doStatisticsInsts = __statistics;
-        doCheckpointInsts = __checkpoint;
-    }
-
     void debugbreak(ThreadContext *tc)
     {
         debug_break();
diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
index 0426166d9..d34c19255 100644
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
@@ -1,40 +1,40 @@
 
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                          682                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      2437                       # Number of BTB lookups
+global.BPredUnit.BTBHits                          675                       # Number of BTB hits
+global.BPredUnit.BTBLookups                      2343                       # Number of BTB lookups
 global.BPredUnit.RASInCorrect                      76                       # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect                    443                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   1570                       # Number of conditional branches predicted
-global.BPredUnit.lookups                         5322                       # Number of BP lookups
-global.BPredUnit.usedRAS                         2820                       # Number of times the RAS was used to get a target.
-host_inst_rate                                   9098                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 180112                       # Number of bytes of host memory used
-host_seconds                                     0.62                       # Real time elapsed on the host
-host_tick_rate                                2277354                       # Simulator tick rate (ticks/s)
-memdepunit.memDep.conflictingLoads                 27                       # Number of conflicting loads.
-memdepunit.memDep.conflictingStores               144                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                  3819                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 3727                       # Number of stores inserted to the mem dependence unit.
+global.BPredUnit.condIncorrect                    437                       # Number of conditional branches incorrect
+global.BPredUnit.condPredicted                   1563                       # Number of conditional branches predicted
+global.BPredUnit.lookups                         5229                       # Number of BP lookups
+global.BPredUnit.usedRAS                         2821                       # Number of times the RAS was used to get a target.
+host_inst_rate                                  15039                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 180156                       # Number of bytes of host memory used
+host_seconds                                     0.37                       # Real time elapsed on the host
+host_tick_rate                                3741816                       # Simulator tick rate (ticks/s)
+memdepunit.memDep.conflictingLoads                 23                       # Number of conflicting loads.
+memdepunit.memDep.conflictingStores               117                       # Number of conflicting stores.
+memdepunit.memDep.insertedLoads                  3775                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 3734                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        5623                       # Number of instructions simulated
 sim_seconds                                  0.000001                       # Number of seconds simulated
-sim_ticks                                     1408131                       # Number of ticks simulated
+sim_ticks                                     1400135                       # Number of ticks simulated
 system.cpu.commit.COM:branches                    862                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events                94                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events                97                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples        58722                      
+system.cpu.commit.COM:committed_per_cycle.samples        51243                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0        56096   9552.81%           
-                               1         1495    254.59%           
-                               2          457     77.82%           
-                               3          225     38.32%           
-                               4          133     22.65%           
-                               5           92     15.67%           
-                               6           98     16.69%           
-                               7           32      5.45%           
-                               8           94     16.01%           
+                               0        48519   9468.42%           
+                               1         1590    310.29%           
+                               2          483     94.26%           
+                               3          227     44.30%           
+                               4          131     25.56%           
+                               5          104     20.30%           
+                               6           61     11.90%           
+                               7           31      6.05%           
+                               8           97     18.93%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -43,69 +43,69 @@ system.cpu.commit.COM:loads                       979                       # Nu
 system.cpu.commit.COM:membars                       0                       # Number of memory barriers committed
 system.cpu.commit.COM:refs                       1791                       # Number of memory references committed
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts               374                       # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts               368                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts           5640                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls              17                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts           13826                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts           13830                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                        5623                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                  5623                       # Number of Instructions Simulated
-system.cpu.cpi                             250.423439                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                       250.423439                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               1597                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  6940.988166                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency  6843.030303                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   1428                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency        1173027                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.105823                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                  169                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits                70                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       677460                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.061991                       # mshr miss rate for ReadReq accesses
+system.cpu.cpi                             249.001423                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                       249.001423                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               1600                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency  6986.684848                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  6882.626263                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                   1435                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency        1152803                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.103125                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                  165                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_hits                66                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_miss_latency       681380                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.061875                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses              99                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses               812                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency  5305.074803                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency  5141.328767                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency  5293.047244                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency  5141.082192                       # average WriteReq mshr miss latency
 system.cpu.dcache.WriteReq_hits                   558                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       1347489                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency       1344434                       # number of WriteReq miss cycles
 system.cpu.dcache.WriteReq_miss_rate         0.312808                       # miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_misses                 254                       # number of WriteReq misses
 system.cpu.dcache.WriteReq_mshr_hits              181                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency       375317                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency       375299                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate     0.089901                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses             73                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_blocked_cycles_no_targets  3389.604651                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  11.546512                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_blocked_cycles_no_targets  3366.651163                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_refs                  11.587209                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets               43                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
-system.cpu.dcache.blocked_cycles_no_targets       145753                       # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets       144766                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                2409                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  5958.666667                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  6120.796512                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    1986                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         2520516                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.175592                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   423                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                251                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      1052777                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.071399                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_accesses                2412                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency  5959.992840                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  6143.482558                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                    1993                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         2497237                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.173715                       # miss rate for demand accesses
+system.cpu.dcache.demand_misses                   419                       # number of demand (read+write) misses
+system.cpu.dcache.demand_mshr_hits                247                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_miss_latency      1056679                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.071310                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses              172                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               2409                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  5958.666667                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  6120.796512                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses               2412                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency  5959.992840                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  6143.482558                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   1986                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        2520516                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.175592                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  423                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits               251                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      1052777                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.071399                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_hits                   1993                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        2497237                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.173715                       # miss rate for overall accesses
+system.cpu.dcache.overall_misses                  419                       # number of overall misses
+system.cpu.dcache.overall_mshr_hits               247                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_miss_latency      1056679                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.071310                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses             172                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -121,89 +121,89 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                    172                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                101.103948                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     1986                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                101.349720                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     1993                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles          16535                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BlockedCycles          17501                       # Number of cycles decode is blocked
 system.cpu.decode.DECODE:BranchMispred             70                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           167                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts           29787                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles             36497                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles               5653                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles            2641                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:BranchResolved           168                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:DecodedInsts           29666                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles             28130                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles               5553                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles            2529                       # Number of cycles decode is squashing
 system.cpu.decode.DECODE:SquashedInsts            200                       # Number of squashed instructions handled by decode
-system.cpu.decode.DECODE:UnblockCycles             38                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                        5322                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                      6542                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                         21461                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   388                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          35708                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                    2149                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.086728                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles               6542                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches               3502                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        0.581905                       # Number of inst fetches per cycle
+system.cpu.decode.DECODE:UnblockCycles             60                       # Number of cycles decode is unblocking
+system.cpu.fetch.Branches                        5229                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                      6371                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                         13322                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   296                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                          35572                       # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles                    2057                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate                  0.097242                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles               6371                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches               3496                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        0.661522                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples               61364                      
+system.cpu.fetch.rateDist.samples               53773                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0        54337   8854.87%           
-                               1          197     32.10%           
-                               2          585     95.33%           
-                               3         1433    233.52%           
-                               4         1461    238.09%           
-                               5          241     39.27%           
-                               6          330     53.78%           
-                               7         1227    199.95%           
-                               8         1553    253.08%           
+                               0        46825   8707.90%           
+                               1          199     37.01%           
+                               2          504     93.73%           
+                               3         1429    265.75%           
+                               4         1462    271.88%           
+                               5          245     45.56%           
+                               6          322     59.88%           
+                               7         1223    227.44%           
+                               8         1564    290.85%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses               6541                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  5110.042601                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4297.762058                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   6095                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        2279079                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.068185                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_accesses               6370                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  5088.614350                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4278.032258                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   5924                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        2269522                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.070016                       # miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_misses                  446                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits               135                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency      1336604                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.047546                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses             311                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_mshr_hits               136                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency      1326190                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.048666                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses             310                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_blocked_cycles_no_targets  3658.571429                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                  19.598071                       # Average number of references to valid blocks.
+system.cpu.icache.avg_blocked_cycles_no_targets  3444.375000                       # average number of cycles each access was blocked
+system.cpu.icache.avg_refs                  19.109677                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
-system.cpu.icache.blocked_no_targets                7                       # number of cycles access was blocked
+system.cpu.icache.blocked_no_targets                8                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
-system.cpu.icache.blocked_cycles_no_targets        25610                       # number of cycles access was blocked
+system.cpu.icache.blocked_cycles_no_targets        27555                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                6541                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  5110.042601                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4297.762058                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    6095                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         2279079                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.068185                       # miss rate for demand accesses
+system.cpu.icache.demand_accesses                6370                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency  5088.614350                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4278.032258                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                    5924                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency         2269522                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.070016                       # miss rate for demand accesses
 system.cpu.icache.demand_misses                   446                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                135                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      1336604                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.047546                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses              311                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_hits                136                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_miss_latency      1326190                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.048666                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_misses              310                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               6541                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  5110.042601                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4297.762058                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses               6370                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency  5088.614350                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4278.032258                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   6095                       # number of overall hits
-system.cpu.icache.overall_miss_latency        2279079                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.068185                       # miss rate for overall accesses
+system.cpu.icache.overall_hits                   5924                       # number of overall hits
+system.cpu.icache.overall_miss_latency        2269522                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.070016                       # miss rate for overall accesses
 system.cpu.icache.overall_misses                  446                       # number of overall misses
-system.cpu.icache.overall_mshr_hits               135                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      1336604                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.047546                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses             311                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_hits               136                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_miss_latency      1326190                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.048666                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_misses             310                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.icache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -216,61 +216,61 @@ system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit            0
 system.cpu.icache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.icache.replacements                      0                       # number of replacements
-system.cpu.icache.sampled_refs                    311                       # Sample count of references to valid blocks.
+system.cpu.icache.sampled_refs                    310                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                147.733346                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     6095                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                147.070827                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     5924                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idleCycles                         1346768                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     2391                       # Number of branches executed
-system.cpu.iew.EXEC:nop                            45                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.222997                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         5561                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                       2148                       # Number of stores executed
+system.cpu.idleCycles                         1346363                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                     2364                       # Number of branches executed
+system.cpu.iew.EXEC:nop                            48                       # number of nop insts executed
+system.cpu.iew.EXEC:rate                     0.251650                       # Inst execution rate
+system.cpu.iew.EXEC:refs                         5460                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                       2123                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                      6673                       # num instructions consuming a value
-system.cpu.iew.WB:count                         11743                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.790499                       # average fanout of values written-back
+system.cpu.iew.WB:consumers                      6466                       # num instructions consuming a value
+system.cpu.iew.WB:count                         11620                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     0.798639                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      5275                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.191366                       # insts written-back per cycle
-system.cpu.iew.WB:sent                          11811                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                  404                       # Number of branch mispredicts detected at execute
-system.cpu.iew.iewBlockCycles                    6301                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  3819                       # Number of dispatched load instructions
-system.cpu.iew.iewDispNonSpecInsts                 23                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts              2540                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 3727                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts               19466                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  3413                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               276                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                 13684                       # Number of executed instructions
-system.cpu.iew.iewIQFullEvents                      5                       # Number of times the IQ has become full, causing a stall
+system.cpu.iew.WB:producers                      5164                       # num instructions producing a value
+system.cpu.iew.WB:rate                       0.216094                       # insts written-back per cycle
+system.cpu.iew.WB:sent                          11692                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                  401                       # Number of branch mispredicts detected at execute
+system.cpu.iew.iewBlockCycles                    7230                       # Number of cycles IEW is blocking
+system.cpu.iew.iewDispLoadInsts                  3775                       # Number of dispatched load instructions
+system.cpu.iew.iewDispNonSpecInsts                 24                       # Number of dispatched non-speculative instructions
+system.cpu.iew.iewDispSquashedInsts              2557                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                 3734                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts               19465                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                  3337                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts               308                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                 13532                       # Number of executed instructions
+system.cpu.iew.iewIQFullEvents                     10                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     1                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                   2641                       # Number of cycles IEW is squashing
-system.cpu.iew.iewUnblockCycles                    34                       # Number of cycles IEW is unblocking
+system.cpu.iew.iewSquashCycles                   2529                       # Number of cycles IEW is squashing
+system.cpu.iew.iewUnblockCycles                    39                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            1                       # Number of blocked loads due to partial load-store forwarding
-system.cpu.iew.lsq.thread.0.cacheBlocked         1736                       # Number of times an access to memory failed due to the cache being blocked
+system.cpu.iew.lsq.thread.0.cacheBlocked         1656                       # Number of times an access to memory failed due to the cache being blocked
 system.cpu.iew.lsq.thread.0.forwLoads              81                       # Number of loads that had data forwarded from stores
 system.cpu.iew.lsq.thread.0.ignoredResponses            3                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation           45                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation           40                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads         2840                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores         2915                       # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents             45                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          283                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect            121                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc                               0.003993                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.003993                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                   13960                       # Type of FU issued
+system.cpu.iew.lsq.thread.0.squashedLoads         2796                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores         2922                       # Number of stores squashed
+system.cpu.iew.memOrderViolationEvents             40                       # Number of memory order violations
+system.cpu.iew.predictedNotTakenIncorrect          281                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect            120                       # Number of branches that were predicted taken incorrectly
+system.cpu.ipc                               0.004016                       # IPC: Instructions Per Cycle
+system.cpu.ipc_total                         0.004016                       # IPC: Total IPC of All Threads
+system.cpu.iq.ISSUE:FU_type_0                   13840                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                           (null)            2      0.01%            # Type of FU issued
-                          IntAlu         8277     59.29%            # Type of FU issued
+                          IntAlu         8249     59.60%            # Type of FU issued
                          IntMult            1      0.01%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            2      0.01%            # Type of FU issued
@@ -279,16 +279,16 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         3509     25.14%            # Type of FU issued
-                        MemWrite         2169     15.54%            # Type of FU issued
+                         MemRead         3432     24.80%            # Type of FU issued
+                        MemWrite         2154     15.56%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt                    93                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.006662                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_cnt                    86                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate             0.006214                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                           (null)            0      0.00%            # attempts to use FU when none available
-                          IntAlu            3      3.23%            # attempts to use FU when none available
+                          IntAlu            1      1.16%            # attempts to use FU when none available
                          IntMult            0      0.00%            # attempts to use FU when none available
                           IntDiv            0      0.00%            # attempts to use FU when none available
                         FloatAdd            0      0.00%            # attempts to use FU when none available
@@ -297,78 +297,78 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                        FloatMult            0      0.00%            # attempts to use FU when none available
                         FloatDiv            0      0.00%            # attempts to use FU when none available
                        FloatSqrt            0      0.00%            # attempts to use FU when none available
-                         MemRead           54     58.06%            # attempts to use FU when none available
-                        MemWrite           36     38.71%            # attempts to use FU when none available
+                         MemRead           53     61.63%            # attempts to use FU when none available
+                        MemWrite           32     37.21%            # attempts to use FU when none available
                        IprAccess            0      0.00%            # attempts to use FU when none available
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples        61364                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples        53773                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0        54449   8873.12%           
-                               1         3310    539.40%           
-                               2         1268    206.64%           
-                               3         1704    277.69%           
-                               4          325     52.96%           
-                               5          194     31.61%           
-                               6           79     12.87%           
-                               7           22      3.59%           
-                               8           13      2.12%           
+                               0        46903   8722.41%           
+                               1         3262    606.62%           
+                               2         1316    244.73%           
+                               3         1665    309.63%           
+                               4          333     61.93%           
+                               5          188     34.96%           
+                               6           73     13.58%           
+                               7           23      4.28%           
+                               8           10      1.86%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     0.227495                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                      19398                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                     13960                       # Number of instructions issued
-system.cpu.iq.iqNonSpecInstsAdded                  23                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined           13240                       # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued                66                       # Number of squashed instructions issued
-system.cpu.iq.iqSquashedNonSpecRemoved              6                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined         9412                       # Number of squashed operands that are examined and possibly removed from graph
-system.cpu.l2cache.ReadReq_accesses               483                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  4537.301455                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2307.006237                       # average ReadReq mshr miss latency
+system.cpu.iq.ISSUE:rate                     0.257378                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                      19393                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                     13840                       # Number of instructions issued
+system.cpu.iq.iqNonSpecInstsAdded                  24                       # Number of non-speculative instructions added to the IQ
+system.cpu.iq.iqSquashedInstsExamined           13381                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsIssued                72                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedNonSpecRemoved              7                       # Number of squashed non-spec instructions that were removed
+system.cpu.iq.iqSquashedOperandsExamined         9575                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.l2cache.ReadReq_accesses               482                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency  4520.691667                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2303.372917                       # average ReadReq mshr miss latency
 system.cpu.l2cache.ReadReq_hits                     2                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency       2182442                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate         0.995859                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 481                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      1109670                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate     0.995859                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            481                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_miss_latency       2169932                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate         0.995851                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_misses                 480                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_mshr_miss_latency      1105619                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate     0.995851                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_misses            480                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                  0.004158                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                  0.004167                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                483                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  4537.301455                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  2307.006237                       # average overall mshr miss latency
+system.cpu.l2cache.demand_accesses                482                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency  4520.691667                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  2303.372917                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      2                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        2182442                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_rate          0.995859                       # miss rate for demand accesses
-system.cpu.l2cache.demand_misses                  481                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_miss_latency        2169932                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate          0.995851                       # miss rate for demand accesses
+system.cpu.l2cache.demand_misses                  480                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency      1109670                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate     0.995859                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses             481                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_miss_latency      1105619                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate     0.995851                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_misses             480                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               483                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  4537.301455                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  2307.006237                       # average overall mshr miss latency
+system.cpu.l2cache.overall_accesses               482                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency  4520.691667                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  2303.372917                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     2                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       2182442                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_rate         0.995859                       # miss rate for overall accesses
-system.cpu.l2cache.overall_misses                 481                       # number of overall misses
+system.cpu.l2cache.overall_miss_latency       2169932                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate         0.995851                       # miss rate for overall accesses
+system.cpu.l2cache.overall_misses                 480                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency      1109670                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate     0.995859                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses            481                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_miss_latency      1105619                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate     0.995851                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_misses            480                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@@ -381,29 +381,29 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit            0
 system.cpu.l2cache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.l2cache.replacements                     0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   481                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   480                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               248.876875                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               248.469634                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       2                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                            61364                       # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles             6939                       # Number of cycles rename is blocking
+system.cpu.numCycles                            53773                       # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles             7860                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           4051                       # Number of HB maps that are committed
 system.cpu.rename.RENAME:IQFullEvents               2                       # Number of times rename has blocked due to IQ full
-system.cpu.rename.RENAME:IdleCycles             36651                       # Number of cycles rename is idle
-system.cpu.rename.RENAME:LSQFullEvents            412                       # Number of times rename has blocked due to LSQ full
-system.cpu.rename.RENAME:ROBFullEvents              9                       # Number of times rename has blocked due to ROB full
-system.cpu.rename.RENAME:RenameLookups          36093                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           29280                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands        20221                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles               5480                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles            2641                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles            493                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps             16170                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles         9160                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:IdleCycles             28280                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:LSQFullEvents            453                       # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:ROBFullEvents              8                       # Number of times rename has blocked due to ROB full
+system.cpu.rename.RENAME:RenameLookups          36016                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts           29203                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands        20142                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles               5460                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles            2529                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:UnblockCycles            483                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps             16091                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles         9161                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts           27                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts                927                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:skidInsts                828                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts           21                       # count of temporary serializing insts renamed
 system.cpu.timesIdled                             369                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload.PROG:num_syscalls              17                       # Number of system calls
diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
index 44f155480..ce44cab28 100644
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
@@ -2,39 +2,39 @@
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
 global.BPredUnit.BTBHits                          200                       # Number of BTB hits
-global.BPredUnit.BTBLookups                       711                       # Number of BTB lookups
+global.BPredUnit.BTBLookups                       718                       # Number of BTB lookups
 global.BPredUnit.RASInCorrect                      42                       # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect                    221                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                    451                       # Number of conditional branches predicted
-global.BPredUnit.lookups                          891                       # Number of BP lookups
-global.BPredUnit.usedRAS                          172                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  20134                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 179640                       # Number of bytes of host memory used
+global.BPredUnit.condIncorrect                    218                       # Number of conditional branches incorrect
+global.BPredUnit.condPredicted                    459                       # Number of conditional branches predicted
+global.BPredUnit.lookups                          898                       # Number of BP lookups
+global.BPredUnit.usedRAS                          171                       # Number of times the RAS was used to get a target.
+host_inst_rate                                  19676                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 179796                       # Number of bytes of host memory used
 host_seconds                                     0.12                       # Real time elapsed on the host
-host_tick_rate                                6326998                       # Simulator tick rate (ticks/s)
+host_tick_rate                                6183068                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads                 10                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores                 8                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                   784                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                  376                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                   783                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                  381                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        2387                       # Number of instructions simulated
 sim_seconds                                  0.000001                       # Number of seconds simulated
-sim_ticks                                      752027                       # Number of ticks simulated
+sim_ticks                                      752028                       # Number of ticks simulated
 system.cpu.commit.COM:branches                    396                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events                56                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events                51                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples        28113                      
+system.cpu.commit.COM:committed_per_cycle.samples        28200                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0        27203   9676.31%           
-                               1          230     81.81%           
-                               2          313    111.34%           
-                               3          133     47.31%           
-                               4           80     28.46%           
-                               5           53     18.85%           
-                               6           27      9.60%           
-                               7           18      6.40%           
-                               8           56     19.92%           
+                               0        27270   9670.21%           
+                               1          239     84.75%           
+                               2          332    117.73%           
+                               3          127     45.04%           
+                               4           83     29.43%           
+                               5           54     19.15%           
+                               6           26      9.22%           
+                               7           18      6.38%           
+                               8           51     18.09%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 
@@ -43,69 +43,69 @@ system.cpu.commit.COM:loads                       415                       # Nu
 system.cpu.commit.COM:membars                       0                       # Number of memory barriers committed
 system.cpu.commit.COM:refs                        709                       # Number of memory references committed
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts               144                       # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts               141                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts           2576                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls               4                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts            1694                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts            1703                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                        2387                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                  2387                       # Number of Instructions Simulated
-system.cpu.cpi                             315.051110                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                       315.051110                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses                562                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  7254.010870                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency  7288.590164                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                    470                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency         667369                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.163701                       # miss rate for ReadReq accesses
+system.cpu.cpi                             315.051529                       # CPI: Cycles Per Instruction
+system.cpu.cpi_total                       315.051529                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses                560                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency  7231.967391                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  7288.377049                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                    468                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency         665341                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.164286                       # miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_misses                   92                       # number of ReadReq misses
 system.cpu.dcache.ReadReq_mshr_hits                31                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       444604                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.108541                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_latency       444591                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.108929                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses              61                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses               294                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency  6647.600000                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency  6571.583333                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency  6647.685714                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency  6571.666667                       # average WriteReq mshr miss latency
 system.cpu.dcache.WriteReq_hits                   224                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency        465332                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency        465338                       # number of WriteReq miss cycles
 system.cpu.dcache.WriteReq_miss_rate         0.238095                       # miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_misses                  70                       # number of WriteReq misses
 system.cpu.dcache.WriteReq_mshr_hits               46                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency       157718                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency       157720                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate     0.081633                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses             24                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_blocked_cycles_no_targets  2980.125000                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                   8.164706                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_blocked_cycles_no_targets         2980                       # average number of cycles each access was blocked
+system.cpu.dcache.avg_refs                   8.141176                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                8                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
-system.cpu.dcache.blocked_cycles_no_targets        23841                       # number of cycles access was blocked
+system.cpu.dcache.blocked_cycles_no_targets        23840                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                 856                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  6991.981481                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  7086.141176                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                     694                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         1132701                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.189252                       # miss rate for demand accesses
+system.cpu.dcache.demand_accesses                 854                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency  6979.500000                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  7086.011765                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                     692                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         1130679                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.189696                       # miss rate for demand accesses
 system.cpu.dcache.demand_misses                   162                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                 77                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency       602322                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.099299                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_latency       602311                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.099532                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses               85                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses                856                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  6991.981481                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  7086.141176                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses                854                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency  6979.500000                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  7086.011765                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                    694                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        1132701                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.189252                       # miss rate for overall accesses
+system.cpu.dcache.overall_hits                    692                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        1130679                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.189696                       # miss rate for overall accesses
 system.cpu.dcache.overall_misses                  162                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                77                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency       602322                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.099299                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_latency       602311                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.099532                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses              85                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -121,88 +121,88 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                     85                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                 46.684937                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                      694                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                 46.684988                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                      692                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles          21872                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BlockedCycles          21865                       # Number of cycles decode is blocked
 system.cpu.decode.DECODE:BranchMispred             79                       # Number of times decode detected a branch misprediction
 system.cpu.decode.DECODE:BranchResolved           150                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts            4868                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles              5315                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles                925                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles             338                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:DecodedInsts            4900                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:IdleCycles              5406                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:RunCycles                928                       # Number of cycles decode is running
+system.cpu.decode.DECODE:SquashCycles             336                       # Number of cycles decode is squashing
 system.cpu.decode.DECODE:SquashedInsts            286                       # Number of squashed instructions handled by decode
 system.cpu.decode.DECODE:UnblockCycles              2                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                         891                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                       814                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                          1788                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   145                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                           5562                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                     260                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.031316                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles                814                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches                372                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        0.195487                       # Number of inst fetches per cycle
+system.cpu.fetch.Branches                         898                       # Number of branches that fetch encountered
+system.cpu.fetch.CacheLines                       813                       # Number of cache lines fetched
+system.cpu.fetch.Cycles                          1774                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.IcacheSquashes                   146                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.Insts                           5593                       # Number of instructions fetch has processed
+system.cpu.fetch.SquashCycles                     258                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.branchRate                  0.031468                       # Number of branch fetches per cycle
+system.cpu.fetch.icacheStallCycles                813                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.predictedBranches                371                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.rate                        0.195991                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples               28452                      
+system.cpu.fetch.rateDist.samples               28537                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0        27494   9663.29%           
-                               1           51     17.92%           
-                               2           92     32.34%           
-                               3           74     26.01%           
-                               4          117     41.12%           
-                               5           71     24.95%           
-                               6           43     15.11%           
-                               7           56     19.68%           
-                               8          454    159.57%           
+                               0        27576   9663.24%           
+                               1           50     17.52%           
+                               2           92     32.24%           
+                               3           74     25.93%           
+                               4          117     41.00%           
+                               5           71     24.88%           
+                               6           43     15.07%           
+                               7           56     19.62%           
+                               8          458    160.49%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses                814                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  4971.589641                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4152.244565                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                    563                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        1247869                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.308354                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_accesses                813                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  4955.450199                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4151.809783                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                    562                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency        1243818                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.308733                       # miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_misses                  251                       # number of ReadReq misses
 system.cpu.icache.ReadReq_mshr_hits                67                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency       764013                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.226044                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_miss_latency       763933                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.226322                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             184                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets         3445                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                   3.059783                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                   3.054348                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                4                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets        13780                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                 814                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  4971.589641                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4152.244565                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                     563                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         1247869                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.308354                       # miss rate for demand accesses
+system.cpu.icache.demand_accesses                 813                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency  4955.450199                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4151.809783                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                     562                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency         1243818                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.308733                       # miss rate for demand accesses
 system.cpu.icache.demand_misses                   251                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                 67                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency       764013                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.226044                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_latency       763933                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.226322                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              184                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses                814                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  4971.589641                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4152.244565                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses                813                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency  4955.450199                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4151.809783                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                    563                       # number of overall hits
-system.cpu.icache.overall_miss_latency        1247869                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.308354                       # miss rate for overall accesses
+system.cpu.icache.overall_hits                    562                       # number of overall hits
+system.cpu.icache.overall_miss_latency        1243818                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.308733                       # miss rate for overall accesses
 system.cpu.icache.overall_misses                  251                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                67                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency       764013                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.226044                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_latency       763933                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.226322                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             184                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@@ -218,59 +218,59 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      0                       # number of replacements
 system.cpu.icache.sampled_refs                    184                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                 91.596526                       # Cycle average of tags in use
-system.cpu.icache.total_refs                      563                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                 91.596649                       # Cycle average of tags in use
+system.cpu.icache.total_refs                      562                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idleCycles                          723576                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                      571                       # Number of branches executed
-system.cpu.iew.EXEC:nop                           266                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.119043                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         1018                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                        343                       # Number of stores executed
+system.cpu.idleCycles                          723492                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.iew.EXEC:branches                      566                       # Number of branches executed
+system.cpu.iew.EXEC:nop                           267                       # number of nop insts executed
+system.cpu.iew.EXEC:rate                     0.118022                       # Inst execution rate
+system.cpu.iew.EXEC:refs                         1013                       # number of memory reference insts executed
+system.cpu.iew.EXEC:stores                        341                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                      1875                       # num instructions consuming a value
-system.cpu.iew.WB:count                          3246                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.785067                       # average fanout of values written-back
+system.cpu.iew.WB:consumers                      1860                       # num instructions consuming a value
+system.cpu.iew.WB:count                          3219                       # cumulative count of insts written-back
+system.cpu.iew.WB:fanout                     0.785484                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      1472                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.114087                       # insts written-back per cycle
-system.cpu.iew.WB:sent                           3258                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                  160                       # Number of branch mispredicts detected at execute
-system.cpu.iew.iewBlockCycles                   14741                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                   784                       # Number of dispatched load instructions
+system.cpu.iew.WB:producers                      1461                       # num instructions producing a value
+system.cpu.iew.WB:rate                       0.112801                       # insts written-back per cycle
+system.cpu.iew.WB:sent                           3234                       # cumulative count of insts sent to commit
+system.cpu.iew.branchMispredicts                  152                       # Number of branch mispredicts detected at execute
+system.cpu.iew.iewBlockCycles                   14742                       # Number of cycles IEW is blocking
+system.cpu.iew.iewDispLoadInsts                   783                       # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts                  6                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts                71                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                  376                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts                4271                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                   675                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               113                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                  3387                       # Number of executed instructions
-system.cpu.iew.iewIQFullEvents                      9                       # Number of times the IQ has become full, causing a stall
+system.cpu.iew.iewDispSquashedInsts                79                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispStoreInsts                  381                       # Number of dispatched store instructions
+system.cpu.iew.iewDispatchedInsts                4280                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewExecLoadInsts                   672                       # Number of load instructions executed
+system.cpu.iew.iewExecSquashedInsts               123                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecutedInsts                  3368                       # Number of executed instructions
+system.cpu.iew.iewIQFullEvents                      8                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                    338                       # Number of cycles IEW is squashing
-system.cpu.iew.iewUnblockCycles                    13                       # Number of cycles IEW is unblocking
+system.cpu.iew.iewSquashCycles                    336                       # Number of cycles IEW is squashing
+system.cpu.iew.iewUnblockCycles                    12                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.0.cacheBlocked           82                       # Number of times an access to memory failed due to the cache being blocked
 system.cpu.iew.lsq.thread.0.forwLoads              29                       # Number of loads that had data forwarded from stores
 system.cpu.iew.lsq.thread.0.ignoredResponses            0                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation           11                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation           12                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            0                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads          369                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores           82                       # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents             11                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect           99                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect             61                       # Number of branches that were predicted taken incorrectly
+system.cpu.iew.lsq.thread.0.squashedLoads          368                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedStores           87                       # Number of stores squashed
+system.cpu.iew.memOrderViolationEvents             12                       # Number of memory order violations
+system.cpu.iew.predictedNotTakenIncorrect           96                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedTakenIncorrect             56                       # Number of branches that were predicted taken incorrectly
 system.cpu.ipc                               0.003174                       # IPC: Instructions Per Cycle
 system.cpu.ipc_total                         0.003174                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                    3500                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0                    3491                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                           (null)            0      0.00%            # Type of FU issued
-                          IntAlu         2460     70.29%            # Type of FU issued
+                          IntAlu         2447     70.09%            # Type of FU issued
                          IntMult            1      0.03%            # Type of FU issued
                           IntDiv            0      0.00%            # Type of FU issued
                         FloatAdd            0      0.00%            # Type of FU issued
@@ -279,16 +279,16 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
                        FloatMult            0      0.00%            # Type of FU issued
                         FloatDiv            0      0.00%            # Type of FU issued
                        FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead          695     19.86%            # Type of FU issued
-                        MemWrite          344      9.83%            # Type of FU issued
+                         MemRead          694     19.88%            # Type of FU issued
+                        MemWrite          349     10.00%            # Type of FU issued
                        IprAccess            0      0.00%            # Type of FU issued
                     InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt                    35                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.010000                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_cnt                    34                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_rate             0.009739                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                           (null)            0      0.00%            # attempts to use FU when none available
-                          IntAlu            2      5.71%            # attempts to use FU when none available
+                          IntAlu            1      2.94%            # attempts to use FU when none available
                          IntMult            0      0.00%            # attempts to use FU when none available
                           IntDiv            0      0.00%            # attempts to use FU when none available
                         FloatAdd            0      0.00%            # attempts to use FU when none available
@@ -297,41 +297,41 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                        FloatMult            0      0.00%            # attempts to use FU when none available
                         FloatDiv            0      0.00%            # attempts to use FU when none available
                        FloatSqrt            0      0.00%            # attempts to use FU when none available
-                         MemRead           11     31.43%            # attempts to use FU when none available
-                        MemWrite           22     62.86%            # attempts to use FU when none available
+                         MemRead           11     32.35%            # attempts to use FU when none available
+                        MemWrite           22     64.71%            # attempts to use FU when none available
                        IprAccess            0      0.00%            # attempts to use FU when none available
                     InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples        28452                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples        28537                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0        26938   9467.88%           
-                               1          609    214.04%           
-                               2          344    120.91%           
-                               3          248     87.16%           
-                               4          180     63.26%           
-                               5           81     28.47%           
-                               6           35     12.30%           
-                               7           12      4.22%           
-                               8            5      1.76%           
+                               0        27012   9465.61%           
+                               1          616    215.86%           
+                               2          356    124.75%           
+                               3          247     86.55%           
+                               4          177     62.02%           
+                               5           81     28.38%           
+                               6           32     11.21%           
+                               7           11      3.85%           
+                               8            5      1.75%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 
-system.cpu.iq.ISSUE:rate                     0.123014                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                       3999                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                      3500                       # Number of instructions issued
+system.cpu.iq.ISSUE:rate                     0.122332                       # Inst issue rate
+system.cpu.iq.iqInstsAdded                       4007                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsIssued                      3491                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded                   6                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined            1423                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsExamined            1470                       # Number of squashed instructions iterated over during squash; mainly for profiling
 system.cpu.iq.iqSquashedInstsIssued                25                       # Number of squashed instructions issued
 system.cpu.iq.iqSquashedNonSpecRemoved              2                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined          761                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.iq.iqSquashedOperandsExamined          801                       # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.l2cache.ReadReq_accesses               269                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  4622.063197                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2296.591078                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       1243335                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_avg_miss_latency  4621.724907                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2296.401487                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_miss_latency       1243244                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_misses                 269                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency       617783                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency       617732                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_mshr_misses            269                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@@ -343,29 +343,29 @@ system.cpu.l2cache.blocked_cycles_no_mshrs            0                       #
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses                269                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  4622.063197                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  2296.591078                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_miss_latency  4621.724907                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  2296.401487                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        1243335                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        1243244                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  269                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency       617783                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency       617732                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_misses             269                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses               269                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  4622.063197                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  2296.591078                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_miss_latency  4621.724907                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  2296.401487                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       1243335                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       1243244                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 269                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency       617783                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency       617732                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_misses            269                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -382,25 +382,25 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.sampled_refs                   269                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               138.802720                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               138.802893                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                            28452                       # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles            14785                       # Number of cycles rename is blocking
+system.cpu.numCycles                            28537                       # number of cpu cycles simulated
+system.cpu.rename.RENAME:BlockCycles            14783                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           1768                       # Number of HB maps that are committed
 system.cpu.rename.RENAME:IQFullEvents              18                       # Number of times rename has blocked due to IQ full
-system.cpu.rename.RENAME:IdleCycles              5396                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:IdleCycles              5489                       # Number of cycles rename is idle
 system.cpu.rename.RENAME:LSQFullEvents              1                       # Number of times rename has blocked due to LSQ full
 system.cpu.rename.RENAME:ROBFullEvents              2                       # Number of times rename has blocked due to ROB full
-system.cpu.rename.RENAME:RenameLookups           5263                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts            4690                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands         3393                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles                851                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles             338                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:RenameLookups           5285                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenamedInsts            4708                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedOperands         3399                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RunCycles                852                       # Number of cycles rename is running
+system.cpu.rename.RENAME:SquashCycles             336                       # Number of cycles rename is squashing
 system.cpu.rename.RENAME:UnblockCycles             25                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps              1625                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles         7057                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:UndoneMaps              1631                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles         7052                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts            8                       # count of serializing insts renamed
 system.cpu.rename.RENAME:skidInsts                 88                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts            6                       # count of temporary serializing insts renamed