Merge vm1.(none):/home/stever/bk/newmem-head

into vm1.(none):/home/stever/bk/newmem-cache2 --HG-- extra : convert_revision : aa50af3094f5d459f75b514179b6e3ec5e0bf1df
2007-06-22 16:13:53 -07:00 · 2007-06-22 16:13:53 -07:00 · ed1db23b41
commit ed1db23b41
parent 4d1bcbcd36 16c1b5484f
68 changed files with 4383 additions and 7289 deletions
--- a/src/arch/sparc/isa/formats/mem/basicmem.isa
+++ b/src/arch/sparc/isa/formats/mem/basicmem.isa
@ -57,10 +57,12 @@ let {{
        addrCalcImm = 'EA = Rs1 + imm;'
        iop = InstObjParams(name, Name, 'Mem',
                {"code": code, "postacc_code" : postacc_code,
-                 "fault_check": faultCode, "ea_code": addrCalcReg}, opt_flags)
+                 "fault_check": faultCode, "ea_code": addrCalcReg,
                 "EA_trunc": TruncateEA}, opt_flags)
        iop_imm = InstObjParams(name, Name + "Imm", 'MemImm',
                {"code": code, "postacc_code" : postacc_code,
-                "fault_check": faultCode, "ea_code": addrCalcImm}, opt_flags)
+                 "fault_check": faultCode, "ea_code": addrCalcImm,
                 "EA_trunc": TruncateEA}, opt_flags)
        header_output = MemDeclare.subst(iop) + MemDeclare.subst(iop_imm)
        decoder_output = BasicConstructor.subst(iop) + BasicConstructor.subst(iop_imm)
        decode_block = ROrImmDecode.subst(iop)
--- a/src/arch/sparc/isa/formats/mem/blockmem.isa
+++ b/src/arch/sparc/isa/formats/mem/blockmem.isa
@ -298,11 +298,13 @@ let {{
            iop = InstObjParams(name, Name, 'BlockMem',
                    {"code": pcedCode, "ea_code": addrCalcReg,
                    "fault_check": faultCode, "micro_pc": microPc,
-                    "set_flags": flag_code}, opt_flags)
+                    "set_flags": flag_code, "EA_trunc" : TruncateEA},
                    opt_flags)
            iop_imm = InstObjParams(name, Name + 'Imm', 'BlockMemImm',
                    {"code": pcedCode, "ea_code": addrCalcImm,
                    "fault_check": faultCode, "micro_pc": microPc,
-                    "set_flags": flag_code}, opt_flags)
+                    "set_flags": flag_code, "EA_trunc" : TruncateEA},
                    opt_flags)
            decoder_output += BlockMemMicroConstructor.subst(iop)
            decoder_output += BlockMemMicroConstructor.subst(iop_imm)
            exec_output += doDualSplitExecute(
--- a/src/arch/sparc/isa/formats/mem/swap.isa
+++ b/src/arch/sparc/isa/formats/mem/swap.isa
@ -51,6 +51,7 @@ def template SwapExecute {{
            }
            if(storeCond && fault == NoFault)
            {
                %(EA_trunc)s
                fault = xc->write((uint%(mem_acc_size)s_t)Mem,
                        EA, %(asi_val)s, &mem_data);
            }
@ -91,6 +92,7 @@ def template SwapInitiateAcc {{
            }
            if(fault == NoFault)
            {
                %(EA_trunc)s
                fault = xc->write((uint%(mem_acc_size)s_t)Mem,
                        EA, %(asi_val)s, &mem_data);
            }
@ -157,12 +159,14 @@ let {{
        addrCalcReg = 'EA = Rs1;'
        iop = InstObjParams(name, Name, 'Mem',
                {"code": code, "postacc_code" : postacc_code,
-                 "fault_check": faultCode, "ea_code": addrCalcReg}, opt_flags)
+                 "fault_check": faultCode, "ea_code": addrCalcReg,
                 "EA_trunc" : TruncateEA}, opt_flags)
        header_output = MemDeclare.subst(iop)
        decoder_output = BasicConstructor.subst(iop)
        decode_block = BasicDecode.subst(iop)
        microParams = {"code": code, "postacc_code" : postacc_code,
-            "ea_code" : addrCalcReg, "fault_check" : faultCode}
+            "ea_code" : addrCalcReg, "fault_check" : faultCode,
            "EA_trunc" : TruncateEA}
        exec_output = doSplitExecute(execute, name, Name, asi,
                ["IsStoreConditional"], microParams);
        return (header_output, decoder_output, exec_output, decode_block)
--- a/src/arch/sparc/isa/formats/mem/util.isa
+++ b/src/arch/sparc/isa/formats/mem/util.isa
@ -149,6 +149,7 @@ def template LoadExecute {{
            %(fault_check)s;
            if(fault == NoFault)
            {
                %(EA_trunc)s
                fault = xc->read(EA, (%(mem_acc_type)s%(mem_acc_size)s_t&)Mem, %(asi_val)s);
            }
            if(fault == NoFault)
@ -179,6 +180,7 @@ def template LoadInitiateAcc {{
            %(fault_check)s;
            if(fault == NoFault)
            {
                %(EA_trunc)s
                fault = xc->read(EA, (%(mem_acc_type)s%(mem_acc_size)s_t&)Mem, %(asi_val)s);
            }
            return fault;
@ -224,6 +226,7 @@ def template StoreExecute {{
            }
            if(storeCond && fault == NoFault)
            {
                %(EA_trunc)s
                fault = xc->write((%(mem_acc_type)s%(mem_acc_size)s_t)Mem,
                        EA, %(asi_val)s, 0);
            }
@ -257,6 +260,7 @@ def template StoreInitiateAcc {{
            }
            if(storeCond && fault == NoFault)
            {
                %(EA_trunc)s
                fault = xc->write((%(mem_acc_type)s%(mem_acc_size)s_t)Mem,
                        EA, %(asi_val)s, 0);
            }
@ -317,6 +321,11 @@ let {{
            fault = new PrivilegedAction;
    '''
    TruncateEA = '''
 #if !FULL_SYSTEM
                EA = Pstate<3:> ? EA<31:0> : EA;
 #endif
    '''
 }};
 //A simple function to generate the name of the macro op of a certain
@ -346,7 +355,8 @@ let {{
                (eaRegCode, nameReg, NameReg),
                (eaImmCode, nameImm, NameImm)):
            microParams = {"code": code, "postacc_code" : postacc_code,
-                "ea_code": eaCode, "fault_check": faultCode}
+                "ea_code": eaCode, "fault_check": faultCode,
                "EA_trunc" : TruncateEA}
            executeCode += doSplitExecute(execute, name, Name,
                    asi, opt_flags, microParams)
        return executeCode
--- a/src/arch/sparc/isa/operands.isa
+++ b/src/arch/sparc/isa/operands.isa
@ -149,7 +149,8 @@ def operands {{
    'Fprs':		('ControlReg', 'udw', 'MISCREG_FPRS', None, 43),
    'Pcr':		('ControlReg', 'udw', 'MISCREG_PCR', None, 44),
    'Pic':		('ControlReg', 'udw', 'MISCREG_PIC', None, 45),
-    'Gsr':		('ControlReg', 'udw', 'MISCREG_GSR', None, 46),
+#   'Gsr':		('ControlReg', 'udw', 'MISCREG_GSR', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 46),
    'Gsr':		('IntReg', 'udw', 'NumIntArchRegs + 8', None, 46),
    'Softint':		('ControlReg', 'udw', 'MISCREG_SOFTINT', None, 47),
    'SoftintSet':	('ControlReg', 'udw', 'MISCREG_SOFTINT_SET', None, 48),
    'SoftintClr':	('ControlReg', 'udw', 'MISCREG_SOFTINT_CLR', None, 49),
@ -187,7 +188,7 @@ def operands {{
    'Hver':		('ControlReg', 'udw', 'MISCREG_HVER', None, 74),
    'StrandStsReg':	('ControlReg', 'udw', 'MISCREG_STRAND_STS_REG', None, 75),
-    'Fsr':		('ControlReg', 'udw', 'MISCREG_FSR', None, 80),
+    'Fsr':		('ControlReg', 'udw', 'MISCREG_FSR', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 80),
    # Mem gets a large number so it's always last
    'Mem': 		('Mem', 'udw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 100)
--- a/src/arch/sparc/isa_traits.hh
+++ b/src/arch/sparc/isa_traits.hh
@ -58,7 +58,7 @@ namespace SparcISA
    // These enumerate all the registers for dependence tracking.
    enum DependenceTags {
-        FP_Base_DepTag = 32*3+8,
+        FP_Base_DepTag = 32*3+9,
        Ctrl_Base_DepTag = FP_Base_DepTag + 64
    };
--- a/src/arch/sparc/process.cc
+++ b/src/arch/sparc/process.cc
@ -88,8 +88,8 @@ Sparc32LiveProcess::startup()
    //From the SPARC ABI
-    //The process runs in user mode
+    //The process runs in user mode with 32 bit addresses
-    threadContexts[0]->setMiscReg(MISCREG_PSTATE, 0x02);
+    threadContexts[0]->setMiscReg(MISCREG_PSTATE, 0x0a);
    //Setup default FP state
    threadContexts[0]->setMiscRegNoEffect(MISCREG_FSR, 0);
--- a/src/arch/sparc/sparc_traits.hh
+++ b/src/arch/sparc/sparc_traits.hh
@ -42,7 +42,7 @@ namespace SparcISA
    // Number of register windows, can legally be 3 to 32
    const int NWindows = 8;
    //const int NumMicroIntRegs = 1;
-    const int NumMicroIntRegs = 8;
+    const int NumMicroIntRegs = 9;
 //    const int NumRegularIntRegs = MaxGL * 8 + NWindows * 16;
 //    const int NumMicroIntRegs = 1;
--- a/src/arch/sparc/types.hh
+++ b/src/arch/sparc/types.hh
@ -59,7 +59,7 @@ namespace SparcISA
    typedef int RegContextVal;
-    typedef uint8_t RegIndex;
+    typedef uint16_t RegIndex;
 }
 #endif
--- a/src/arch/x86/isa/includes.isa
+++ b/src/arch/x86/isa/includes.isa
@ -103,7 +103,6 @@ output header {{
 #include "base/misc.hh"
 #include "cpu/static_inst.hh"
 #include "mem/packet.hh"
 #include "mem/request.hh"  // some constructors use MemReq flags
 #include "sim/faults.hh"
 }};
--- a/src/arch/x86/isa/insts/control_transfer/call.py
+++ b/src/arch/x86/isa/insts/control_transfer/call.py
@ -61,8 +61,8 @@ def macroop CALL_I
    limm t2, imm
    rdip t1
-    subi "INTREG_RSP", "INTREG_RSP", dsz
+    subi rsp, rsp, dsz
-    st t1, ss, [0, t0, "INTREG_RSP"]
+    st t1, ss, [0, t0, rsp]
    wrip t1, t2
 };
 '''
--- a/src/arch/x86/isa/insts/data_transfer/stack_operations.py
+++ b/src/arch/x86/isa/insts/data_transfer/stack_operations.py
@ -58,16 +58,16 @@ def macroop POP_R {
    # Make the default data size of pops 64 bits in 64 bit mode
    .adjust_env oszIn64Override
-    ld reg, ss, [0, t0, "INTREG_RSP"]
+    ld reg, ss, [0, t0, rsp]
-    addi "INTREG_RSP", "INTREG_RSP", dsz
+    addi rsp, rsp, dsz
 };
 def macroop PUSH_R {
    # Make the default data size of pushes 64 bits in 64 bit mode
    .adjust_env oszIn64Override
-    subi "INTREG_RSP", "INTREG_RSP", dsz
+    subi rsp, rsp, dsz
-    st reg, ss, [0, t0, "INTREG_RSP"]
+    st reg, ss, [0, t0, rsp]
 };
 '''
 #let {{
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@ -91,6 +91,9 @@ let {{
        "osz" : "env.operandSize",
        "ssz" : "env.stackSize"
    }
    for reg in ('ax', 'bx', 'cx', 'dx', 'sp', 'bp', 'si', 'di'):
        assembler.symbols["r%s" % reg] = "INTREG_R%s" % reg.upper()
    assembler.symbols.update(symbols)
    # Code literal which forces a default 64 bit operand size in 64 bit mode.
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@ -209,6 +209,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
    /** PC of this instruction. */
    Addr PC;
    /** Micro PC of this instruction. */
    Addr microPC;
  protected:
    /** Next non-speculative PC.  It is not filled in at fetch, but rather
     *  once the target of the branch is truly known (either decode or
@ -219,12 +222,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
    /** Next non-speculative NPC. Target PC for Mips or Sparc. */
    Addr nextNPC;
    /** Next non-speculative micro PC. */
    Addr nextMicroPC;
    /** Predicted next PC. */
    Addr predPC;
    /** Predicted next NPC. */
    Addr predNPC;
    /** Predicted next microPC */
    Addr predMicroPC;
    /** If this is a branch that was predicted taken */
    bool predTaken;
@ -340,6 +349,17 @@ class BaseDynInst : public FastAlloc, public RefCounted
    {
        _flatDestRegIdx[idx] = flattened_dest;
    }
    /** BaseDynInst constructor given a StaticInstPtr.
     *  @param staticInst A StaticInstPtr to the underlying instruction.
     *  @param PC The PC of the instruction.
     *  @param NPC The NPC supplied for the instruction; used to initialize
     *             the next PC / next NPC.
     *  @param microPC The micro PC of the instruction.
     *  @param pred_PC The predicted next PC.
     *  @param pred_NPC The predicted next NPC.
     *  @param pred_MicroPC The predicted next micro PC.
     *  @param seq_num The sequence number of the instruction.
     *  @param cpu Pointer to the instruction's CPU.
     */
    BaseDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
            Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC,
            InstSeqNum seq_num, ImplCPU *cpu);
    /** BaseDynInst constructor given a binary instruction.
     *  @param inst The binary instruction.
@ -349,8 +369,8 @@ class BaseDynInst : public FastAlloc, public RefCounted
     *  @param seq_num The sequence number of the instruction.
     *  @param cpu Pointer to the instruction's CPU.
     */
-    BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
+    BaseDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
-            Addr pred_PC, Addr pred_NPC,
+            Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC,
            InstSeqNum seq_num, ImplCPU *cpu);
    /** BaseDynInst constructor given a StaticInst pointer.
@ -402,11 +422,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
 #endif
    }
    /** Returns the next micro PC recorded for this instruction. */
    Addr readNextMicroPC()
    {
        return nextMicroPC;
    }
    /** Set the predicted target of this current instruction. */
-    void setPredTarg(Addr predicted_PC, Addr predicted_NPC)
+    void setPredTarg(Addr predicted_PC, Addr predicted_NPC,
            Addr predicted_MicroPC)
    {
        predPC = predicted_PC;
        predNPC = predicted_NPC;
        predMicroPC = predicted_MicroPC;
    }
    /** Returns the predicted PC immediately after the branch. */
@ -415,6 +442,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
    /** Returns the predicted PC two instructions after the branch */
    Addr readPredNPC() { return predNPC; }
    /** Returns the predicted micro PC after the branch */
    Addr readPredMicroPC() { return predMicroPC; }
    /** Returns whether the instruction was predicted taken or not. */
    bool readPredTaken()
    {
@ -430,7 +460,8 @@ class BaseDynInst : public FastAlloc, public RefCounted
    bool mispredicted()
    {
        return readPredPC() != readNextPC() ||
-            readPredNPC() != readNextNPC();
+            readPredNPC() != readNextNPC() ||
            readPredMicroPC() != readNextMicroPC();
    }
    //
@ -467,6 +498,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
    bool isQuiesce() const { return staticInst->isQuiesce(); }
    bool isIprAccess() const { return staticInst->isIprAccess(); }
    bool isUnverifiable() const { return staticInst->isUnverifiable(); }
    bool isMacroop() const { return staticInst->isMacroop(); }
    bool isMicroop() const { return staticInst->isMicroop(); }
    bool isDelayedCommit() const { return staticInst->isDelayedCommit(); }
    bool isLastMicroop() const { return staticInst->isLastMicroop(); }
    bool isFirstMicroop() const { return staticInst->isFirstMicroop(); }
    bool isMicroBranch() const { return staticInst->isMicroBranch(); }
    /** Temporarily sets this instruction as a serialize before instruction. */
    void setSerializeBefore() { status.set(SerializeBefore); }
@ -700,16 +737,26 @@ class BaseDynInst : public FastAlloc, public RefCounted
    /** Read the PC of this instruction. */
    const Addr readPC() const { return PC; }
    /**Read the micro PC of this instruction. */
    const Addr readMicroPC() const { return microPC; }
    /** Set the next PC of this instruction (its actual target). */
-    void setNextPC(uint64_t val)
+    void setNextPC(Addr val)
    {
        nextPC = val;
    }
    /** Set the next NPC of this instruction (the target in Mips or Sparc).*/
-    void setNextNPC(uint64_t val)
+    void setNextNPC(Addr val)
    {
 #if ISA_HAS_DELAY_SLOT
        nextNPC = val;
 #endif
    }
    /** Set the next micro PC of this instruction. Unlike setNextNPC(), the
     *  store is unconditional (not guarded by ISA_HAS_DELAY_SLOT).
     */
    void setNextMicroPC(Addr val)
    {
        nextMicroPC = val;
    }
    /** Sets the ASID. */
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@ -62,19 +62,66 @@ my_hash_t thishash;
 #endif
 template <class Impl>
-BaseDynInst<Impl>::BaseDynInst(TheISA::ExtMachInst machInst,
+BaseDynInst<Impl>::BaseDynInst(StaticInstPtr _staticInst,
                               Addr inst_PC, Addr inst_NPC,
                               Addr inst_MicroPC,
                               Addr pred_PC, Addr pred_NPC,
                               Addr pred_MicroPC,
                               InstSeqNum seq_num, ImplCPU *cpu)
-  : staticInst(machInst, inst_PC), traceData(NULL), cpu(cpu)
+  : staticInst(_staticInst), traceData(NULL), cpu(cpu)
 {
    seqNum = seq_num;
    bool nextIsMicro =
        staticInst->isMicroop() && !staticInst->isLastMicroop();
    PC = inst_PC;
    microPC = inst_MicroPC;
    if (nextIsMicro) {
        nextPC = inst_PC;
        nextNPC = inst_NPC;
        nextMicroPC = microPC + 1;
    } else {
        nextPC = inst_NPC;
        nextNPC = nextPC + sizeof(TheISA::MachInst);
        nextMicroPC = 0;
    }
    predPC = pred_PC;
    predNPC = pred_NPC;
    predMicroPC = pred_MicroPC;
    predTaken = false;
    initVars();
 }
 template <class Impl>
 BaseDynInst<Impl>::BaseDynInst(TheISA::ExtMachInst inst,
                               Addr inst_PC, Addr inst_NPC,
                               Addr inst_MicroPC,
                               Addr pred_PC, Addr pred_NPC,
                               Addr pred_MicroPC,
                               InstSeqNum seq_num, ImplCPU *cpu)
  : staticInst(inst, inst_PC), traceData(NULL), cpu(cpu)
 {
    seqNum = seq_num;
    bool nextIsMicro =
        staticInst->isMicroop() && !staticInst->isLastMicroop();
    PC = inst_PC;
    microPC = inst_MicroPC;
    if (nextIsMicro) {
        nextPC = inst_PC;
        nextNPC = inst_NPC;
        nextMicroPC = microPC + 1;
    } else {
        nextPC = inst_NPC;
        nextNPC = nextPC + sizeof(TheISA::MachInst);
        nextMicroPC = 0;
    }
    predPC = pred_PC;
    predNPC = pred_NPC;
    predMicroPC = pred_MicroPC;
    predTaken = false;
    initVars();
--- a/src/cpu/o3/alpha/dyn_inst.hh
+++ b/src/cpu/o3/alpha/dyn_inst.hh
@ -73,8 +73,13 @@ class AlphaDynInst : public BaseDynInst<Impl>
  public:
    /** BaseDynInst constructor given a static inst pointer and explicit PC state. */
-    AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC,
+    AlphaDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
-                 Addr Pred_PC, Addr Pred_NPC,
+                 Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
                 InstSeqNum seq_num, O3CPU *cpu);
    /** BaseDynInst constructor given a binary instruction. */
    AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
                 Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
                 InstSeqNum seq_num, O3CPU *cpu);
    /** BaseDynInst constructor given a static inst pointer. */
--- a/src/cpu/o3/alpha/dyn_inst_impl.hh
+++ b/src/cpu/o3/alpha/dyn_inst_impl.hh
@ -31,10 +31,25 @@
 #include "cpu/o3/alpha/dyn_inst.hh"
 template <class Impl>
-AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst, Addr PC, Addr NPC,
+AlphaDynInst<Impl>::AlphaDynInst(StaticInstPtr staticInst,
                                 Addr PC, Addr NPC, Addr microPC,
                                 Addr Pred_PC, Addr Pred_NPC,
                                 Addr Pred_MicroPC,
                                 InstSeqNum seq_num, O3CPU *cpu)
-    : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
+    : BaseDynInst<Impl>(staticInst, PC, NPC, microPC,
            Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
 {
    initVars();
 }
 template <class Impl>
 AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst,
                                 Addr PC, Addr NPC, Addr microPC,
                                 Addr Pred_PC, Addr Pred_NPC,
                                 Addr Pred_MicroPC,
                                 InstSeqNum seq_num, O3CPU *cpu)
    : BaseDynInst<Impl>(inst, PC, NPC, microPC,
            Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
 {
    initVars();
 }
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@ -87,10 +87,10 @@ struct DefaultIEWDefaultCommit {
    bool squash[Impl::MaxThreads];
    bool branchMispredict[Impl::MaxThreads];
    bool branchTaken[Impl::MaxThreads];
-    bool squashDelaySlot[Impl::MaxThreads];
+    Addr mispredPC[Impl::MaxThreads];
-    uint64_t mispredPC[Impl::MaxThreads];
+    Addr nextPC[Impl::MaxThreads];
-    uint64_t nextPC[Impl::MaxThreads];
+    Addr nextNPC[Impl::MaxThreads];
-    uint64_t nextNPC[Impl::MaxThreads];
+    Addr nextMicroPC[Impl::MaxThreads];
    InstSeqNum squashedSeqNum[Impl::MaxThreads];
    bool includeSquashInst[Impl::MaxThreads];
@ -114,15 +114,15 @@ struct TimeBufStruct {
        uint64_t branchAddr;
        InstSeqNum doneSeqNum;
        InstSeqNum bdelayDoneSeqNum;
        // @todo: Might want to package this kind of branch stuff into a single
        // struct as it is used pretty frequently.
        bool branchMispredict;
        bool branchTaken;
-        uint64_t mispredPC;
+        Addr mispredPC;
-        uint64_t nextPC;
+        Addr nextPC;
-        uint64_t nextNPC;
+        Addr nextNPC;
        Addr nextMicroPC;
        unsigned branchCount;
    };
@ -160,18 +160,16 @@ struct TimeBufStruct {
        bool branchMispredict;
        bool branchTaken;
-        uint64_t mispredPC;
+        Addr mispredPC;
-        uint64_t nextPC;
+        Addr nextPC;
-        uint64_t nextNPC;
+        Addr nextNPC;
        Addr nextMicroPC;
        // Represents the instruction that has either been retired or
        // squashed.  Similar to having a single bus that broadcasts the
        // retired or squashed sequence number.
        InstSeqNum doneSeqNum;
        InstSeqNum bdelayDoneSeqNum;
        bool squashDelaySlot;
        //Just in case we want to do a commit/squash on a cycle
        //(necessary for multiple ROBs?)
        bool commitInsts;
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@ -279,25 +279,37 @@ class DefaultCommit
    /** Returns the PC of the head instruction of the ROB.
     * @todo: Probably remove this function as it returns only thread 0.
     */
-    uint64_t readPC() { return PC[0]; }
+    Addr readPC() { return PC[0]; }
    /** Returns the PC of a specific thread. */
-    uint64_t readPC(unsigned tid) { return PC[tid]; }
+    Addr readPC(unsigned tid) { return PC[tid]; }
    /** Sets the PC of a specific thread. */
-    void setPC(uint64_t val, unsigned tid) { PC[tid] = val; }
+    void setPC(Addr val, unsigned tid) { PC[tid] = val; }
    /** Reads the micro PC of a specific thread. */
    Addr readMicroPC(unsigned tid) { return microPC[tid]; }
    /** Sets the micro PC of a specific thread */
    void setMicroPC(Addr val, unsigned tid) { microPC[tid] = val; }
    /** Reads the next PC of a specific thread. */
-    uint64_t readNextPC(unsigned tid) { return nextPC[tid]; }
+    Addr readNextPC(unsigned tid) { return nextPC[tid]; }
    /** Sets the next PC of a specific thread. */
-    void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; }
+    void setNextPC(Addr val, unsigned tid) { nextPC[tid] = val; }
    /** Reads the next NPC of a specific thread. */
-    uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; }
+    Addr readNextNPC(unsigned tid) { return nextNPC[tid]; }
    /** Sets the next NPC of a specific thread. */
-    void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; }
+    void setNextNPC(Addr val, unsigned tid) { nextNPC[tid] = val; }
    /** Reads the next micro PC of a specific thread. */
    Addr readNextMicroPC(unsigned tid) { return nextMicroPC[tid]; }
    /** Sets the next micro PC of a specific thread. */
    void setNextMicroPC(Addr val, unsigned tid) { nextMicroPC[tid] = val; }
  private:
    /** Time buffer interface. */
@ -402,12 +414,20 @@ class DefaultCommit
     */
    Addr PC[Impl::MaxThreads];
    /** The commit micro PC of each thread.  Refers to the instruction that
     * is currently being processed/committed.
     */
    Addr microPC[Impl::MaxThreads];
    /** The next PC of each thread. */
    Addr nextPC[Impl::MaxThreads];
    /** The next NPC of each thread. */
    Addr nextNPC[Impl::MaxThreads];
    /** The next micro PC of each thread. */
    Addr nextMicroPC[Impl::MaxThreads];
    /** The sequence number of the youngest valid instruction in the ROB. */
    InstSeqNum youngestSeqNum[Impl::MaxThreads];
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@ -124,7 +124,7 @@ DefaultCommit<Impl>::DefaultCommit(O3CPU *_cpu, Params *params)
        committedStores[i] = false;
        trapSquash[i] = false;
        tcSquash[i] = false;
-        PC[i] = nextPC[i] = nextNPC[i] = 0;
+        microPC[i] = nextMicroPC[i] = PC[i] = nextPC[i] = nextNPC[i] = 0;
    }
 #if FULL_SYSTEM
    interrupt = NoFault;
@ -508,6 +508,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
    toIEW->commitInfo[tid].nextPC = PC[tid];
    toIEW->commitInfo[tid].nextNPC = nextPC[tid];
    toIEW->commitInfo[tid].nextMicroPC = nextMicroPC[tid];
 }
 template <class Impl>
@ -738,38 +739,15 @@ DefaultCommit<Impl>::commit()
            // then use one older sequence number.
            InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid];
 #if ISA_HAS_DELAY_SLOT
            InstSeqNum bdelay_done_seq_num = squashed_inst;
            bool squash_bdelay_slot = fromIEW->squashDelaySlot[tid];
            bool branchMispredict = fromIEW->branchMispredict[tid];
            // Squashing/not squashing the branch delay slot only makes
            // sense when you're squashing from a branch, ie from a branch
            // mispredict.
            if (branchMispredict && !squash_bdelay_slot) {
                bdelay_done_seq_num++;
            }
 #endif
            if (fromIEW->includeSquashInst[tid] == true) {
                squashed_inst--;
 #if ISA_HAS_DELAY_SLOT
                bdelay_done_seq_num--;
 #endif
            }
            // All younger instructions will be squashed. Set the sequence
            // number as the youngest instruction in the ROB.
            youngestSeqNum[tid] = squashed_inst;
 #if ISA_HAS_DELAY_SLOT
            rob->squash(bdelay_done_seq_num, tid);
            toIEW->commitInfo[tid].squashDelaySlot = squash_bdelay_slot;
            toIEW->commitInfo[tid].bdelayDoneSeqNum = bdelay_done_seq_num;
 #else
            rob->squash(squashed_inst, tid);
            toIEW->commitInfo[tid].squashDelaySlot = true;
 #endif
            changedROBNumEntries[tid] = true;
            toIEW->commitInfo[tid].doneSeqNum = squashed_inst;
@ -788,6 +766,7 @@ DefaultCommit<Impl>::commit()
            toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid];
            toIEW->commitInfo[tid].nextNPC = fromIEW->nextNPC[tid];
            toIEW->commitInfo[tid].nextMicroPC = fromIEW->nextMicroPC[tid];
            toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid];
@ -806,10 +785,6 @@ DefaultCommit<Impl>::commit()
        // Try to commit any instructions.
        commitInsts();
    } else {
 #if ISA_HAS_DELAY_SLOT
        skidInsert();
 #endif
    }
    //Check for any activity
@ -901,6 +876,7 @@ DefaultCommit<Impl>::commitInsts()
            PC[tid] = head_inst->readPC();
            nextPC[tid] = head_inst->readNextPC();
            nextNPC[tid] = head_inst->readNextNPC();
            nextMicroPC[tid] = head_inst->readNextMicroPC();
            // Increment the total number of non-speculative instructions
            // executed.
@ -929,12 +905,10 @@ DefaultCommit<Impl>::commitInsts()
                }
                PC[tid] = nextPC[tid];
 #if ISA_HAS_DELAY_SLOT
                nextPC[tid] = nextNPC[tid];
                nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst);
-#else
+                microPC[tid] = nextMicroPC[tid];
-                nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst);
+                nextMicroPC[tid] = microPC[tid] + 1;
 #endif
 #if FULL_SYSTEM
                int count = 0;
@ -1161,37 +1135,13 @@ DefaultCommit<Impl>::getInsts()
 {
    DPRINTF(Commit, "Getting instructions from Rename stage.\n");
 #if ISA_HAS_DELAY_SLOT
    // Read any renamed instructions and place them into the ROB.
    int insts_to_process = std::min((int)renameWidth,
                               (int)(fromRename->size + skidBuffer.size()));
    int rename_idx = 0;
    DPRINTF(Commit, "%i insts available to process. Rename Insts:%i "
            "SkidBuffer Insts:%i\n", insts_to_process, fromRename->size,
            skidBuffer.size());
 #else
    // Read any renamed instructions and place them into the ROB.
    int insts_to_process = std::min((int)renameWidth, fromRename->size);
 #endif
    for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) {
        DynInstPtr inst;
 #if ISA_HAS_DELAY_SLOT
        // Get insts from skidBuffer or from Rename
        if (skidBuffer.size() > 0) {
            DPRINTF(Commit, "Grabbing skidbuffer inst.\n");
            inst = skidBuffer.front();
            skidBuffer.pop();
        } else {
            DPRINTF(Commit, "Grabbing rename inst.\n");
            inst = fromRename->insts[rename_idx++];
        }
 #else
        inst = fromRename->insts[inst_num];
 #endif
        int tid = inst->threadNumber;
        if (!inst->isSquashed() &&
@ -1213,30 +1163,6 @@ DefaultCommit<Impl>::getInsts()
                    inst->readPC(), inst->seqNum, tid);
        }
    }
 #if ISA_HAS_DELAY_SLOT
    if (rename_idx < fromRename->size) {
        DPRINTF(Commit,"Placing Rename Insts into skidBuffer.\n");
        for (;
             rename_idx < fromRename->size;
             rename_idx++) {
            DynInstPtr inst = fromRename->insts[rename_idx];
            if (!inst->isSquashed()) {
                DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ",
                        "skidBuffer.\n", inst->readPC(), inst->seqNum,
                        inst->threadNumber);
                skidBuffer.push(inst);
            } else {
                DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was "
                        "squashed, skipping.\n",
                        inst->readPC(), inst->seqNum, inst->threadNumber);
            }
        }
    }
 #endif
 }
 template <class Impl>
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@ -694,7 +694,7 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
    // Squash Throughout Pipeline
    InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum;
-    fetch.squash(0, sizeof(TheISA::MachInst), squash_seq_num, true, tid);
+    fetch.squash(0, sizeof(TheISA::MachInst), 0, squash_seq_num, tid);
    decode.squash(tid);
    rename.squash(squash_seq_num, tid);
    iew.squash(tid);
@ -1148,6 +1148,20 @@ FullO3CPU<Impl>::setPC(Addr new_PC,unsigned tid)
    commit.setPC(new_PC, tid);
 }
 /**
  * Reads the commit micro PC of a specific thread by forwarding to
  * commit.readMicroPC().
  * NOTE(review): the return type is spelled uint64_t here, while the
  * declaration in cpu.hh uses Addr -- confirm both name the same type.
  */
 template <class Impl>
 uint64_t
 FullO3CPU<Impl>::readMicroPC(unsigned tid)
 {
    return commit.readMicroPC(tid);
 }
 /**
  * Sets the commit micro PC of a specific thread by forwarding to
  * commit.setMicroPC(). Note that the parameter is named new_PC even
  * though it carries a micro PC value.
  */
 template <class Impl>
 void
 FullO3CPU<Impl>::setMicroPC(Addr new_PC,unsigned tid)
 {
    commit.setMicroPC(new_PC, tid);
 }
 template <class Impl>
 uint64_t
 FullO3CPU<Impl>::readNextPC(unsigned tid)
@ -1176,6 +1190,20 @@ FullO3CPU<Impl>::setNextNPC(uint64_t val,unsigned tid)
    commit.setNextNPC(val, tid);
 }
 /**
  * Reads the commit next micro PC of a specific thread by forwarding to
  * commit.readNextMicroPC().
  * NOTE(review): the return type is spelled uint64_t here, while the
  * declaration in cpu.hh uses Addr -- confirm both name the same type.
  */
 template <class Impl>
 uint64_t
 FullO3CPU<Impl>::readNextMicroPC(unsigned tid)
 {
    return commit.readNextMicroPC(tid);
 }
 /**
  * Sets the commit next micro PC of a specific thread by forwarding to
  * commit.setNextMicroPC(). Note that the parameter is named new_PC even
  * though it carries a next-micro-PC value.
  */
 template <class Impl>
 void
 FullO3CPU<Impl>::setNextMicroPC(Addr new_PC,unsigned tid)
 {
    commit.setNextMicroPC(new_PC, tid);
 }
 template <class Impl>
 typename FullO3CPU<Impl>::ListIt
 FullO3CPU<Impl>::addInst(DynInstPtr &inst)
@ -1224,9 +1252,7 @@ FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst)
 template <class Impl>
 void
-FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid,
+FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid)
                                     bool squash_delay_slot,
                                     const InstSeqNum &delay_slot_seq_num)
 {
    DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction"
            " list.\n", tid);
@ -1257,12 +1283,6 @@ FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid,
    while (inst_it != end_it) {
        assert(!instList.empty());
 #if ISA_HAS_DELAY_SLOT
        if(!squash_delay_slot &&
           delay_slot_seq_num >= (*inst_it)->seqNum) {
            break;
        }
 #endif
        squashInstIt(inst_it, tid);
        inst_it--;
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@ -434,22 +434,34 @@ class FullO3CPU : public BaseO3CPU
    void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid);
    /** Reads the commit PC of a specific thread. */
-    uint64_t readPC(unsigned tid);
+    Addr readPC(unsigned tid);
    /** Sets the commit PC of a specific thread. */
    void setPC(Addr new_PC, unsigned tid);
    /** Reads the commit micro PC of a specific thread. */
    Addr readMicroPC(unsigned tid);
    /** Sets the commit micro PC of a specific thread. */
    void setMicroPC(Addr new_microPC, unsigned tid);
    /** Reads the next PC of a specific thread. */
-    uint64_t readNextPC(unsigned tid);
+    Addr readNextPC(unsigned tid);
    /** Sets the next PC of a specific thread. */
-    void setNextPC(uint64_t val, unsigned tid);
+    void setNextPC(Addr val, unsigned tid);
    /** Reads the next NPC of a specific thread. */
-    uint64_t readNextNPC(unsigned tid);
+    Addr readNextNPC(unsigned tid);
    /** Sets the next NPC of a specific thread. */
-    void setNextNPC(uint64_t val, unsigned tid);
+    void setNextNPC(Addr val, unsigned tid);
    /** Reads the commit next micro PC of a specific thread. */
    Addr readNextMicroPC(unsigned tid);
    /** Sets the commit next micro PC of a specific thread. */
    void setNextMicroPC(Addr val, unsigned tid);
    /** Function to add instruction onto the head of the list of the
     *  instructions.  Used when new instructions are fetched.
@ -469,8 +481,7 @@ class FullO3CPU : public BaseO3CPU
    /** Remove all instructions that are not currently in the ROB.
     *  There's also an option to not squash delay slot instructions.*/
-    void removeInstsNotInROB(unsigned tid, bool squash_delay_slot,
+    void removeInstsNotInROB(unsigned tid);
                             const InstSeqNum &delay_slot_seq_num);
    /** Remove all instructions younger than the given sequence number. */
    void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid);
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@ -49,8 +49,6 @@ DefaultDecode<Impl>::DefaultDecode(O3CPU *_cpu, Params *params)
        stalls[i].rename = false;
        stalls[i].iew = false;
        stalls[i].commit = false;
        squashAfterDelaySlot[i] = false;
    }
    // @todo: Make into a parameter
@ -275,20 +273,16 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
    ///explicitly for ISAs with delay slots.
    toFetch->decodeInfo[tid].nextNPC =
        inst->branchTarget() + sizeof(TheISA::MachInst);
    toFetch->decodeInfo[tid].nextMicroPC = inst->readMicroPC();
 #if ISA_HAS_DELAY_SLOT
    toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() !=
        (inst->readNextPC() + sizeof(TheISA::MachInst));
    toFetch->decodeInfo[tid].bdelayDoneSeqNum = bdelayDoneSeqNum[tid];
    squashAfterDelaySlot[tid] = false;
    InstSeqNum squash_seq_num = bdelayDoneSeqNum[tid];
 #else
    toFetch->decodeInfo[tid].branchTaken =
        inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
 #endif
    InstSeqNum squash_seq_num = inst->seqNum;
 #endif
    // Might have to tell fetch to unblock.
    if (decodeStatus[tid] == Blocked ||
@ -309,30 +303,10 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
    // Clear the instruction list and skid buffer in case they have any
    // insts in them.
    while (!insts[tid].empty()) {
 #if ISA_HAS_DELAY_SLOT
        if (insts[tid].front()->seqNum <= squash_seq_num) {
            DPRINTF(Decode, "[tid:%i]: Cannot remove incoming decode "
                    "instructions before delay slot [sn:%i]. %i insts"
                    "left in decode.\n", tid, squash_seq_num,
                    insts[tid].size());
            break;
        }
 #endif
        insts[tid].pop();
    }
    while (!skidBuffer[tid].empty()) {
 #if ISA_HAS_DELAY_SLOT
        if (skidBuffer[tid].front()->seqNum <= squash_seq_num) {
            DPRINTF(Decode, "[tid:%i]: Cannot remove skidBuffer "
                    "instructions before delay slot [sn:%i]. %i insts"
                    "left in decode.\n", tid, squash_seq_num,
                    insts[tid].size());
            break;
        }
 #endif
        skidBuffer[tid].pop();
    }
@ -760,47 +734,12 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
                // Might want to set some sort of boolean and just do
                // a check at the end
 #if !ISA_HAS_DELAY_SLOT
                squash(inst, inst->threadNumber);
                Addr target = inst->branchTarget();
-                inst->setPredTarg(target, target + sizeof(TheISA::MachInst));
+                //The micro pc after an instruction level branch should be 0
                inst->setPredTarg(target, target + sizeof(TheISA::MachInst), 0);
                break;
 #else
                // If mispredicted as taken, then ignore delay slot
                // instruction... else keep delay slot and squash
                // after it is sent to rename
                if (inst->readPredTaken() && inst->isCondDelaySlot()) {
                    DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst."
                            "[sn:%i] PC %#x mispredicted as taken.\n", tid,
                            inst->seqNum, inst->PC);
                    bdelayDoneSeqNum[tid] = inst->seqNum;
                    squash(inst, inst->threadNumber);
                    Addr target = inst->branchTarget();
                    inst->setPredTarg(target,
                            target + sizeof(TheISA::MachInst));
                    break;
                } else {
                    DPRINTF(Decode, "[tid:%i]: Misprediction detected at "
                            "[sn:%i] PC %#x, will squash after delay slot "
                            "inst. is sent to Rename\n",
                            tid, inst->seqNum, inst->PC);
                    bdelayDoneSeqNum[tid] = inst->seqNum + 1;
                    squashAfterDelaySlot[tid] = true;
                    squashInst[tid] = inst;
                    continue;
            }
 #endif
            }
        }
        if (squashAfterDelaySlot[tid]) {
            assert(!inst->isSquashed());
            squash(squashInst[tid], squashInst[tid]->threadNumber);
            Addr target = squashInst[tid]->branchTarget();
            squashInst[tid]->setPredTarg(target,
                    target + sizeof(TheISA::MachInst));
            assert(!inst->isSquashed());
            break;
        }
    }
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@ -227,7 +227,7 @@ class DefaultFetch
     * @param next_NPC Used for ISAs which use delay slots.
     * @return Whether or not a branch was predicted as taken.
     */
-    bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC);
+    bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC, Addr &next_MicroPC);
    /**
     * Fetches the cache line that contains fetch_PC.  Returns any
@ -242,12 +242,14 @@ class DefaultFetch
    bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid);
    /** Squashes a specific thread and resets the PC. */
-    inline void doSquash(const Addr &new_PC, const Addr &new_NPC, unsigned tid);
+    inline void doSquash(const Addr &new_PC, const Addr &new_NPC,
                         const Addr &new_MicroPC, unsigned tid);
    /** Squashes a specific thread and resets the PC. Also tells the CPU to
     * remove any instructions between fetch and decode that should be squashed.
     */
    void squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
                          const Addr &new_MicroPC,
                          const InstSeqNum &seq_num, unsigned tid);
    /** Checks if a thread is stalled. */
@ -263,8 +265,8 @@ class DefaultFetch
     * squash should be the commit stage.
     */
    void squash(const Addr &new_PC, const Addr &new_NPC,
-                const InstSeqNum &seq_num,
+                const Addr &new_MicroPC,
-                bool squash_delay_slot, unsigned tid);
+                const InstSeqNum &seq_num, unsigned tid);
    /** Ticks the fetch stage, processing all inputs signals and fetching
     * as many instructions as possible.
@ -347,16 +349,12 @@ class DefaultFetch
    /** Per-thread fetch PC. */
    Addr PC[Impl::MaxThreads];
    /** Per-thread fetch micro PC. */
    Addr microPC[Impl::MaxThreads];
    /** Per-thread next PC. */
    Addr nextPC[Impl::MaxThreads];
    /** Per-thread next Next PC.
     *  This is not a real register but is used for
     *  architectures that use a branch-delay slot.
     *  (such as MIPS or Sparc)
     */
    Addr nextNPC[Impl::MaxThreads];
    /** Memory request used to access cache. */
    RequestPtr memReq[Impl::MaxThreads];
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@ -313,7 +313,7 @@ DefaultFetch<Impl>::initStage()
    for (int tid = 0; tid < numThreads; tid++) {
        PC[tid] = cpu->readPC(tid);
        nextPC[tid] = cpu->readNextPC(tid);
-        nextNPC[tid] = cpu->readNextNPC(tid);
+        microPC[tid] = cpu->readMicroPC(tid);
    }
    for (int tid=0; tid < numThreads; tid++) {
@ -440,11 +440,7 @@ DefaultFetch<Impl>::takeOverFrom()
        stalls[i].commit = 0;
        PC[i] = cpu->readPC(i);
        nextPC[i] = cpu->readNextPC(i);
-#if ISA_HAS_DELAY_SLOT
+        microPC[i] = cpu->readMicroPC(i);
        nextNPC[i] = cpu->readNextNPC(i);
 #else
        nextNPC[i] = nextPC[i] + sizeof(TheISA::MachInst);
 #endif
        fetchStatus[i] = Running;
    }
    numInst = 0;
@ -494,7 +490,7 @@ DefaultFetch<Impl>::switchToInactive()
 template <class Impl>
 bool
 DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
-                                          Addr &next_NPC)
+                                          Addr &next_NPC, Addr &next_MicroPC)
 {
    // Do branch prediction check here.
    // A bit of a misnomer...next_PC is actually the current PC until
@ -502,13 +498,22 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
    bool predict_taken;
    if (!inst->isControl()) {
        if (inst->isMicroop() && !inst->isLastMicroop()) {
            next_MicroPC++;
        } else {
            next_PC  = next_NPC;
            next_NPC = next_NPC + instSize;
-        inst->setPredTarg(next_PC, next_NPC);
+            next_MicroPC = 0;
        }
        inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
        inst->setPredTaken(false);
        return false;
    }
    //Assume for now that all control flow is to a different macroop which
    //would reset the micro pc to 0.
    next_MicroPC = 0;
    int tid = inst->threadNumber;
    Addr pred_PC = next_PC;
    predict_taken = branchPred.predict(inst, pred_PC, tid);
@ -535,7 +540,7 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
 #endif
 /*    DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
            tid, next_PC, next_NPC);*/
-    inst->setPredTarg(next_PC, next_NPC);
+    inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
    inst->setPredTaken(predict_taken);
    ++fetchedBranches;
@ -659,14 +664,14 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
 template <class Impl>
 inline void
 DefaultFetch<Impl>::doSquash(const Addr &new_PC,
-        const Addr &new_NPC, unsigned tid)
+        const Addr &new_NPC, const Addr &new_microPC, unsigned tid)
 {
    DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
            tid, new_PC, new_NPC);
    PC[tid] = new_PC;
    nextPC[tid] = new_NPC;
-    nextNPC[tid] = new_NPC + instSize;
+    microPC[tid] = new_microPC;
    // Clear the icache miss if it's outstanding.
    if (fetchStatus[tid] == IcacheWaitResponse) {
@ -694,12 +699,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC,
 template<class Impl>
 void
 DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
-                                     const InstSeqNum &seq_num,
+                                     const Addr &new_MicroPC,
-                                     unsigned tid)
+                                     const InstSeqNum &seq_num, unsigned tid)
 {
    DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
-    doSquash(new_PC, new_NPC, tid);
+    doSquash(new_PC, new_NPC, new_MicroPC, tid);
    // Tell the CPU to remove any instructions that are in flight between
    // fetch and decode.
@ -775,20 +780,15 @@ DefaultFetch<Impl>::updateFetchStatus()
 template <class Impl>
 void
 DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
-                           const InstSeqNum &seq_num,
+                           const Addr &new_MicroPC,
-                           bool squash_delay_slot, unsigned tid)
+                           const InstSeqNum &seq_num, unsigned tid)
 {
    DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
-    doSquash(new_PC, new_NPC, tid);
+    doSquash(new_PC, new_NPC, new_MicroPC, tid);
 #if ISA_HAS_DELAY_SLOT
    // Tell the CPU to remove any instructions that are not in the ROB.
-    cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
+    cpu->removeInstsNotInROB(tid);
 #else
    // Tell the CPU to remove any instructions that are not in the ROB.
    cpu->removeInstsNotInROB(tid, true, 0);
 #endif
 }
 template <class Impl>
@ -897,17 +897,11 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
        DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
                "from commit.\n",tid);
 #if ISA_HAS_DELAY_SLOT
    InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
 #else
    InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum;
 #endif
        // In any case, squash.
        squash(fromCommit->commitInfo[tid].nextPC,
               fromCommit->commitInfo[tid].nextNPC,
-               doneSeqNum,
+               fromCommit->commitInfo[tid].nextMicroPC,
-               fromCommit->commitInfo[tid].squashDelaySlot,
+               fromCommit->commitInfo[tid].doneSeqNum,
               tid);
        // Also check if there's a mispredict that happened.
@ -956,18 +950,14 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
        if (fetchStatus[tid] != Squashing) {
 #if ISA_HAS_DELAY_SLOT
            InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum;
 #else
            InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
 #endif
            DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n",
                    fromDecode->decodeInfo[tid].nextPC,
                    fromDecode->decodeInfo[tid].nextNPC);
            // Squash unless we're already squashing
            squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
                             fromDecode->decodeInfo[tid].nextNPC,
-                             doneSeqNum,
+                             fromDecode->decodeInfo[tid].nextMicroPC,
                             fromDecode->decodeInfo[tid].doneSeqNum,
                             tid);
            return true;
@ -1021,9 +1011,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
    DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
    // The current PC.
-    Addr &fetch_PC = PC[tid];
+    Addr fetch_PC = PC[tid];
-
+    Addr fetch_NPC = nextPC[tid];
-    Addr &fetch_NPC = nextPC[tid];
+    Addr fetch_MicroPC = microPC[tid];
    // Fault code for memory access.
    Fault fault = NoFault;
@ -1082,6 +1072,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
    Addr next_PC = fetch_PC;
    Addr next_NPC = fetch_NPC;
    Addr next_MicroPC = fetch_MicroPC;
    InstSeqNum inst_seq;
    MachInst inst;
@ -1089,6 +1080,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
    // @todo: Fix this hack.
    unsigned offset = (fetch_PC & cacheBlkMask) & ~3;
    StaticInstPtr staticInst = NULL;
    StaticInstPtr macroop = NULL;
    if (fault == NoFault) {
        // If the read of the first instruction was successful, then grab the
        // instructions from the rest of the cache line and put them into the
@ -1101,11 +1095,9 @@ DefaultFetch<Impl>::fetch(bool &status_change)
        // ended this fetch block.
        bool predicted_branch = false;
-        for (;
+        while (offset < cacheBlkSize &&
             offset < cacheBlkSize &&
               numInst < fetchWidth &&
-                 !predicted_branch;
+               !predicted_branch) {
             ++numInst) {
            // If we're branching after this instruction, quit fetching
            // from the same block then.
@ -1116,13 +1108,10 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                        fetch_PC, fetch_NPC);
            }
            // Get a sequence number.
            inst_seq = cpu->getAndIncrementInstSeq();
            // Make sure this is a valid index.
            assert(offset <= cacheBlkSize - instSize);
            if (!macroop) {
                // Get the instruction from the array of the cache line.
                inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
                            (&cacheData[tid][offset]));
@ -1131,11 +1120,24 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                predecoder.moreBytes(fetch_PC, fetch_PC, inst);
                ext_inst = predecoder.getExtMachInst();
                staticInst = StaticInstPtr(ext_inst, fetch_PC);
                if (staticInst->isMacroop())
                    macroop = staticInst;
            }
            do {
                if (macroop) {
                    staticInst = macroop->fetchMicroop(fetch_MicroPC);
                    if (staticInst->isLastMicroop())
                        macroop = NULL;
                }
                // Get a sequence number.
                inst_seq = cpu->getAndIncrementInstSeq();
                // Create a new DynInst from the instruction fetched.
-            DynInstPtr instruction = new DynInst(ext_inst,
+                DynInstPtr instruction = new DynInst(staticInst,
-                                                 fetch_PC, fetch_NPC,
+                                                     fetch_PC, fetch_NPC, fetch_MicroPC,
-                                                 next_PC, next_NPC,
+                                                     next_PC, next_NPC, next_MicroPC,
                                                     inst_seq, cpu);
                instruction->setTid(tid);
@ -1162,11 +1164,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
 #endif
                ///FIXME This needs to be more robust in dealing with delay slots
-#if !ISA_HAS_DELAY_SLOT
+                predicted_branch |=
-//	    predicted_branch |=
+                    lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC);
 #endif
            lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
            predicted_branch |= (next_PC != fetch_NPC);
                // Add instruction to the CPU's list of instructions.
                instruction->setInstListIt(cpu->addInst(instruction));
@ -1183,6 +1182,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                // Move to the next instruction, unless we have a branch.
                fetch_PC = next_PC;
                fetch_NPC = next_NPC;
                fetch_MicroPC = next_MicroPC;
                if (instruction->isQuiesce()) {
                    DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
@ -1193,18 +1193,22 @@ DefaultFetch<Impl>::fetch(bool &status_change)
                    break;
                }
                ++numInst;
            } while (staticInst->isMicroop() &&
                     !staticInst->isLastMicroop() &&
                     numInst < fetchWidth);
            offset += instSize;
        }
-        if (offset >= cacheBlkSize) {
+        if (predicted_branch) {
-            DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
+            DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
-                    "block.\n", tid);
+                    "instruction encountered.\n", tid);
        } else if (numInst >= fetchWidth) {
            DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
                    "for this cycle.\n", tid);
-        } else if (predicted_branch) {
+        } else if (offset >= cacheBlkSize) {
-            DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
+            DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
-                    "instruction encountered.\n", tid);
+                    "block.\n", tid);
        }
    }
@ -1217,12 +1221,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
    if (fault == NoFault) {
        PC[tid] = next_PC;
        nextPC[tid] = next_NPC;
-        nextNPC[tid] = next_NPC + instSize;
+        microPC[tid] = next_MicroPC;
 #if ISA_HAS_DELAY_SLOT
        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
 #else
        DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
 #endif
    } else {
        // We shouldn't be in an icache miss and also have a fault (an ITB
        // miss)
@ -1242,10 +1242,10 @@ DefaultFetch<Impl>::fetch(bool &status_change)
        // Create a new DynInst from the dummy nop.
        DynInstPtr instruction = new DynInst(ext_inst,
-                                             fetch_PC, fetch_NPC,
+                                             fetch_PC, fetch_NPC, fetch_MicroPC,
-                                             next_PC, next_NPC,
+                                             next_PC, next_NPC, next_MicroPC,
                                             inst_seq, cpu);
-        instruction->setPredTarg(next_PC, next_NPC);
+        instruction->setPredTarg(next_PC, next_NPC, 1);
        instruction->setTid(tid);
        instruction->setASID(tid);
--- a/src/cpu/o3/free_list.hh
+++ b/src/cpu/o3/free_list.hh
@ -168,7 +168,9 @@ SimpleFreeList::addReg(PhysRegIndex freed_reg)
        if (freed_reg != TheISA::ZeroReg)
            freeIntRegs.push(freed_reg);
    } else if (freed_reg < numPhysicalRegs) {
 #if THE_ISA == ALPHA_ISA
        if (freed_reg != (TheISA::ZeroReg + numPhysicalIntRegs))
 #endif
            freeFloatRegs.push(freed_reg);
    }
 }
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@ -402,9 +402,6 @@ class DefaultIEW
    /** Records if there is a fetch redirect on this cycle for each thread. */
    bool fetchRedirect[Impl::MaxThreads];
    /** Keeps track of the last valid branch delay slot instss for threads */
    InstSeqNum bdelayDoneSeqNum[Impl::MaxThreads];
    /** Used to track if all instructions have been dispatched this cycle.
     * If they have not, then blocking must have occurred, and the instructions
     * would already be added to the skid buffer.
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@ -69,7 +69,6 @@ DefaultIEW<Impl>::DefaultIEW(O3CPU *_cpu, Params *params)
        dispatchStatus[i] = Running;
        stalls[i].commit = false;
        fetchRedirect[i] = false;
        bdelayDoneSeqNum[i] = 0;
    }
    wbMax = wbWidth * params->wbDepth;
@ -410,31 +409,14 @@ DefaultIEW<Impl>::squash(unsigned tid)
    instQueue.squash(tid);
    // Tell the LDSTQ to start squashing.
 #if ISA_HAS_DELAY_SLOT
    ldstQueue.squash(fromCommit->commitInfo[tid].bdelayDoneSeqNum, tid);
 #else
    ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
 #endif
    updatedQueues = true;
    // Clear the skid buffer in case it has any data in it.
    DPRINTF(IEW, "[tid:%i]: Removing skidbuffer instructions until [sn:%i].\n",
-            tid, fromCommit->commitInfo[tid].bdelayDoneSeqNum);
+            tid, fromCommit->commitInfo[tid].doneSeqNum);
    while (!skidBuffer[tid].empty()) {
 #if ISA_HAS_DELAY_SLOT
        if (skidBuffer[tid].front()->seqNum <=
            fromCommit->commitInfo[tid].bdelayDoneSeqNum) {
            DPRINTF(IEW, "[tid:%i]: Cannot remove skidbuffer instructions "
                    "that occur before delay slot [sn:%i].\n",
                    fromCommit->commitInfo[tid].bdelayDoneSeqNum,
                    tid);
            break;
        } else {
            DPRINTF(IEW, "[tid:%i]: Removing instruction [sn:%i] from "
                    "skidBuffer.\n", tid, skidBuffer[tid].front()->seqNum);
        }
 #endif
        if (skidBuffer[tid].front()->isLoad() ||
            skidBuffer[tid].front()->isStore() ) {
            toRename->iewInfo[tid].dispatchedToLSQ++;
@ -445,8 +427,6 @@ DefaultIEW<Impl>::squash(unsigned tid)
        skidBuffer[tid].pop();
    }
    bdelayDoneSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
    emptyRenameInsts(tid);
 }
@ -462,38 +442,19 @@ DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid)
    toCommit->mispredPC[tid] = inst->readPC();
    toCommit->branchMispredict[tid] = true;
    int instSize = sizeof(TheISA::MachInst);
 #if ISA_HAS_DELAY_SLOT
-    bool branch_taken =
+    int instSize = sizeof(TheISA::MachInst);
    toCommit->branchTaken[tid] =
        !(inst->readNextPC() + instSize == inst->readNextNPC() &&
          (inst->readNextPC() == inst->readPC() + instSize ||
           inst->readNextPC() == inst->readPC() + 2 * instSize));
    DPRINTF(Sparc, "Branch taken = %s [sn:%i]\n",
            branch_taken ? "true": "false", inst->seqNum);
    toCommit->branchTaken[tid] = branch_taken;
    bool squashDelaySlot = true;
 //	(inst->readNextPC() != inst->readPC() + sizeof(TheISA::MachInst));
    DPRINTF(Sparc, "Squash delay slot = %s [sn:%i]\n",
            squashDelaySlot ? "true": "false", inst->seqNum);
    toCommit->squashDelaySlot[tid] = squashDelaySlot;
    //If we're squashing the delay slot, we need to pick back up at NextPC.
    //Otherwise, NextPC isn't being squashed, so we should pick back up at
    //NextNPC.
    if (squashDelaySlot) {
        toCommit->nextPC[tid] = inst->readNextPC();
        toCommit->nextNPC[tid] = inst->readNextNPC();
    } else {
        toCommit->nextPC[tid] = inst->readNextNPC();
        toCommit->nextNPC[tid] = inst->readNextNPC() + instSize;
    }
 #else
    toCommit->branchTaken[tid] = inst->readNextPC() !=
        (inst->readPC() + sizeof(TheISA::MachInst));
    toCommit->nextPC[tid] = inst->readNextPC();
    toCommit->nextNPC[tid] = inst->readNextPC() + instSize;
 #endif
    toCommit->nextPC[tid] = inst->readNextPC();
    toCommit->nextNPC[tid] = inst->readNextNPC();
    toCommit->nextMicroPC[tid] = inst->readNextMicroPC();
    toCommit->includeSquashInst[tid] = false;
@ -510,11 +471,7 @@ DefaultIEW<Impl>::squashDueToMemOrder(DynInstPtr &inst, unsigned tid)
    toCommit->squash[tid] = true;
    toCommit->squashedSeqNum[tid] = inst->seqNum;
    toCommit->nextPC[tid] = inst->readNextPC();
 #if ISA_HAS_DELAY_SLOT
    toCommit->nextNPC[tid] = inst->readNextNPC();
 #else
    toCommit->nextNPC[tid] = inst->readNextPC() + sizeof(TheISA::MachInst);
 #endif
    toCommit->branchMispredict[tid] = false;
    toCommit->includeSquashInst[tid] = false;
@ -532,11 +489,7 @@ DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid)
    toCommit->squash[tid] = true;
    toCommit->squashedSeqNum[tid] = inst->seqNum;
    toCommit->nextPC[tid] = inst->readPC();
 #if ISA_HAS_DELAY_SLOT
    toCommit->nextNPC[tid] = inst->readNextPC();
 #else
    toCommit->nextNPC[tid] = inst->readPC() + sizeof(TheISA::MachInst);
 #endif
    toCommit->branchMispredict[tid] = false;
    // Must include the broadcasted SN in the squash.
@ -880,10 +833,8 @@ DefaultIEW<Impl>::sortInsts()
 {
    int insts_from_rename = fromRename->size;
 #ifdef DEBUG
 #if !ISA_HAS_DELAY_SLOT
    for (int i = 0; i < numThreads; i++)
        assert(insts[i].empty());
 #endif
 #endif
    for (int i = 0; i < insts_from_rename; ++i) {
        insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]);
@ -894,21 +845,9 @@ template <class Impl>
 void
 DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
 {
-    DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions until "
+    DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions\n", tid);
            "[sn:%i].\n", tid, bdelayDoneSeqNum[tid]);
    while (!insts[tid].empty()) {
 #if ISA_HAS_DELAY_SLOT
        if (insts[tid].front()->seqNum <= bdelayDoneSeqNum[tid]) {
            DPRINTF(IEW, "[tid:%i]: Done removing, cannot remove instruction"
                    " that occurs at or before delay slot [sn:%i].\n",
                    tid, bdelayDoneSeqNum[tid]);
            break;
        } else {
            DPRINTF(IEW, "[tid:%i]: Removing incoming rename instruction "
                    "[sn:%i].\n", tid, insts[tid].front()->seqNum);
        }
 #endif
        if (insts[tid].front()->isLoad() ||
            insts[tid].front()->isStore() ) {
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@ -1005,11 +1005,7 @@ InstructionQueue<Impl>::squash(unsigned tid)
    // Read instruction sequence number of last instruction out of the
    // time buffer.
 #if ISA_HAS_DELAY_SLOT
    squashedSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
 #else
    squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
 #endif
    // Call doSquash if there are insts in the IQ
    if (count[tid] > 0) {
--- a/src/cpu/o3/mips/dyn_inst.hh
+++ b/src/cpu/o3/mips/dyn_inst.hh
@ -69,10 +69,16 @@ class MipsDynInst : public BaseDynInst<Impl>
    };
  public:
    /** BaseDynInst constructor given a binary instruction. */
    MipsDynInst(StaticInstPtr staticInst,
                Addr PC, Addr NPC, Addr microPC,
                Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
                InstSeqNum seq_num, O3CPU *cpu);
    /** BaseDynInst constructor given a binary instruction. */
    MipsDynInst(ExtMachInst inst,
-                Addr PC, Addr NPC,
+                Addr PC, Addr NPC, Addr microPC,
-                Addr Pred_PC, Addr Pred_NPC,
+                Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
                InstSeqNum seq_num, O3CPU *cpu);
    /** BaseDynInst constructor given a static inst pointer. */
--- a/src/cpu/o3/mips/dyn_inst_impl.hh
+++ b/src/cpu/o3/mips/dyn_inst_impl.hh
@ -31,11 +31,23 @@
 #include "cpu/o3/mips/dyn_inst.hh"
 template <class Impl>
-MipsDynInst<Impl>::MipsDynInst(ExtMachInst inst,
+MipsDynInst<Impl>::MipsDynInst(StaticInstPtr staticInst,
-                               Addr PC, Addr NPC,
+                               Addr PC, Addr NPC, Addr microPC,
-                               Addr Pred_PC, Addr Pred_NPC,
+                               Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
                               InstSeqNum seq_num, O3CPU *cpu)
-    : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
+    : BaseDynInst<Impl>(staticInst, PC, NPC, microPC,
            Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
 {
    initVars();
 }
 /**
  * MipsDynInst constructor given a binary instruction (ExtMachInst).
  *
  * Every argument is passed through unchanged, and in the same order,
  * to the BaseDynInst<Impl> constructor; the body then calls initVars().
  */
 template <class Impl>
 MipsDynInst<Impl>::MipsDynInst(
        ExtMachInst inst,
        Addr PC,
        Addr NPC,
        Addr microPC,
        Addr Pred_PC,
        Addr Pred_NPC,
        Addr Pred_MicroPC,
        InstSeqNum seq_num,
        O3CPU *cpu)
    : BaseDynInst<Impl>(inst,
                        PC, NPC, microPC,
                        Pred_PC, Pred_NPC, Pred_MicroPC,
                        seq_num, cpu)
 {
    initVars();
 }
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@ -179,7 +179,9 @@ class PhysRegFile
        DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                int(reg_idx), (uint64_t)val);
 #if THE_ISA == ALPHA_ISA
        if (reg_idx != TheISA::ZeroReg)
 #endif
            floatRegFile[reg_idx].d = val;
    }
@ -194,7 +196,9 @@ class PhysRegFile
        DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                int(reg_idx), (uint64_t)val);
 #if THE_ISA == ALPHA_ISA
        if (reg_idx != TheISA::ZeroReg)
 #endif
            floatRegFile[reg_idx].d = val;
    }
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@ -356,47 +356,12 @@ DefaultRename<Impl>::squash(const InstSeqNum &squash_seq_num, unsigned tid)
    }
    // Clear the instruction list and skid buffer in case they have any
-    // insts in them. Since we support multiple ISAs, we cant just:
+    // insts in them.
    // "insts[tid].clear();" or "skidBuffer[tid].clear()" since there is
    // a possible delay slot inst for different architectures
    // insts[tid].clear();
 #if ISA_HAS_DELAY_SLOT
    DPRINTF(Rename, "[tid:%i] Squashing incoming decode instructions until "
            "[sn:%i].\n",tid, squash_seq_num);
    ListIt ilist_it = insts[tid].begin();
    while (ilist_it != insts[tid].end()) {
        if ((*ilist_it)->seqNum > squash_seq_num) {
            (*ilist_it)->setSquashed();
            DPRINTF(Rename, "Squashing incoming decode instruction, "
                    "[tid:%i] [sn:%i] PC %08p.\n", tid, (*ilist_it)->seqNum, (*ilist_it)->PC);
        }
        ilist_it++;
    }
 #else
    insts[tid].clear();
 #endif
    // Clear the skid buffer in case it has any data in it.
    // See comments above.
    //     skidBuffer[tid].clear();
 #if ISA_HAS_DELAY_SLOT
    DPRINTF(Rename, "[tid:%i] Squashing incoming skidbuffer instructions "
            "until [sn:%i].\n", tid, squash_seq_num);
    ListIt slist_it = skidBuffer[tid].begin();
    while (slist_it != skidBuffer[tid].end()) {
        if ((*slist_it)->seqNum > squash_seq_num) {
            (*slist_it)->setSquashed();
            DPRINTF(Rename, "Squashing skidbuffer instruction, [tid:%i] [sn:%i]"
                    "PC %08p.\n", tid, (*slist_it)->seqNum, (*slist_it)->PC);
        }
        slist_it++;
    }
    resumeUnblocking = (skidBuffer[tid].size() != 0);
    DPRINTF(Rename, "Resume unblocking set to %s\n",
            resumeUnblocking ? "true" : "false");
 #else
    skidBuffer[tid].clear();
-#endif
+
    doSquash(squash_seq_num, tid);
 }
@ -776,10 +741,8 @@ DefaultRename<Impl>::sortInsts()
 {
    int insts_from_decode = fromDecode->size;
 #ifdef DEBUG
 #if !ISA_HAS_DELAY_SLOT
    for (int i=0; i < numThreads; i++)
        assert(insts[i].empty());
 #endif
 #endif
    for (int i = 0; i < insts_from_decode; ++i) {
        DynInstPtr inst = fromDecode->insts[i];
@ -1000,6 +963,7 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
            // Floating point and Miscellaneous registers need their indexes
            // adjusted to account for the expanded number of flattened int regs.
            flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
            DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", src_reg, flat_src_reg);
        }
        inst->flattenSrcReg(src_idx, flat_src_reg);
@ -1016,9 +980,11 @@ DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
        // See if the register is ready or not.
        if (scoreboard->getReg(renamed_reg) == true) {
-            DPRINTF(Rename, "[tid:%u]: Register is ready.\n", tid);
+            DPRINTF(Rename, "[tid:%u]: Register %d is ready.\n", tid, renamed_reg);
            inst->markSrcRegReady(src_idx);
        } else {
            DPRINTF(Rename, "[tid:%u]: Register %d is not ready.\n", tid, renamed_reg);
        }
        ++renameRenameLookups;
@ -1045,6 +1011,7 @@ DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid)
            // Floating point and Miscellaneous registers need their indexes
            // adjusted to account for the expanded number of flattened int regs.
            flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs;
            DPRINTF(Rename, "Adjusting reg index from %d to %d.\n", dest_reg, flat_dest_reg);
        }
        inst->flattenDestReg(dest_idx, flat_dest_reg);
@ -1248,13 +1215,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
        DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from "
                "commit.\n", tid);
-#if ISA_HAS_DELAY_SLOT
+        squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
        InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
 #else
        InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum;
 #endif
        squash(squashed_seq_num, tid);
        return true;
    }
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@ -165,17 +165,21 @@ SimpleRenameMap::rename(RegIndex arch_reg)
        // If it's not referencing the zero register, then rename the
        // register.
 #if THE_ISA == ALPHA_ISA
        if (arch_reg != floatZeroReg) {
 #endif
            renamed_reg = freeList->getFloatReg();
            floatRenameMap[arch_reg].physical_reg = renamed_reg;
            assert(renamed_reg < numPhysicalRegs &&
                   renamed_reg >= numPhysicalIntRegs);
 #if THE_ISA == ALPHA_ISA
        } else {
            // Otherwise return the zero register so nothing bad happens.
            renamed_reg = floatZeroReg;
        }
 #endif
    } else {
        // Subtract off the base offset for miscellaneous registers.
        arch_reg = arch_reg - numLogicalRegs;
--- a/src/cpu/o3/scoreboard.cc
+++ b/src/cpu/o3/scoreboard.cc
@ -29,6 +29,7 @@
 *          Kevin Lim
 */
 #include "arch/isa_specific.hh"
 #include "cpu/o3/scoreboard.hh"
 Scoreboard::Scoreboard(unsigned activeThreads,
@ -79,11 +80,18 @@ Scoreboard::name() const
 bool
 Scoreboard::getReg(PhysRegIndex phys_reg)
 {
 #if THE_ISA == ALPHA_ISA
    // Always ready if int or fp zero reg.
    if (phys_reg == zeroRegIdx ||
        phys_reg == (zeroRegIdx + numPhysicalIntRegs)) {
        return 1;
    }
 #else
    // Always ready if int zero reg.
    if (phys_reg == zeroRegIdx) {
        return 1;
    }
 #endif
    return regScoreBoard[phys_reg];
 }
@ -99,11 +107,18 @@ Scoreboard::setReg(PhysRegIndex phys_reg)
 void
 Scoreboard::unsetReg(PhysRegIndex ready_reg)
 {
 #if THE_ISA == ALPHA_ISA
    if (ready_reg == zeroRegIdx ||
        ready_reg == (zeroRegIdx + numPhysicalIntRegs)) {
        // Don't do anything if int or fp zero reg.
        return;
    }
 #else
    if (ready_reg == zeroRegIdx) {
        // Don't do anything if int zero reg.
        return;
    }
 #endif
    regScoreBoard[ready_reg] = 0;
 }
--- a/src/cpu/o3/sparc/cpu_impl.hh
+++ b/src/cpu/o3/sparc/cpu_impl.hh
@ -272,7 +272,10 @@ SparcO3CPU<Impl>::getSyscallArg(int i, int tid)
 {
    TheISA::IntReg idx = TheISA::flattenIntIndex(this->tcBase(tid),
            SparcISA::ArgumentReg0 + i);
-    return this->readArchIntReg(idx, tid);
+    TheISA::IntReg val = this->readArchIntReg(idx, tid);
    if (bits(this->readMiscRegNoEffect(SparcISA::MISCREG_PSTATE, tid), 3, 3))
        val = bits(val, 31, 0);
    return val;
 }
 template <class Impl>
--- a/src/cpu/o3/sparc/dyn_inst.hh
+++ b/src/cpu/o3/sparc/dyn_inst.hh
@ -56,8 +56,14 @@ class SparcDynInst : public BaseDynInst<Impl>
  public:
    /** BaseDynInst constructor given a binary instruction. */
-    SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC,
+    SparcDynInst(StaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
-            Addr Pred_PC, Addr Pred_NPC, InstSeqNum seq_num, O3CPU *cpu);
+            Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
            InstSeqNum seq_num, O3CPU *cpu);
    /** BaseDynInst constructor given a binary instruction. */
    SparcDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
            Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
            InstSeqNum seq_num, O3CPU *cpu);
    /** BaseDynInst constructor given a static inst pointer. */
    SparcDynInst(StaticInstPtr &_staticInst);
--- a/src/cpu/o3/sparc/dyn_inst_impl.hh
+++ b/src/cpu/o3/sparc/dyn_inst_impl.hh
@ -31,10 +31,23 @@
 #include "cpu/o3/sparc/dyn_inst.hh"
 template <class Impl>
-SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst,
+SparcDynInst<Impl>::SparcDynInst(StaticInstPtr staticInst,
-        Addr PC, Addr NPC, Addr Pred_PC, Addr Pred_NPC,
+        Addr PC, Addr NPC, Addr microPC,
        Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
        InstSeqNum seq_num, O3CPU *cpu)
-    : BaseDynInst<Impl>(inst, PC, NPC, Pred_PC, Pred_NPC, seq_num, cpu)
+    : BaseDynInst<Impl>(staticInst, PC, NPC, microPC,
            Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
 {
    initVars();
 }
 template <class Impl>
 SparcDynInst<Impl>::SparcDynInst(TheISA::ExtMachInst inst,
        Addr PC, Addr NPC, Addr microPC,
        Addr Pred_PC, Addr Pred_NPC, Addr Pred_MicroPC,
        InstSeqNum seq_num, O3CPU *cpu)
    : BaseDynInst<Impl>(inst, PC, NPC, microPC,
            Pred_PC, Pred_NPC, Pred_MicroPC, seq_num, cpu)
 {
    initVars();
 }
--- a/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.ini
+++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.ini
@ -0,0 +1,391 @@
 [root]
 type=Root
 children=system
 dummy=0
 [system]
 type=System
 children=cpu membus physmem
 mem_mode=atomic
 physmem=system.physmem
 [system.cpu]
 type=DerivO3CPU
 children=dcache fuPool icache l2cache toL2Bus workload
 BTBEntries=4096
 BTBTagSize=16
 LFSTSize=1024
 LQEntries=32
 RASSize=16
 SQEntries=32
 SSITSize=1024
 activity=0
 backComSize=5
 cachePorts=200
 choiceCtrBits=2
 choicePredictorSize=8192
 clock=500
 commitToDecodeDelay=1
 commitToFetchDelay=1
 commitToIEWDelay=1
 commitToRenameDelay=1
 commitWidth=8
 cpu_id=0
 decodeToFetchDelay=1
 decodeToRenameDelay=1
 decodeWidth=8
 defer_registration=false
 dispatchWidth=8
 fetchToDecodeDelay=1
 fetchTrapLatency=1
 fetchWidth=8
 forwardComSize=5
 fuPool=system.cpu.fuPool
 function_trace=false
 function_trace_start=0
 globalCtrBits=2
 globalHistoryBits=13
 globalPredictorSize=8192
 iewToCommitDelay=1
 iewToDecodeDelay=1
 iewToFetchDelay=1
 iewToRenameDelay=1
 instShiftAmt=2
 issueToExecuteDelay=1
 issueWidth=8
 localCtrBits=2
 localHistoryBits=11
 localHistoryTableSize=2048
 localPredictorSize=2048
 max_insts_all_threads=0
 max_insts_any_thread=0
 max_loads_all_threads=0
 max_loads_any_thread=0
 numIQEntries=64
 numPhysFloatRegs=256
 numPhysIntRegs=256
 numROBEntries=192
 numRobs=1
 numThreads=1
 phase=0
 predType=tournament
 progress_interval=0
 renameToDecodeDelay=1
 renameToFetchDelay=1
 renameToIEWDelay=2
 renameToROBDelay=1
 renameWidth=8
 smtCommitPolicy=RoundRobin
 smtFetchPolicy=SingleThread
 smtIQPolicy=Partitioned
 smtIQThreshold=100
 smtLSQPolicy=Partitioned
 smtLSQThreshold=100
 smtNumFetchingThreads=1
 smtROBPolicy=Partitioned
 smtROBThreshold=100
 squashWidth=8
 system=system
 trapLatency=13
 wbDepth=1
 wbWidth=8
 workload=system.cpu.workload
 dcache_port=system.cpu.dcache.cpu_side
 icache_port=system.cpu.icache.cpu_side
 [system.cpu.dcache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
 compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
 max_miss_count=0
 mshrs=10
 prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
 prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
 prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
 protocol=Null
 repl=Null
 size=262144
 split=false
 split_size=0
 store_compressed=false
 subblock_size=0
 tgts_per_mshr=20
 trace_addr=0
 two_queue=false
 write_buffers=8
 cpu_side=system.cpu.dcache_port
 mem_side=system.cpu.toL2Bus.port[1]
 [system.cpu.fuPool]
 type=FUPool
 children=FUList0 FUList1 FUList2 FUList3 FUList4 FUList5 FUList6 FUList7
 FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
 [system.cpu.fuPool.FUList0]
 type=FUDesc
 children=opList0
 count=6
 opList=system.cpu.fuPool.FUList0.opList0
 [system.cpu.fuPool.FUList0.opList0]
 type=OpDesc
 issueLat=1
 opClass=IntAlu
 opLat=1
 [system.cpu.fuPool.FUList1]
 type=FUDesc
 children=opList0 opList1
 count=2
 opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
 [system.cpu.fuPool.FUList1.opList0]
 type=OpDesc
 issueLat=1
 opClass=IntMult
 opLat=3
 [system.cpu.fuPool.FUList1.opList1]
 type=OpDesc
 issueLat=19
 opClass=IntDiv
 opLat=20
 [system.cpu.fuPool.FUList2]
 type=FUDesc
 children=opList0 opList1 opList2
 count=4
 opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
 [system.cpu.fuPool.FUList2.opList0]
 type=OpDesc
 issueLat=1
 opClass=FloatAdd
 opLat=2
 [system.cpu.fuPool.FUList2.opList1]
 type=OpDesc
 issueLat=1
 opClass=FloatCmp
 opLat=2
 [system.cpu.fuPool.FUList2.opList2]
 type=OpDesc
 issueLat=1
 opClass=FloatCvt
 opLat=2
 [system.cpu.fuPool.FUList3]
 type=FUDesc
 children=opList0 opList1 opList2
 count=2
 opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
 [system.cpu.fuPool.FUList3.opList0]
 type=OpDesc
 issueLat=1
 opClass=FloatMult
 opLat=4
 [system.cpu.fuPool.FUList3.opList1]
 type=OpDesc
 issueLat=12
 opClass=FloatDiv
 opLat=12
 [system.cpu.fuPool.FUList3.opList2]
 type=OpDesc
 issueLat=24
 opClass=FloatSqrt
 opLat=24
 [system.cpu.fuPool.FUList4]
 type=FUDesc
 children=opList0
 count=0
 opList=system.cpu.fuPool.FUList4.opList0
 [system.cpu.fuPool.FUList4.opList0]
 type=OpDesc
 issueLat=1
 opClass=MemRead
 opLat=1
 [system.cpu.fuPool.FUList5]
 type=FUDesc
 children=opList0
 count=0
 opList=system.cpu.fuPool.FUList5.opList0
 [system.cpu.fuPool.FUList5.opList0]
 type=OpDesc
 issueLat=1
 opClass=MemWrite
 opLat=1
 [system.cpu.fuPool.FUList6]
 type=FUDesc
 children=opList0 opList1
 count=4
 opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
 [system.cpu.fuPool.FUList6.opList0]
 type=OpDesc
 issueLat=1
 opClass=MemRead
 opLat=1
 [system.cpu.fuPool.FUList6.opList1]
 type=OpDesc
 issueLat=1
 opClass=MemWrite
 opLat=1
 [system.cpu.fuPool.FUList7]
 type=FUDesc
 children=opList0
 count=1
 opList=system.cpu.fuPool.FUList7.opList0
 [system.cpu.fuPool.FUList7.opList0]
 type=OpDesc
 issueLat=3
 opClass=IprAccess
 opLat=3
 [system.cpu.icache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
 compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
 max_miss_count=0
 mshrs=10
 prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
 prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
 prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
 protocol=Null
 repl=Null
 size=131072
 split=false
 split_size=0
 store_compressed=false
 subblock_size=0
 tgts_per_mshr=20
 trace_addr=0
 two_queue=false
 write_buffers=8
 cpu_side=system.cpu.icache_port
 mem_side=system.cpu.toL2Bus.port[0]
 [system.cpu.l2cache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
 compression_latency=0
 hash_delay=1
 latency=1000
 lifo=false
 max_miss_count=0
 mshrs=10
 prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
 prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
 prefetch_serial_squash=false
 prefetch_use_cpu_id=true
 prefetcher_size=100
 prioritizeRequests=false
 protocol=Null
 repl=Null
 size=2097152
 split=false
 split_size=0
 store_compressed=false
 subblock_size=0
 tgts_per_mshr=5
 trace_addr=0
 two_queue=false
 write_buffers=8
 cpu_side=system.cpu.toL2Bus.port[2]
 mem_side=system.membus.port[1]
 [system.cpu.toL2Bus]
 type=Bus
 block_size=64
 bus_id=0
 clock=1000
 responder_set=false
 width=64
 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side
 [system.cpu.workload]
 type=LiveProcess
 cmd=gzip input.log 1
 cwd=build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/o3-timing
 egid=100
 env=
 euid=100
 executable=/dist/m5/cpu2000/binaries/sparc/linux/gzip
 gid=100
 input=cin
 output=cout
 pid=100
 ppid=99
 system=system
 uid=100
 [system.membus]
 type=Bus
 block_size=64
 bus_id=0
 clock=1000
 responder_set=false
 width=64
 port=system.physmem.port[0] system.cpu.l2cache.mem_side
 [system.physmem]
 type=PhysicalMemory
 file=
 latency=1
 range=0:134217727
 zero=false
 port=system.membus.port[0]
--- a/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out
+++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out
@ -0,0 +1,366 @@
 [root]
 type=Root
 dummy=0
 [system.physmem]
 type=PhysicalMemory
 file=
 range=[0,134217727]
 latency=1
 zero=false
 [system]
 type=System
 physmem=system.physmem
 mem_mode=atomic
 [system.membus]
 type=Bus
 bus_id=0
 clock=1000
 width=64
 responder_set=false
 block_size=64
 [system.cpu.workload]
 type=LiveProcess
 cmd=gzip input.log 1
 executable=/dist/m5/cpu2000/binaries/sparc/linux/gzip
 input=cin
 output=cout
 env=
 cwd=build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/o3-timing
 system=system
 uid=100
 euid=100
 gid=100
 egid=100
 pid=100
 ppid=99
 [system.cpu.fuPool.FUList0.opList0]
 type=OpDesc
 opClass=IntAlu
 opLat=1
 issueLat=1
 [system.cpu.fuPool.FUList0]
 type=FUDesc
 opList=system.cpu.fuPool.FUList0.opList0
 count=6
 [system.cpu.fuPool.FUList1.opList0]
 type=OpDesc
 opClass=IntMult
 opLat=3
 issueLat=1
 [system.cpu.fuPool.FUList1.opList1]
 type=OpDesc
 opClass=IntDiv
 opLat=20
 issueLat=19
 [system.cpu.fuPool.FUList1]
 type=FUDesc
 opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1
 count=2
 [system.cpu.fuPool.FUList2.opList0]
 type=OpDesc
 opClass=FloatAdd
 opLat=2
 issueLat=1
 [system.cpu.fuPool.FUList2.opList1]
 type=OpDesc
 opClass=FloatCmp
 opLat=2
 issueLat=1
 [system.cpu.fuPool.FUList2.opList2]
 type=OpDesc
 opClass=FloatCvt
 opLat=2
 issueLat=1
 [system.cpu.fuPool.FUList2]
 type=FUDesc
 opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2
 count=4
 [system.cpu.fuPool.FUList3.opList0]
 type=OpDesc
 opClass=FloatMult
 opLat=4
 issueLat=1
 [system.cpu.fuPool.FUList3.opList1]
 type=OpDesc
 opClass=FloatDiv
 opLat=12
 issueLat=12
 [system.cpu.fuPool.FUList3.opList2]
 type=OpDesc
 opClass=FloatSqrt
 opLat=24
 issueLat=24
 [system.cpu.fuPool.FUList3]
 type=FUDesc
 opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2
 count=2
 [system.cpu.fuPool.FUList4.opList0]
 type=OpDesc
 opClass=MemRead
 opLat=1
 issueLat=1
 [system.cpu.fuPool.FUList4]
 type=FUDesc
 opList=system.cpu.fuPool.FUList4.opList0
 count=0
 [system.cpu.fuPool.FUList5.opList0]
 type=OpDesc
 opClass=MemWrite
 opLat=1
 issueLat=1
 [system.cpu.fuPool.FUList5]
 type=FUDesc
 opList=system.cpu.fuPool.FUList5.opList0
 count=0
 [system.cpu.fuPool.FUList6.opList0]
 type=OpDesc
 opClass=MemRead
 opLat=1
 issueLat=1
 [system.cpu.fuPool.FUList6.opList1]
 type=OpDesc
 opClass=MemWrite
 opLat=1
 issueLat=1
 [system.cpu.fuPool.FUList6]
 type=FUDesc
 opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1
 count=4
 [system.cpu.fuPool.FUList7.opList0]
 type=OpDesc
 opClass=IprAccess
 opLat=3
 issueLat=3
 [system.cpu.fuPool.FUList7]
 type=FUDesc
 opList=system.cpu.fuPool.FUList7.opList0
 count=1
 [system.cpu.fuPool]
 type=FUPool
 FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7
 [system.cpu]
 type=DerivO3CPU
 clock=500
 phase=0
 numThreads=1
 cpu_id=0
 activity=0
 workload=system.cpu.workload
 checker=null
 max_insts_any_thread=0
 max_insts_all_threads=0
 max_loads_any_thread=0
 max_loads_all_threads=0
 progress_interval=0
 cachePorts=200
 decodeToFetchDelay=1
 renameToFetchDelay=1
 iewToFetchDelay=1
 commitToFetchDelay=1
 fetchWidth=8
 renameToDecodeDelay=1
 iewToDecodeDelay=1
 commitToDecodeDelay=1
 fetchToDecodeDelay=1
 decodeWidth=8
 iewToRenameDelay=1
 commitToRenameDelay=1
 decodeToRenameDelay=1
 renameWidth=8
 commitToIEWDelay=1
 renameToIEWDelay=2
 issueToExecuteDelay=1
 dispatchWidth=8
 issueWidth=8
 wbWidth=8
 wbDepth=1
 fuPool=system.cpu.fuPool
 iewToCommitDelay=1
 renameToROBDelay=1
 commitWidth=8
 squashWidth=8
 trapLatency=13
 backComSize=5
 forwardComSize=5
 predType=tournament
 localPredictorSize=2048
 localCtrBits=2
 localHistoryTableSize=2048
 localHistoryBits=11
 globalPredictorSize=8192
 globalCtrBits=2
 globalHistoryBits=13
 choicePredictorSize=8192
 choiceCtrBits=2
 BTBEntries=4096
 BTBTagSize=16
 RASSize=16
 LQEntries=32
 SQEntries=32
 LFSTSize=1024
 SSITSize=1024
 numPhysIntRegs=256
 numPhysFloatRegs=256
 numIQEntries=64
 numROBEntries=192
 smtNumFetchingThreads=1
 smtFetchPolicy=SingleThread
 smtLSQPolicy=Partitioned
 smtLSQThreshold=100
 smtIQPolicy=Partitioned
 smtIQThreshold=100
 smtROBPolicy=Partitioned
 smtROBThreshold=100
 smtCommitPolicy=RoundRobin
 instShiftAmt=2
 defer_registration=false
 function_trace=false
 function_trace_start=0
 [system.cpu.icache]
 type=BaseCache
 size=131072
 assoc=2
 block_size=64
 latency=1000
 mshrs=10
 tgts_per_mshr=20
 write_buffers=8
 prioritizeRequests=false
 protocol=null
 trace_addr=0
 hash_delay=1
 repl=null
 compressed_bus=false
 store_compressed=false
 adaptive_compression=false
 compression_latency=0
 block_size=64
 max_miss_count=0
 addr_range=[0,18446744073709551615]
 split=false
 split_size=0
 lifo=false
 two_queue=false
 prefetch_miss=false
 prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
 prefetch_latency=10000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
 prefetch_use_cpu_id=true
 prefetch_data_accesses_only=false
 [system.cpu.dcache]
 type=BaseCache
 size=262144
 assoc=2
 block_size=64
 latency=1000
 mshrs=10
 tgts_per_mshr=20
 write_buffers=8
 prioritizeRequests=false
 protocol=null
 trace_addr=0
 hash_delay=1
 repl=null
 compressed_bus=false
 store_compressed=false
 adaptive_compression=false
 compression_latency=0
 block_size=64
 max_miss_count=0
 addr_range=[0,18446744073709551615]
 split=false
 split_size=0
 lifo=false
 two_queue=false
 prefetch_miss=false
 prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
 prefetch_latency=10000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
 prefetch_use_cpu_id=true
 prefetch_data_accesses_only=false
 [system.cpu.l2cache]
 type=BaseCache
 size=2097152
 assoc=2
 block_size=64
 latency=1000
 mshrs=10
 tgts_per_mshr=5
 write_buffers=8
 prioritizeRequests=false
 protocol=null
 trace_addr=0
 hash_delay=1
 repl=null
 compressed_bus=false
 store_compressed=false
 adaptive_compression=false
 compression_latency=0
 block_size=64
 max_miss_count=0
 addr_range=[0,18446744073709551615]
 split=false
 split_size=0
 lifo=false
 two_queue=false
 prefetch_miss=false
 prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
 prefetch_latency=10000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
 prefetch_use_cpu_id=true
 prefetch_data_accesses_only=false
 [system.cpu.toL2Bus]
 type=Bus
 bus_id=0
 clock=1000
 width=64
 responder_set=false
 block_size=64
--- a/tests/long/00.gzip/ref/sparc/linux/o3-timing/m5stats.txt
+++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/m5stats.txt
@ -0,0 +1,423 @@
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
 global.BPredUnit.BTBHits                    155497873                       # Number of BTB hits
 global.BPredUnit.BTBLookups                 176569029                       # Number of BTB lookups
 global.BPredUnit.RASInCorrect                       0                       # Number of incorrect RAS predictions.
 global.BPredUnit.condIncorrect               90327270                       # Number of conditional branches incorrect
 global.BPredUnit.condPredicted              223339092                       # Number of conditional branches predicted
 global.BPredUnit.lookups                    223339092                       # Number of BP lookups
 global.BPredUnit.usedRAS                            0                       # Number of times the RAS was used to get a target.
 host_inst_rate                                  54106                       # Simulator instruction rate (inst/s)
 host_mem_usage                                 156124                       # Number of bytes of host memory used
 host_seconds                                 27529.37                       # Real time elapsed on the host
 host_tick_rate                               45674334                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads          464625781                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores         155659586                       # Number of conflicting stores.
 memdepunit.memDep.insertedLoads             751805606                       # Number of loads inserted to the mem dependence unit.
 memdepunit.memDep.insertedStores            305482201                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                  1489514762                       # Number of instructions simulated
 sim_seconds                                  1.257386                       # Number of seconds simulated
 sim_ticks                                1257385552000                       # Number of ticks simulated
 system.cpu.commit.COM:branches               86246390                       # Number of branches committed
 system.cpu.commit.COM:bw_lim_events           9313657                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
 system.cpu.commit.COM:committed_per_cycle.samples   2273477268                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
                               0   1413600532   6217.79%           
                               1    557883273   2453.88%           
                               2    123364539    542.62%           
                               3    120963543    532.06%           
                               4     18884040     83.06%           
                               5     12171132     53.54%           
                               6      9965158     43.83%           
                               7      7331394     32.25%           
                               8      9313657     40.97%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
 system.cpu.commit.COM:count                1489514762                       # Number of instructions committed
 system.cpu.commit.COM:loads                 402511689                       # Number of loads committed
 system.cpu.commit.COM:membars                   51356                       # Number of memory barriers committed
 system.cpu.commit.COM:refs                  569359657                       # Number of memory references committed
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
 system.cpu.commit.branchMispredicts          90327270                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts     1489514762                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls         2243499                       # The number of times commit has been forced to stall to communicate backwards
 system.cpu.commit.commitSquashedInsts      1399513618                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                  1489514762                       # Number of Instructions Simulated
 system.cpu.committedInsts_total            1489514762                       # Number of Instructions Simulated
 system.cpu.cpi                               1.688316                       # CPI: Cycles Per Instruction
 system.cpu.cpi_total                         1.688316                       # CPI: Total CPI of All Threads
 system.cpu.dcache.ReadReq_accesses          431095835                       # number of ReadReq accesses(hits+misses)
 system.cpu.dcache.ReadReq_avg_miss_latency  2842.252413                       # average ReadReq miss latency
 system.cpu.dcache.ReadReq_avg_mshr_miss_latency  2392.500580                       # average ReadReq mshr miss latency
 system.cpu.dcache.ReadReq_hits              430168385                       # number of ReadReq hits
 system.cpu.dcache.ReadReq_miss_latency     2636047000                       # number of ReadReq miss cycles
 system.cpu.dcache.ReadReq_miss_rate          0.002151                       # miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_misses               927450                       # number of ReadReq misses
 system.cpu.dcache.ReadReq_mshr_hits            694672                       # number of ReadReq MSHR hits
 system.cpu.dcache.ReadReq_mshr_miss_latency    556921500                       # number of ReadReq MSHR miss cycles
 system.cpu.dcache.ReadReq_mshr_miss_rate     0.000540                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses          232778                       # number of ReadReq MSHR misses
 system.cpu.dcache.SwapReq_accesses               1326                       # number of SwapReq accesses(hits+misses)
 system.cpu.dcache.SwapReq_avg_miss_latency         3500                       # average SwapReq miss latency
 system.cpu.dcache.SwapReq_avg_mshr_miss_latency         2500                       # average SwapReq mshr miss latency
 system.cpu.dcache.SwapReq_hits                   1319                       # number of SwapReq hits
 system.cpu.dcache.SwapReq_miss_latency          24500                       # number of SwapReq miss cycles
 system.cpu.dcache.SwapReq_miss_rate          0.005279                       # miss rate for SwapReq accesses
 system.cpu.dcache.SwapReq_misses                    7                       # number of SwapReq misses
 system.cpu.dcache.SwapReq_mshr_miss_latency        17500                       # number of SwapReq MSHR miss cycles
 system.cpu.dcache.SwapReq_mshr_miss_rate     0.005279                       # mshr miss rate for SwapReq accesses
 system.cpu.dcache.SwapReq_mshr_misses               7                       # number of SwapReq MSHR misses
 system.cpu.dcache.WriteReq_accesses         166846642                       # number of WriteReq accesses(hits+misses)
 system.cpu.dcache.WriteReq_avg_miss_latency  3889.592412                       # average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency  3171.120393                       # average WriteReq mshr miss latency
 system.cpu.dcache.WriteReq_hits             165155866                       # number of WriteReq hits
 system.cpu.dcache.WriteReq_miss_latency    6576429500                       # number of WriteReq miss cycles
 system.cpu.dcache.WriteReq_miss_rate         0.010134                       # miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_misses             1690776                       # number of WriteReq misses
 system.cpu.dcache.WriteReq_mshr_hits          1420478                       # number of WriteReq MSHR hits
 system.cpu.dcache.WriteReq_mshr_miss_latency    857147500                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate     0.001620                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses         270298                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_refs                1183.354576                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
 system.cpu.dcache.demand_accesses           597942477                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_avg_miss_latency  3518.594842                       # average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency  2810.845677                       # average overall mshr miss latency
 system.cpu.dcache.demand_hits               595324251                       # number of demand (read+write) hits
 system.cpu.dcache.demand_miss_latency      9212476500                       # number of demand (read+write) miss cycles
 system.cpu.dcache.demand_miss_rate           0.004379                       # miss rate for demand accesses
 system.cpu.dcache.demand_misses               2618226                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits            2115150                       # number of demand (read+write) MSHR hits
 system.cpu.dcache.demand_mshr_miss_latency   1414069000                       # number of demand (read+write) MSHR miss cycles
 system.cpu.dcache.demand_mshr_miss_rate      0.000841                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses           503076                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
 system.cpu.dcache.overall_accesses          597942477                       # number of overall (read+write) accesses
 system.cpu.dcache.overall_avg_miss_latency  3518.594842                       # average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency  2810.845677                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_hits              595324251                       # number of overall hits
 system.cpu.dcache.overall_miss_latency     9212476500                       # number of overall miss cycles
 system.cpu.dcache.overall_miss_rate          0.004379                       # miss rate for overall accesses
 system.cpu.dcache.overall_misses              2618226                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits           2115150                       # number of overall MSHR hits
 system.cpu.dcache.overall_mshr_miss_latency   1414069000                       # number of overall MSHR miss cycles
 system.cpu.dcache.overall_mshr_miss_rate     0.000841                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses          503076                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
 system.cpu.dcache.prefetcher.num_hwpf_evicted            0                       # number of hwpf removed due to no buffer left
 system.cpu.dcache.prefetcher.num_hwpf_identified            0                       # number of hwpf identified
 system.cpu.dcache.prefetcher.num_hwpf_issued            0                       # number of hwpf issued
 system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.dcache.replacements                 498987                       # number of replacements
 system.cpu.dcache.sampled_refs                 503083                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.tagsinuse               4095.797134                       # Cycle average of tags in use
 system.cpu.dcache.total_refs                595325570                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle               77974000                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                   335737                       # number of writebacks
 system.cpu.decode.DECODE:BlockedCycles      435745843                       # Number of cycles decode is blocked
 system.cpu.decode.DECODE:DecodedInsts      3276032607                       # Number of instructions handled by decode
 system.cpu.decode.DECODE:IdleCycles        1073744654                       # Number of cycles decode is idle
 system.cpu.decode.DECODE:RunCycles          761619600                       # Number of cycles decode is running
 system.cpu.decode.DECODE:SquashCycles       241293837                       # Number of cycles decode is squashing
 system.cpu.decode.DECODE:UnblockCycles        2367171                       # Number of cycles decode is unblocking
 system.cpu.fetch.Branches                   223339092                       # Number of branches that fetch encountered
 system.cpu.fetch.CacheLines                 355860305                       # Number of cache lines fetched
 system.cpu.fetch.Cycles                    1166695920                       # Number of cycles fetch has run and was not squashing or blocked
 system.cpu.fetch.IcacheSquashes              14770227                       # Number of outstanding Icache misses that were squashed
 system.cpu.fetch.Insts                     3591774268                       # Number of instructions fetch has processed
 system.cpu.fetch.SquashCycles                93734364                       # Number of cycles fetch has spent squashing
 system.cpu.fetch.branchRate                  0.088811                       # Number of branch fetches per cycle
 system.cpu.fetch.icacheStallCycles          355860305                       # Number of cycles fetch is stalled on an Icache miss
 system.cpu.fetch.predictedBranches          155497873                       # Number of branches that fetch has predicted taken
 system.cpu.fetch.rate                        1.428271                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
 system.cpu.fetch.rateDist.samples          2514771105                      
 system.cpu.fetch.rateDist.min_value                 0                      
                               0   1703935491   6775.71%           
                               1    252157679   1002.71%           
                               2     75632424    300.75%           
                               3     38096592    151.49%           
                               4     76680653    304.92%           
                               5     30840750    122.64%           
                               6     33076966    131.53%           
                               7     20130593     80.05%           
                               8    284219957   1130.20%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 system.cpu.icache.ReadReq_accesses          355860305                       # number of ReadReq accesses(hits+misses)
 system.cpu.icache.ReadReq_avg_miss_latency  5111.111111                       # average ReadReq miss latency
 system.cpu.icache.ReadReq_avg_mshr_miss_latency  4198.640483                       # average ReadReq mshr miss latency
 system.cpu.icache.ReadReq_hits              355858946                       # number of ReadReq hits
 system.cpu.icache.ReadReq_miss_latency        6946000                       # number of ReadReq miss cycles
 system.cpu.icache.ReadReq_miss_rate          0.000004                       # miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_misses                 1359                       # number of ReadReq misses
 system.cpu.icache.ReadReq_mshr_hits                35                       # number of ReadReq MSHR hits
 system.cpu.icache.ReadReq_mshr_miss_latency      5559000                       # number of ReadReq MSHR miss cycles
 system.cpu.icache.ReadReq_mshr_miss_rate     0.000004                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses            1324                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_refs               268775.638973                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
 system.cpu.icache.demand_accesses           355860305                       # number of demand (read+write) accesses
 system.cpu.icache.demand_avg_miss_latency  5111.111111                       # average overall miss latency
 system.cpu.icache.demand_avg_mshr_miss_latency  4198.640483                       # average overall mshr miss latency
 system.cpu.icache.demand_hits               355858946                       # number of demand (read+write) hits
 system.cpu.icache.demand_miss_latency         6946000                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_rate           0.000004                       # miss rate for demand accesses
 system.cpu.icache.demand_misses                  1359                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                 35                       # number of demand (read+write) MSHR hits
 system.cpu.icache.demand_mshr_miss_latency      5559000                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_rate      0.000004                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses             1324                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
 system.cpu.icache.overall_accesses          355860305                       # number of overall (read+write) accesses
 system.cpu.icache.overall_avg_miss_latency  5111.111111                       # average overall miss latency
 system.cpu.icache.overall_avg_mshr_miss_latency  4198.640483                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_hits              355858946                       # number of overall hits
 system.cpu.icache.overall_miss_latency        6946000                       # number of overall miss cycles
 system.cpu.icache.overall_miss_rate          0.000004                       # miss rate for overall accesses
 system.cpu.icache.overall_misses                 1359                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                35                       # number of overall MSHR hits
 system.cpu.icache.overall_mshr_miss_latency      5559000                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_rate     0.000004                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses            1324                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.icache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu.icache.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu.icache.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
 system.cpu.icache.prefetcher.num_hwpf_evicted            0                       # number of hwpf removed due to no buffer left
 system.cpu.icache.prefetcher.num_hwpf_identified            0                       # number of hwpf identified
 system.cpu.icache.prefetcher.num_hwpf_issued            0                       # number of hwpf issued
 system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu.icache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.icache.replacements                    198                       # number of replacements
 system.cpu.icache.sampled_refs                   1324                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.icache.tagsinuse               1026.431065                       # Cycle average of tags in use
 system.cpu.icache.total_refs                355858946                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.idleCycles                            1497                       # Total number of cycles that the CPU has spent unscheduled due to idling
 system.cpu.iew.EXEC:branches                128998684                       # Number of branches executed
 system.cpu.iew.EXEC:nop                             0                       # number of nop insts executed
 system.cpu.iew.EXEC:rate                     0.879999                       # Inst execution rate
 system.cpu.iew.EXEC:refs                    756340485                       # number of memory reference insts executed
 system.cpu.iew.EXEC:stores                  208683785                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
 system.cpu.iew.WB:consumers                1511846593                       # num instructions consuming a value
 system.cpu.iew.WB:count                    2184193190                       # cumulative count of insts written-back
 system.cpu.iew.WB:fanout                     0.964010                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:producers                1457435157                       # num instructions producing a value
 system.cpu.iew.WB:rate                       0.868546                       # insts written-back per cycle
 system.cpu.iew.WB:sent                     2194556483                       # cumulative count of insts sent to commit
 system.cpu.iew.branchMispredicts             93921260                       # Number of branch mispredicts detected at execute
 system.cpu.iew.iewBlockCycles                  242324                       # Number of cycles IEW is blocking
 system.cpu.iew.iewDispLoadInsts             751805606                       # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts           21112863                       # Number of dispatched non-speculative instructions
 system.cpu.iew.iewDispSquashedInsts           6967923                       # Number of squashed instructions skipped by dispatch
 system.cpu.iew.iewDispStoreInsts            305482201                       # Number of dispatched store instructions
 system.cpu.iew.iewDispatchedInsts          2889028359                       # Number of instructions dispatched to IQ
 system.cpu.iew.iewExecLoadInsts             547656700                       # Number of load instructions executed
 system.cpu.iew.iewExecSquashedInsts         155922171                       # Number of squashed instructions skipped in execute
 system.cpu.iew.iewExecutedInsts            2212995141                       # Number of executed instructions
 system.cpu.iew.iewIQFullEvents                      0                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
 system.cpu.iew.iewSquashCycles              241293837                       # Number of cycles IEW is squashing
 system.cpu.iew.iewUnblockCycles                  1173                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
 system.cpu.iew.lsq.thread.0.forwLoads       116560202                       # Number of loads that had data forwarded from stores
 system.cpu.iew.lsq.thread.0.ignoredResponses       586068                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.memOrderViolation      3827981                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads           59                       # Number of loads that were rescheduled
 system.cpu.iew.lsq.thread.0.squashedLoads    349293917                       # Number of loads squashed
 system.cpu.iew.lsq.thread.0.squashedStores    138634233                       # Number of stores squashed
 system.cpu.iew.memOrderViolationEvents        3827981                       # Number of memory order violations
 system.cpu.iew.predictedNotTakenIncorrect      1127857                       # Number of branches that were predicted not taken incorrectly
 system.cpu.iew.predictedTakenIncorrect       92793403                       # Number of branches that were predicted taken incorrectly
 system.cpu.ipc                               0.592306                       # IPC: Instructions Per Cycle
 system.cpu.ipc_total                         0.592306                       # IPC: Total IPC of All Threads
 system.cpu.iq.ISSUE:FU_type_0              2368917312                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                      No_OpClass    351375247     14.83%            # Type of FU issued
                          IntAlu   1188705257     50.18%            # Type of FU issued
                         IntMult            0      0.00%            # Type of FU issued
                          IntDiv            0      0.00%            # Type of FU issued
                        FloatAdd      2951238      0.12%            # Type of FU issued
                        FloatCmp            0      0.00%            # Type of FU issued
                        FloatCvt            0      0.00%            # Type of FU issued
                       FloatMult            0      0.00%            # Type of FU issued
                        FloatDiv            0      0.00%            # Type of FU issued
                       FloatSqrt            0      0.00%            # Type of FU issued
                         MemRead    592531661     25.01%            # Type of FU issued
                        MemWrite    233353909      9.85%            # Type of FU issued
                       IprAccess            0      0.00%            # Type of FU issued
                    InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
 system.cpu.iq.ISSUE:fu_busy_cnt               6622922                       # FU busy when requested
 system.cpu.iq.ISSUE:fu_busy_rate             0.002796                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                      No_OpClass            0      0.00%            # attempts to use FU when none available
                          IntAlu      3150287     47.57%            # attempts to use FU when none available
                         IntMult            0      0.00%            # attempts to use FU when none available
                          IntDiv            0      0.00%            # attempts to use FU when none available
                        FloatAdd       202242      3.05%            # attempts to use FU when none available
                        FloatCmp            0      0.00%            # attempts to use FU when none available
                        FloatCvt            0      0.00%            # attempts to use FU when none available
                       FloatMult            0      0.00%            # attempts to use FU when none available
                        FloatDiv            0      0.00%            # attempts to use FU when none available
                       FloatSqrt            0      0.00%            # attempts to use FU when none available
                         MemRead      2975364     44.93%            # attempts to use FU when none available
                        MemWrite       295029      4.45%            # attempts to use FU when none available
                       IprAccess            0      0.00%            # attempts to use FU when none available
                    InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
 system.cpu.iq.ISSUE:issued_per_cycle.samples   2514771105                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
                               0   1264571415   5028.57%           
                               1    618163663   2458.13%           
                               2    318214573   1265.38%           
                               3    195947630    779.19%           
                               4     78232851    311.09%           
                               5     28085074    111.68%           
                               6      8167595     32.48%           
                               7      2987163     11.88%           
                               8       401141      1.60%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
 system.cpu.iq.ISSUE:rate                     0.942001                       # Inst issue rate
 system.cpu.iq.iqInstsAdded                 2867645475                       # Number of instructions added to the IQ (excludes non-spec)
 system.cpu.iq.iqInstsIssued                2368917312                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded            21382884                       # Number of non-speculative instructions added to the IQ
 system.cpu.iq.iqSquashedInstsExamined      1368214032                       # Number of squashed instructions iterated over during squash; mainly for profiling
 system.cpu.iq.iqSquashedInstsIssued            461256                       # Number of squashed instructions issued
 system.cpu.iq.iqSquashedNonSpecRemoved       19139385                       # Number of squashed non-spec instructions that were removed
 system.cpu.iq.iqSquashedOperandsExamined   1296493196                       # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.l2cache.ReadReq_accesses            504406                       # number of ReadReq accesses(hits+misses)
 system.cpu.l2cache.ReadReq_avg_miss_latency  4393.799833                       # average ReadReq miss latency
 system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2267.430007                       # average ReadReq mshr miss latency
 system.cpu.l2cache.ReadReq_hits                476939                       # number of ReadReq hits
 system.cpu.l2cache.ReadReq_miss_latency     120684500                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate         0.054454                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_misses               27467                       # number of ReadReq misses
 system.cpu.l2cache.ReadReq_mshr_miss_latency     62279500                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate     0.054454                       # mshr miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_mshr_misses          27467                       # number of ReadReq MSHR misses
 system.cpu.l2cache.Writeback_accesses          335737                       # number of Writeback accesses(hits+misses)
 system.cpu.l2cache.Writeback_hits              335720                       # number of Writeback hits
 system.cpu.l2cache.Writeback_miss_rate       0.000051                       # miss rate for Writeback accesses
 system.cpu.l2cache.Writeback_misses                17                       # number of Writeback misses
 system.cpu.l2cache.Writeback_mshr_miss_rate     0.000051                       # mshr miss rate for Writeback accesses
 system.cpu.l2cache.Writeback_mshr_misses           17                       # number of Writeback MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_refs                 29.586740                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses             504406                       # number of demand (read+write) accesses
 system.cpu.l2cache.demand_avg_miss_latency  4393.799833                       # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency  2267.430007                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                 476939                       # number of demand (read+write) hits
 system.cpu.l2cache.demand_miss_latency      120684500                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate          0.054454                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                27467                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
 system.cpu.l2cache.demand_mshr_miss_latency     62279500                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate     0.054454                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_misses           27467                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses            840143                       # number of overall (read+write) accesses
 system.cpu.l2cache.overall_avg_miss_latency  4391.082084                       # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency  2267.430007                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                812659                       # number of overall hits
 system.cpu.l2cache.overall_miss_latency     120684500                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate         0.032713                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses               27484                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
 system.cpu.l2cache.overall_mshr_miss_latency     62279500                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate     0.032693                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_misses          27467                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
 system.cpu.l2cache.prefetcher.num_hwpf_already_in_mshr            0                       # number of hwpf that were already in mshr
 system.cpu.l2cache.prefetcher.num_hwpf_already_in_prefetcher            0                       # number of hwpf that were already in the prefetch queue
 system.cpu.l2cache.prefetcher.num_hwpf_evicted            0                       # number of hwpf removed due to no buffer left
 system.cpu.l2cache.prefetcher.num_hwpf_identified            0                       # number of hwpf identified
 system.cpu.l2cache.prefetcher.num_hwpf_issued            0                       # number of hwpf issued
 system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu.l2cache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
 system.cpu.l2cache.replacements                  2692                       # number of replacements
 system.cpu.l2cache.sampled_refs                 27467                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.l2cache.tagsinuse             24466.224839                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                  812659                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                    2555                       # number of writebacks
 system.cpu.numCycles                       2514771105                       # number of cpu cycles simulated
 system.cpu.rename.RENAME:BlockCycles         14153952                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps     1244762263                       # Number of HB maps that are committed
 system.cpu.rename.RENAME:IQFullEvents             845                       # Number of times rename has blocked due to IQ full
 system.cpu.rename.RENAME:IdleCycles        1122858502                       # Number of cycles rename is idle
 system.cpu.rename.RENAME:LSQFullEvents       18964355                       # Number of times rename has blocked due to LSQ full
 system.cpu.rename.RENAME:RenameLookups     4974059876                       # Number of register rename lookups that rename has made
 system.cpu.rename.RENAME:RenamedInsts      3105364972                       # Number of instructions processed by rename
 system.cpu.rename.RENAME:RenamedOperands   2435580679                       # Number of destination operands rename has renamed
 system.cpu.rename.RENAME:RunCycles          713636177                       # Number of cycles rename is running
 system.cpu.rename.RENAME:SquashCycles       241293837                       # Number of cycles rename is squashing
 system.cpu.rename.RENAME:UnblockCycles       24303898                       # Number of cycles rename is unblocking
 system.cpu.rename.RENAME:UndoneMaps        1190818416                       # Number of HB maps that are undone due to squashing
 system.cpu.rename.RENAME:serializeStallCycles    398524739                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts     21495577                       # count of serializing insts renamed
 system.cpu.rename.RENAME:skidInsts          149561373                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts     21338548                       # count of temporary serializing insts renamed
 system.cpu.timesIdled                               3                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload.PROG:num_syscalls              19                       # Number of system calls
 ---------- End Simulation Statistics   ----------
--- a/tests/long/00.gzip/ref/sparc/linux/o3-timing/stderr
+++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/stderr
@ -0,0 +1,6 @@
 warn: More than two loadable segments in ELF object.
 warn: Ignoring segment @ 0xb4000 length 0x10.
 warn: More than two loadable segments in ELF object.
 warn: Ignoring segment @ 0x0 length 0x0.
 warn: Entering event queue @ 0.  Starting simulation...
 warn: Ignoring request to flush register windows.
--- a/tests/long/00.gzip/ref/sparc/linux/o3-timing/stdout
+++ b/tests/long/00.gzip/ref/sparc/linux/o3-timing/stdout
@ -0,0 +1,44 @@
 spec_init
 Loading Input Data
 Duplicating 262144 bytes
 Duplicating 524288 bytes
 Input data 1048576 bytes in length
 Compressing Input Data, level 1
 Compressed data 108074 bytes in length
 Uncompressing Data
 Uncompressed data 1048576 bytes in length
 Uncompressed data compared correctly
 Compressing Input Data, level 3
 Compressed data 97831 bytes in length
 Uncompressing Data
 Uncompressed data 1048576 bytes in length
 Uncompressed data compared correctly
 Compressing Input Data, level 5
 Compressed data 83382 bytes in length
 Uncompressing Data
 Uncompressed data 1048576 bytes in length
 Uncompressed data compared correctly
 Compressing Input Data, level 7
 Compressed data 76606 bytes in length
 Uncompressing Data
 Uncompressed data 1048576 bytes in length
 Uncompressed data compared correctly
 Compressing Input Data, level 9
 Compressed data 73189 bytes in length
 Uncompressing Data
 Uncompressed data 1048576 bytes in length
 Uncompressed data compared correctly
 Tested 1MB buffer: OK!
 M5 Simulator System
 Copyright (c) 2001-2006
 The Regents of The University of Michigan
 All Rights Reserved
 M5 compiled Jun 21 2007 21:15:48
 M5 started Fri Jun 22 01:01:27 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/o3-timing tests/run.py long/00.gzip/sparc/linux/o3-timing
 Global frequency set at 1000000000000 ticks per second
 Exiting @ tick 1257385552000 because target called exit()
--- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.ini
@ -39,7 +39,7 @@ env=
 euid=100
 executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
 gid=100
-input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in
+input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in
 output=cout
 pid=100
 ppid=99
@ -53,7 +53,7 @@ bus_id=0
 clock=1000
 responder_set=false
 width=64
-port=system.physmem.port system.cpu.icache_port system.cpu.dcache_port
+port=system.physmem.port[0] system.cpu.icache_port system.cpu.dcache_port
 [system.physmem]
 type=PhysicalMemory
--- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out
@ -26,7 +26,7 @@ block_size=64
 type=LiveProcess
 cmd=mcf mcf.in
 executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
-input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in
+input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in
 output=cout
 env=
 cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic
--- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/m5stats.txt
@ -1,18 +1,18 @@
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                 686638                       # Simulator instruction rate (inst/s)
+host_inst_rate                                1151751                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 149820                       # Number of bytes of host memory used
+host_mem_usage                                 150484                       # Number of bytes of host memory used
-host_seconds                                  2504.37                       # Real time elapsed on the host
+host_seconds                                   211.71                       # Real time elapsed on the host
-host_tick_rate                              343319148                       # Simulator tick rate (ticks/s)
+host_tick_rate                              575874246                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_insts                                  1719594534                       # Number of instructions simulated
+sim_insts                                   243840172                       # Number of instructions simulated
-sim_seconds                                  0.859797                       # Number of seconds simulated
+sim_seconds                                  0.121920                       # Number of seconds simulated
-sim_ticks                                859797266500                       # Number of ticks simulated
+sim_ticks                                121920085500                       # Number of ticks simulated
 system.cpu.idle_fraction                            0                       # Percentage of idle cycles
 system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
-system.cpu.numCycles                       1719594534                       # number of cpu cycles simulated
+system.cpu.numCycles                        243840172                       # number of cpu cycles simulated
-system.cpu.num_insts                       1719594534                       # Number of instructions executed
+system.cpu.num_insts                        243840172                       # Number of instructions executed
-system.cpu.num_refs                         774793634                       # Number of memory references
+system.cpu.num_refs                         105125191                       # Number of memory references
-system.cpu.workload.PROG:num_syscalls             632                       # Number of system calls
+system.cpu.workload.PROG:num_syscalls             428                       # Number of system calls
 ---------- End Simulation Statistics   ----------
--- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/mcf.out
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/mcf.out
--- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/stdout
@ -4,19 +4,15 @@ by  Andreas Loebel
 Copyright (c) 1998,1999   ZIB Berlin
 All Rights Reserved.
-nodes                      : 1800
+nodes                      : 500
-active arcs                : 8190
+active arcs                : 1905
-simplex iterations         : 6837
+simplex iterations         : 1502
-flow value                 : 12860044181
+flow value                 : 4990014995
-new implicit arcs          : 300000
+new implicit arcs          : 23867
-active arcs                : 308190
+active arcs                : 25772
-simplex iterations         : 11843
+simplex iterations         : 2663
-flow value                 : 9360043604
+flow value                 : 3080014995
-new implicit arcs          : 22787
+checksum                   : 68389
 active arcs                : 330977
 simplex iterations         : 11931
 flow value                 : 9360043512
 checksum                   : 798014
 optimal
 M5 Simulator System
@ -25,9 +21,9 @@ The Regents of The University of Michigan
 All Rights Reserved
-M5 compiled May 15 2007 13:02:31
+M5 compiled Jun 21 2007 21:15:48
-M5 started Tue May 15 14:23:47 2007
+M5 started Fri Jun 22 01:58:18 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic tests/run.py long/10.mcf/sparc/linux/simple-atomic
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 859797266500 because target called exit()
+Exiting @ tick 121920085500 because target called exit()
--- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.ini
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.ini
@ -31,6 +31,7 @@ icache_port=system.cpu.icache.cpu_side
 [system.cpu.dcache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@ -44,7 +45,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@ -69,6 +70,7 @@ mem_side=system.cpu.toL2Bus.port[1]
 [system.cpu.icache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@ -82,7 +84,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@ -107,6 +109,7 @@ mem_side=system.cpu.toL2Bus.port[0]
 [system.cpu.l2cache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@ -120,7 +123,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=100000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@ -160,7 +163,7 @@ env=
 euid=100
 executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
 gid=100
-input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in
+input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in
 output=cout
 pid=100
 ppid=99
@ -174,7 +177,7 @@ bus_id=0
 clock=1000
 responder_set=false
 width=64
-port=system.physmem.port system.cpu.l2cache.mem_side
+port=system.physmem.port[0] system.cpu.l2cache.mem_side
 [system.physmem]
 type=PhysicalMemory
--- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out
@ -26,7 +26,7 @@ block_size=64
 type=LiveProcess
 cmd=mcf mcf.in
 executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf
-input=/dist/m5/cpu2000/data/mcf/lgred/input/mcf.in
+input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in
 output=cout
 env=
 cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-timing
@ -94,7 +94,7 @@ prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
@ -131,7 +131,7 @@ prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
@ -168,7 +168,7 @@ prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
-prefetch_latency=10
+prefetch_latency=100000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
--- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/m5stats.txt
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/m5stats.txt
@ -1,77 +1,77 @@
 ---------- Begin Simulation Statistics ----------
-host_inst_rate                                 480485                       # Simulator instruction rate (inst/s)
+host_inst_rate                                 697152                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 155316                       # Number of bytes of host memory used
+host_mem_usage                                 155896                       # Number of bytes of host memory used
-host_seconds                                  3578.87                       # Real time elapsed on the host
+host_seconds                                   349.77                       # Real time elapsed on the host
-host_tick_rate                              745845171                       # Simulator tick rate (ticks/s)
+host_tick_rate                             1027373651                       # Simulator tick rate (ticks/s)
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
-sim_insts                                  1719594534                       # Number of instructions simulated
+sim_insts                                   243840172                       # Number of instructions simulated
-sim_seconds                                  2.669285                       # Number of seconds simulated
+sim_seconds                                  0.359341                       # Number of seconds simulated
-sim_ticks                                2669284585000                       # Number of ticks simulated
+sim_ticks                                359340764000                       # Number of ticks simulated
-system.cpu.dcache.ReadReq_accesses          607807189                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_accesses           82219469                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 12893.226605                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_miss_latency 12000.343864                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 11893.226605                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 11000.343864                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits              594739458                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits               81326673                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency   168485217000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency    10713859000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.021500                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_miss_rate          0.010859                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses             13067731                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_misses               892796                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_miss_latency 155417486000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency   9821063000                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.021500                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.010859                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses        13067731                       # number of ReadReq MSHR misses
+system.cpu.dcache.ReadReq_mshr_misses          892796                       # number of ReadReq MSHR misses
-system.cpu.dcache.SwapReq_accesses              15448                       # number of SwapReq accesses(hits+misses)
+system.cpu.dcache.SwapReq_accesses               3886                       # number of SwapReq accesses(hits+misses)
-system.cpu.dcache.SwapReq_avg_miss_latency 13090.909091                       # average SwapReq miss latency
+system.cpu.dcache.SwapReq_avg_miss_latency        12500                       # average SwapReq miss latency
-system.cpu.dcache.SwapReq_avg_mshr_miss_latency 12090.909091                       # average SwapReq mshr miss latency
+system.cpu.dcache.SwapReq_avg_mshr_miss_latency        11500                       # average SwapReq mshr miss latency
-system.cpu.dcache.SwapReq_hits                  15437                       # number of SwapReq hits
+system.cpu.dcache.SwapReq_hits                   3882                       # number of SwapReq hits
-system.cpu.dcache.SwapReq_miss_latency         144000                       # number of SwapReq miss cycles
+system.cpu.dcache.SwapReq_miss_latency          50000                       # number of SwapReq miss cycles
-system.cpu.dcache.SwapReq_miss_rate          0.000712                       # miss rate for SwapReq accesses
+system.cpu.dcache.SwapReq_miss_rate          0.001029                       # miss rate for SwapReq accesses
-system.cpu.dcache.SwapReq_misses                   11                       # number of SwapReq misses
+system.cpu.dcache.SwapReq_misses                    4                       # number of SwapReq misses
-system.cpu.dcache.SwapReq_mshr_miss_latency       133000                       # number of SwapReq MSHR miss cycles
+system.cpu.dcache.SwapReq_mshr_miss_latency        46000                       # number of SwapReq MSHR miss cycles
-system.cpu.dcache.SwapReq_mshr_miss_rate     0.000712                       # mshr miss rate for SwapReq accesses
+system.cpu.dcache.SwapReq_mshr_miss_rate     0.001029                       # mshr miss rate for SwapReq accesses
-system.cpu.dcache.SwapReq_mshr_misses              11                       # number of SwapReq MSHR misses
+system.cpu.dcache.SwapReq_mshr_misses               4                       # number of SwapReq MSHR misses
-system.cpu.dcache.WriteReq_accesses         166970997                       # number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_accesses          22901836                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 12404.292450                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency 12623.899964                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency 11404.292450                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency 11623.899964                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits             165264000                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_hits              22855133                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency   21174090000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency     589574000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.010223                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_miss_rate         0.002039                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses             1706997                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_misses               46703                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_miss_latency  19467093000                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency    542871000                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate     0.010223                       # mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.002039                       # mshr miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_mshr_misses        1706997                       # number of WriteReq MSHR misses
+system.cpu.dcache.WriteReq_mshr_misses          46703                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  51.440428                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                 110.894471                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses           774778186                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses           105121305                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 12836.737637                       # average overall miss latency
+system.cpu.dcache.demand_avg_miss_latency 12031.341172                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency 11836.737637                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 11031.341172                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits               760003458                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits               104181806                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency    189659307000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency     11303433000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.019070                       # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate           0.008937                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses              14774728                       # number of demand (read+write) misses
+system.cpu.dcache.demand_misses                939499                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency 174884579000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency  10363934000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.019070                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate      0.008937                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses         14774728                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_misses           939499                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses          774778186                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses          105121305                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 12836.737637                       # average overall miss latency
+system.cpu.dcache.overall_avg_miss_latency 12031.341172                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency 11836.737637                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 11031.341172                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits              760003458                       # number of overall hits
+system.cpu.dcache.overall_hits              104181806                       # number of overall hits
-system.cpu.dcache.overall_miss_latency   189659307000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency    11303433000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.019070                       # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate          0.008937                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses             14774728                       # number of overall misses
+system.cpu.dcache.overall_misses               939499                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency 174884579000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency  10363934000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.019070                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate     0.008937                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses        14774728                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_misses          939499                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.dcache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@ -83,57 +83,57 @@ system.cpu.dcache.prefetcher.num_hwpf_issued            0
 system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu.dcache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu.dcache.replacements               14770643                       # number of replacements
+system.cpu.dcache.replacements                 935407                       # number of replacements
-system.cpu.dcache.sampled_refs               14774739                       # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs                 939503                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse               4094.628585                       # Cycle average of tags in use
+system.cpu.dcache.tagsinuse               3560.887601                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                760018895                       # Total number of references to valid blocks.
+system.cpu.dcache.total_refs                104185688                       # Total number of references to valid blocks.
-system.cpu.dcache.warmup_cycle             3913237000                       # Cycle when the warmup percentage was hit.
+system.cpu.dcache.warmup_cycle           134116230000                       # Cycle when the warmup percentage was hit.
-system.cpu.dcache.writebacks                  4191356                       # number of writebacks
+system.cpu.dcache.writebacks                    94807                       # number of writebacks
-system.cpu.icache.ReadReq_accesses         1719594535                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_accesses          243840173                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 13991.120977                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_miss_latency 13993.174061                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 12991.120977                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 12993.174061                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits             1719593634                       # number of ReadReq hits
+system.cpu.icache.ReadReq_hits              243839294                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency       12606000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency       12300000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.000001                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_miss_rate          0.000004                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  901                       # number of ReadReq misses
+system.cpu.icache.ReadReq_misses                  879                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_miss_latency     11705000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency     11421000                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.000001                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_miss_rate     0.000004                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses             901                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_mshr_misses             879                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs               1908538.994451                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs               277405.340159                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses          1719594535                       # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses           243840173                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 13991.120977                       # average overall miss latency
+system.cpu.icache.demand_avg_miss_latency 13993.174061                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 12991.120977                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 12993.174061                       # average overall mshr miss latency
-system.cpu.icache.demand_hits              1719593634                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits               243839294                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency        12606000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency        12300000                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.000001                       # miss rate for demand accesses
+system.cpu.icache.demand_miss_rate           0.000004                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   901                       # number of demand (read+write) misses
+system.cpu.icache.demand_misses                   879                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency     11705000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency     11421000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.000001                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate      0.000004                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses              901                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_misses              879                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses         1719594535                       # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses          243840173                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 13991.120977                       # average overall miss latency
+system.cpu.icache.overall_avg_miss_latency 13993.174061                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 12991.120977                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 12993.174061                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits             1719593634                       # number of overall hits
+system.cpu.icache.overall_hits              243839294                       # number of overall hits
-system.cpu.icache.overall_miss_latency       12606000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency       12300000                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.000001                       # miss rate for overall accesses
+system.cpu.icache.overall_miss_rate          0.000004                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  901                       # number of overall misses
+system.cpu.icache.overall_misses                  879                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency     11705000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency     11421000                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.000001                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate     0.000004                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses             901                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_misses             879                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.icache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@ -145,64 +145,60 @@ system.cpu.icache.prefetcher.num_hwpf_issued            0
 system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu.icache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu.icache.replacements                     31                       # number of replacements
+system.cpu.icache.replacements                     25                       # number of replacements
-system.cpu.icache.sampled_refs                    901                       # Sample count of references to valid blocks.
+system.cpu.icache.sampled_refs                    879                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                737.715884                       # Cycle average of tags in use
+system.cpu.icache.tagsinuse                716.200092                       # Cycle average of tags in use
-system.cpu.icache.total_refs               1719593634                       # Total number of references to valid blocks.
+system.cpu.icache.total_refs                243839294                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.idle_fraction                            0                       # Percentage of idle cycles
-system.cpu.l2cache.ReadReq_accesses          14775639                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_accesses            940381                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 12999.785859                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_miss_latency        13000                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 10999.785859                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency        11000                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_hits               8592784                       # number of ReadReq hits
+system.cpu.l2cache.ReadReq_hits                924777                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency   80375791000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency     202852000                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate         0.418449                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_miss_rate         0.016593                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses             6182855                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_misses               15604                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency  68010081000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency    171644000                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate     0.418449                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_miss_rate     0.016593                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses        6182855                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses          15604                       # number of ReadReq MSHR misses
-system.cpu.l2cache.Writeback_accesses         4191356                       # number of Writeback accesses(hits+misses)
+system.cpu.l2cache.Writeback_accesses           94807                       # number of Writeback accesses(hits+misses)
-system.cpu.l2cache.Writeback_hits             4164131                       # number of Writeback hits
+system.cpu.l2cache.Writeback_hits               94807                       # number of Writeback hits
 system.cpu.l2cache.Writeback_miss_rate       0.006496                       # miss rate for Writeback accesses
 system.cpu.l2cache.Writeback_misses             27225                       # number of Writeback misses
 system.cpu.l2cache.Writeback_mshr_miss_rate     0.006496                       # mshr miss rate for Writeback accesses
 system.cpu.l2cache.Writeback_mshr_misses        27225                       # number of Writeback MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                  2.063273                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                 65.341195                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses           14775639                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses             940381                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 12999.785859                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency        13000                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency 10999.785859                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency        11000                       # average overall mshr miss latency
-system.cpu.l2cache.demand_hits                8592784                       # number of demand (read+write) hits
+system.cpu.l2cache.demand_hits                 924777                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency    80375791000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency      202852000                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_rate          0.418449                       # miss rate for demand accesses
+system.cpu.l2cache.demand_miss_rate          0.016593                       # miss rate for demand accesses
-system.cpu.l2cache.demand_misses              6182855                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses                15604                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency  68010081000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency    171644000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate     0.418449                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_miss_rate     0.016593                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses         6182855                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_misses           15604                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses          18966995                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses           1035188                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 12942.794779                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency        13000                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency 10999.785859                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency        11000                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.l2cache.overall_hits              12756915                       # number of overall hits
+system.cpu.l2cache.overall_hits               1019584                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency   80375791000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency     202852000                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_rate         0.327415                       # miss rate for overall accesses
+system.cpu.l2cache.overall_miss_rate         0.015074                       # miss rate for overall accesses
-system.cpu.l2cache.overall_misses             6210080                       # number of overall misses
+system.cpu.l2cache.overall_misses               15604                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency  68010081000                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency    171644000                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate     0.325980                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_miss_rate     0.015074                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses        6182855                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_misses          15604                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
 system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache            0                       # number of hwpf that were already in the cache
@ -214,17 +210,17 @@ system.cpu.l2cache.prefetcher.num_hwpf_issued            0
 system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit            0                       # number of hwpf removed because MSHR allocated
 system.cpu.l2cache.prefetcher.num_hwpf_span_page            0                       # number of hwpf spanning a virtual page
 system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0                       # number of hwpf that got squashed due to a miss aborting calculation time
-system.cpu.l2cache.replacements               6150087                       # number of replacements
+system.cpu.l2cache.replacements                     0                       # number of replacements
-system.cpu.l2cache.sampled_refs               6182855                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                 15604                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse             26129.060966                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse             10833.027960                       # Cycle average of tags in use
-system.cpu.l2cache.total_refs                12756915                       # Total number of references to valid blocks.
+system.cpu.l2cache.total_refs                 1019584                       # Total number of references to valid blocks.
-system.cpu.l2cache.warmup_cycle          806915893000                       # Cycle when the warmup percentage was hit.
+system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
-system.cpu.l2cache.writebacks                 1069081                       # number of writebacks
+system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.not_idle_fraction                        1                       # Percentage of non-idle cycles
-system.cpu.numCycles                     2669284585000                       # number of cpu cycles simulated
+system.cpu.numCycles                     359340764000                       # number of cpu cycles simulated
-system.cpu.num_insts                       1719594534                       # Number of instructions executed
+system.cpu.num_insts                        243840172                       # Number of instructions executed
-system.cpu.num_refs                         774793634                       # Number of memory references
+system.cpu.num_refs                         105125191                       # Number of memory references
-system.cpu.workload.PROG:num_syscalls             632                       # Number of system calls
+system.cpu.workload.PROG:num_syscalls             428                       # Number of system calls
 ---------- End Simulation Statistics   ----------
--- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/mcf.out
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/mcf.out
--- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/stdout
+++ b/tests/long/10.mcf/ref/sparc/linux/simple-timing/stdout
@ -4,19 +4,15 @@ by  Andreas Loebel
 Copyright (c) 1998,1999   ZIB Berlin
 All Rights Reserved.
-nodes                      : 1800
+nodes                      : 500
-active arcs                : 8190
+active arcs                : 1905
-simplex iterations         : 6837
+simplex iterations         : 1502
-flow value                 : 12860044181
+flow value                 : 4990014995
-new implicit arcs          : 300000
+new implicit arcs          : 23867
-active arcs                : 308190
+active arcs                : 25772
-simplex iterations         : 11843
+simplex iterations         : 2663
-flow value                 : 9360043604
+flow value                 : 3080014995
-new implicit arcs          : 22787
+checksum                   : 68389
 active arcs                : 330977
 simplex iterations         : 11931
 flow value                 : 9360043512
 checksum                   : 798014
 optimal
 M5 Simulator System
@ -25,9 +21,9 @@ The Regents of The University of Michigan
 All Rights Reserved
-M5 compiled May 15 2007 13:02:31
+M5 compiled Jun 21 2007 21:15:48
-M5 started Tue May 15 15:05:32 2007
+M5 started Fri Jun 22 02:01:52 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-timing tests/run.py long/10.mcf/sparc/linux/simple-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 2669284585000 because target called exit()
+Exiting @ tick 359340764000 because target called exit()
--- a/tests/long/10.mcf/test.py
+++ b/tests/long/10.mcf/test.py
@ -29,5 +29,5 @@
 m5.AddToPath('../configs/common')
 from cpu2000 import mcf
-workload = mcf(isa, opsys, 'lgred')
+workload = mcf(isa, opsys, 'smred')
 root.system.cpu.workload = workload.makeLiveProcess()
--- a/tests/long/20.parser/test.py
+++ b/tests/long/20.parser/test.py
@ -29,5 +29,5 @@
 m5.AddToPath('../configs/common')
 from cpu2000 import parser
-workload = parser(isa, opsys, 'lgred')
+workload = parser(isa, opsys, 'mdred')
 root.system.cpu.workload = workload.makeLiveProcess()
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini
@ -21,6 +21,7 @@ SQEntries=32
 SSITSize=1024
 activity=0
 backComSize=5
 cachePorts=200
 choiceCtrBits=2
 choicePredictorSize=8192
 clock=500
@ -74,6 +75,15 @@ renameToFetchDelay=1
 renameToIEWDelay=2
 renameToROBDelay=1
 renameWidth=8
 smtCommitPolicy=RoundRobin
 smtFetchPolicy=SingleThread
 smtIQPolicy=Partitioned
 smtIQThreshold=100
 smtLSQPolicy=Partitioned
 smtLSQThreshold=100
 smtNumFetchingThreads=1
 smtROBPolicy=Partitioned
 smtROBThreshold=100
 squashWidth=8
 system=system
 trapLatency=13
@ -86,6 +96,7 @@ icache_port=system.cpu.icache.cpu_side
 [system.cpu.dcache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@ -261,6 +272,7 @@ opLat=3
 [system.cpu.icache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@ -299,6 +311,7 @@ mem_side=system.cpu.toL2Bus.port[0]
 [system.cpu.l2cache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt
@ -1,39 +1,40 @@
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                          524                       # Number of BTB hits
+global.BPredUnit.BTBHits                          522                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      1590                       # Number of BTB lookups
+global.BPredUnit.BTBLookups                      1584                       # Number of BTB lookups
 global.BPredUnit.RASInCorrect                      57                       # Number of incorrect RAS predictions.
 global.BPredUnit.condIncorrect                    422                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   1093                       # Number of conditional branches predicted
+global.BPredUnit.condPredicted                   1088                       # Number of conditional branches predicted
-global.BPredUnit.lookups                         1843                       # Number of BP lookups
+global.BPredUnit.lookups                         1837                       # Number of BP lookups
 global.BPredUnit.usedRAS                          241                       # Number of times the RAS was used to get a target.
-host_inst_rate                                   7145                       # Simulator instruction rate (inst/s)
+host_inst_rate                                  39303                       # Simulator instruction rate (inst/s)
-host_seconds                                     0.79                       # Real time elapsed on the host
+host_mem_usage                                 153768                       # Number of bytes of host memory used
-host_tick_rate                                5828052                       # Simulator tick rate (ticks/s)
+host_seconds                                     0.14                       # Real time elapsed on the host
 host_tick_rate                               32016268                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads                 17                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores               127                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                  1876                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                  1874                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 1144                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 1142                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        5623                       # Number of instructions simulated
 sim_seconds                                  0.000005                       # Number of seconds simulated
-sim_ticks                                     4588000                       # Number of ticks simulated
+sim_ticks                                     4589500                       # Number of ticks simulated
 system.cpu.commit.COM:branches                    862                       # Number of branches committed
 system.cpu.commit.COM:bw_lim_events               104                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples         8514                      
+system.cpu.commit.COM:committed_per_cycle.samples         8521                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0         6195   7276.25%           
+                               0         6200   7276.14%           
-                               1         1158   1360.11%           
+                               1         1160   1361.34%           
-                               2          469    550.86%           
+                               2          469    550.40%           
-                               3          176    206.72%           
+                               3          177    207.72%           
-                               4          131    153.86%           
+                               4          131    153.74%           
-                               5           99    116.28%           
+                               5           98    115.01%           
-                               6          109    128.02%           
+                               6          109    127.92%           
-                               7           73     85.74%           
+                               7           73     85.67%           
-                               8          104    122.15%           
+                               8          104    122.05%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
@ -45,27 +46,27 @@ system.cpu.commit.COM:swp_count                     0                       # Nu
 system.cpu.commit.branchMispredicts               350                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts           5640                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls              17                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts            3588                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts            3571                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                        5623                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                  5623                       # Number of Instructions Simulated
-system.cpu.cpi                               1.635604                       # CPI: Cycles Per Instruction
+system.cpu.cpi                               1.636315                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                         1.635604                       # CPI: Total CPI of All Threads
+system.cpu.cpi_total                         1.636315                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               1475                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_accesses               1470                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  5928.571429                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_miss_latency  5932.330827                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency         5385                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency         5380                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   1342                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits                   1337                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency         788500                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency         789000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.090169                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_miss_rate          0.090476                       # miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_misses                  133                       # number of ReadReq misses
 system.cpu.dcache.ReadReq_mshr_hits                33                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       538500                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency       538000                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.067797                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.068027                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses             100                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses               812                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency  4501.457726                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency  4504.373178                       # average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency  5116.438356                       # average WriteReq mshr miss latency
 system.cpu.dcache.WriteReq_hits                   469                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       1544000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency       1545000                       # number of WriteReq miss cycles
 system.cpu.dcache.WriteReq_miss_rate         0.422414                       # miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_misses                 343                       # number of WriteReq misses
 system.cpu.dcache.WriteReq_mshr_hits              270                       # number of WriteReq MSHR hits
@ -74,37 +75,37 @@ system.cpu.dcache.WriteReq_mshr_miss_rate     0.089901                       # m
 system.cpu.dcache.WriteReq_mshr_misses             73                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  10.468208                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                  10.439306                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                2287                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses                2282                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  4900.210084                       # average overall miss latency
+system.cpu.dcache.demand_avg_miss_latency  4903.361345                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  5271.676301                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  5268.786127                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    1811                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits                    1806                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         2332500                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency         2334000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.208133                       # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate           0.208589                       # miss rate for demand accesses
 system.cpu.dcache.demand_misses                   476                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                303                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency       912000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency       911500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.075645                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate      0.075811                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses              173                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               2287                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses               2282                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  4900.210084                       # average overall miss latency
+system.cpu.dcache.overall_avg_miss_latency  4903.361345                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  5271.676301                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  5268.786127                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   1811                       # number of overall hits
+system.cpu.dcache.overall_hits                   1806                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        2332500                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency        2334000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.208133                       # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate          0.208589                       # miss rate for overall accesses
 system.cpu.dcache.overall_misses                  476                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits               303                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency       912000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency       911500                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.075645                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate     0.075811                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses             173                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@ -120,88 +121,88 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                    173                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                112.670676                       # Cycle average of tags in use
+system.cpu.dcache.tagsinuse                112.669258                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     1811                       # Total number of references to valid blocks.
+system.cpu.dcache.total_refs                     1806                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
 system.cpu.decode.DECODE:BlockedCycles            389                       # Number of cycles decode is blocked
 system.cpu.decode.DECODE:BranchMispred             75                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           144                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:BranchResolved           143                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts           10499                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:DecodedInsts           10466                       # Number of instructions handled by decode
 system.cpu.decode.DECODE:IdleCycles              6230                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles               1848                       # Number of cycles decode is running
+system.cpu.decode.DECODE:RunCycles               1855                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles             682                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:SquashCycles             679                       # Number of cycles decode is squashing
 system.cpu.decode.DECODE:SquashedInsts            228                       # Number of squashed instructions handled by decode
 system.cpu.decode.DECODE:UnblockCycles             48                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                        1843                       # Number of branches that fetch encountered
+system.cpu.fetch.Branches                        1837                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                      1471                       # Number of cache lines fetched
+system.cpu.fetch.CacheLines                      1469                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                          3451                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.Cycles                          3456                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   269                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.IcacheSquashes                   267                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          11450                       # Number of instructions fetch has processed
+system.cpu.fetch.Insts                          11417                       # Number of instructions fetch has processed
 system.cpu.fetch.SquashCycles                     455                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.200391                       # Number of branch fetches per cycle
+system.cpu.fetch.branchRate                  0.199652                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles               1471                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.icacheStallCycles               1469                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches                765                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.predictedBranches                763                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        1.244971                       # Number of inst fetches per cycle
+system.cpu.fetch.rate                        1.240843                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples                9197                      
+system.cpu.fetch.rateDist.samples                9201                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0         7219   7849.30%           
+                               0         7216   7842.63%           
-                               1          167    181.58%           
+                               1          168    182.59%           
-                               2          147    159.83%           
+                               2          148    160.85%           
-                               3          129    140.26%           
+                               3          136    147.81%           
-                               4          200    217.46%           
+                               4          214    232.58%           
-                               5          139    151.14%           
+                               5          138    149.98%           
-                               6          181    196.80%           
+                               6          177    192.37%           
-                               7           99    107.64%           
+                               7           95    103.25%           
-                               8          916    995.98%           
+                               8          909    987.94%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
-system.cpu.icache.ReadReq_accesses               1471                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_accesses               1469                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  5375.757576                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_miss_latency  5381.818182                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4524.038462                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4530.448718                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   1141                       # number of ReadReq hits
+system.cpu.icache.ReadReq_hits                   1139                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        1774000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency        1776000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.224337                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_miss_rate          0.224643                       # miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_misses                  330                       # number of ReadReq misses
 system.cpu.icache.ReadReq_mshr_hits                18                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency      1411500                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency      1413500                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.212101                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_miss_rate     0.212389                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             312                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                   3.657051                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                   3.650641                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                1471                       # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses                1469                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  5375.757576                       # average overall miss latency
+system.cpu.icache.demand_avg_miss_latency  5381.818182                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4524.038462                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4530.448718                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    1141                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits                    1139                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         1774000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency         1776000                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.224337                       # miss rate for demand accesses
+system.cpu.icache.demand_miss_rate           0.224643                       # miss rate for demand accesses
 system.cpu.icache.demand_misses                   330                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                 18                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      1411500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency      1413500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.212101                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate      0.212389                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              312                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               1471                       # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses               1469                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  5375.757576                       # average overall miss latency
+system.cpu.icache.overall_avg_miss_latency  5381.818182                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4524.038462                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4530.448718                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   1141                       # number of overall hits
+system.cpu.icache.overall_hits                   1139                       # number of overall hits
-system.cpu.icache.overall_miss_latency        1774000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency        1776000                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.224337                       # miss rate for overall accesses
+system.cpu.icache.overall_miss_rate          0.224643                       # miss rate for overall accesses
 system.cpu.icache.overall_misses                  330                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                18                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      1411500                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency      1413500                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.212101                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate     0.212389                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             312                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@ -217,39 +218,39 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      0                       # number of replacements
 system.cpu.icache.sampled_refs                    312                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                165.938349                       # Cycle average of tags in use
+system.cpu.icache.tagsinuse                165.921810                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     1141                       # Total number of references to valid blocks.
+system.cpu.icache.total_refs                     1139                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idleCycles                            2475                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.idleCycles                            2474                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     1148                       # Number of branches executed
+system.cpu.iew.EXEC:branches                     1144                       # Number of branches executed
 system.cpu.iew.EXEC:nop                            40                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.837338                       # Inst execution rate
+system.cpu.iew.EXEC:rate                     0.835018                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         2524                       # number of memory reference insts executed
+system.cpu.iew.EXEC:refs                         2519                       # number of memory reference insts executed
 system.cpu.iew.EXEC:stores                        977                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                      5205                       # num instructions consuming a value
+system.cpu.iew.WB:consumers                      5193                       # num instructions consuming a value
-system.cpu.iew.WB:count                          7402                       # cumulative count of insts written-back
+system.cpu.iew.WB:count                          7387                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.742747                       # average fanout of values written-back
+system.cpu.iew.WB:fanout                     0.742923                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      3866                       # num instructions producing a value
+system.cpu.iew.WB:producers                      3858                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.804828                       # insts written-back per cycle
+system.cpu.iew.WB:rate                       0.802848                       # insts written-back per cycle
-system.cpu.iew.WB:sent                           7467                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent                           7452                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                  374                       # Number of branch mispredicts detected at execute
+system.cpu.iew.branchMispredicts                  373                       # Number of branch mispredicts detected at execute
 system.cpu.iew.iewBlockCycles                       4                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  1876                       # Number of dispatched load instructions
+system.cpu.iew.iewDispLoadInsts                  1874                       # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts                 22                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts               315                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispSquashedInsts               302                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 1144                       # Number of dispatched store instructions
+system.cpu.iew.iewDispStoreInsts                 1142                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts                9245                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewDispatchedInsts                9228                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  1547                       # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts                  1542                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               280                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecSquashedInsts               285                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                  7701                       # Number of executed instructions
+system.cpu.iew.iewExecutedInsts                  7683                       # Number of executed instructions
 system.cpu.iew.iewIQFullEvents                      0                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                    682                       # Number of cycles IEW is squashing
+system.cpu.iew.iewSquashCycles                    679                       # Number of cycles IEW is squashing
 system.cpu.iew.iewUnblockCycles                     0                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
@ -259,17 +260,17 @@ system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Nu
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.memOrderViolation           63                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads          897                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedLoads          895                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores          332                       # Number of stores squashed
+system.cpu.iew.lsq.thread.0.squashedStores          330                       # Number of stores squashed
 system.cpu.iew.memOrderViolationEvents             63                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          263                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedNotTakenIncorrect          262                       # Number of branches that were predicted not taken incorrectly
 system.cpu.iew.predictedTakenIncorrect            111                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc                               0.611395                       # IPC: Instructions Per Cycle
+system.cpu.ipc                               0.611129                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.611395                       # IPC: Total IPC of All Threads
+system.cpu.ipc_total                         0.611129                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                    7981                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0                    7968                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                      No_OpClass            2      0.03%            # Type of FU issued
-                          IntAlu         5322     66.68%            # Type of FU issued
+                          IntAlu         5314     66.69%            # Type of FU issued
                         IntMult            1      0.01%            # Type of FU issued
                          IntDiv            0      0.00%            # Type of FU issued
                        FloatAdd            2      0.03%            # Type of FU issued
@ -278,13 +279,13 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
                       FloatMult            0      0.00%            # Type of FU issued
                        FloatDiv            0      0.00%            # Type of FU issued
                       FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         1662     20.82%            # Type of FU issued
+                         MemRead         1659     20.82%            # Type of FU issued
-                        MemWrite          992     12.43%            # Type of FU issued
+                        MemWrite          990     12.42%            # Type of FU issued
                       IprAccess            0      0.00%            # Type of FU issued
                    InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt                   106                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt                   105                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.013282                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate             0.013178                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                      No_OpClass            0      0.00%            # attempts to use FU when none available
                          IntAlu            0      0.00%            # attempts to use FU when none available
@ -296,41 +297,41 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                       FloatMult            0      0.00%            # attempts to use FU when none available
                        FloatDiv            0      0.00%            # attempts to use FU when none available
                       FloatSqrt            0      0.00%            # attempts to use FU when none available
-                         MemRead           71     66.98%            # attempts to use FU when none available
+                         MemRead           70     66.67%            # attempts to use FU when none available
-                        MemWrite           35     33.02%            # attempts to use FU when none available
+                        MemWrite           35     33.33%            # attempts to use FU when none available
                       IprAccess            0      0.00%            # attempts to use FU when none available
                    InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples         9197                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples         9201                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0         5952   6471.68%           
+                               0         5952   6468.86%           
-                               1         1107   1203.65%           
+                               1         1111   1207.48%           
-                               2          919    999.24%           
+                               2          928   1008.59%           
-                               3          442    480.59%           
+                               3          433    470.60%           
-                               4          375    407.74%           
+                               4          378    410.82%           
-                               5          250    271.83%           
+                               5          251    272.80%           
-                               6          115    125.04%           
+                               6          111    120.64%           
-                               7           26     28.27%           
+                               7           27     29.34%           
-                               8           11     11.96%           
+                               8           10     10.87%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
-system.cpu.iq.ISSUE:rate                     0.867783                       # Inst issue rate
+system.cpu.iq.ISSUE:rate                     0.865993                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                       9183                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsAdded                       9166                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                      7981                       # Number of instructions issued
+system.cpu.iq.iqInstsIssued                      7968                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded                  22                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined            3171                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsExamined            3154                       # Number of squashed instructions iterated over during squash; mainly for profiling
 system.cpu.iq.iqSquashedInstsIssued                22                       # Number of squashed instructions issued
 system.cpu.iq.iqSquashedNonSpecRemoved              5                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined         2045                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.iq.iqSquashedOperandsExamined         2035                       # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.l2cache.ReadReq_accesses               483                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  4639.751553                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_miss_latency  4644.927536                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2463.768116                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2467.908903                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       2241000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency       2243500                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_misses                 483                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      1190000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency      1192000                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_mshr_misses            483                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@ -342,29 +343,29 @@ system.cpu.l2cache.blocked_cycles_no_mshrs            0                       #
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses                483                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  4639.751553                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency  4644.927536                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  2463.768116                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  2467.908903                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        2241000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        2243500                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  483                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency      1190000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency      1192000                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_misses             483                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses               483                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  4639.751553                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency  4644.927536                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  2463.768116                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  2467.908903                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       2241000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       2243500                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 483                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency      1190000                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency      1192000                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_misses            483                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@ -381,27 +382,27 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.sampled_refs                   483                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               278.222582                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               278.204751                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                             9197                       # number of cpu cycles simulated
+system.cpu.numCycles                             9201                       # number of cpu cycles simulated
 system.cpu.rename.RENAME:BlockCycles               15                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           4051                       # Number of HB maps that are committed
-system.cpu.rename.RENAME:IdleCycles              6383                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:IdleCycles              6382                       # Number of cycles rename is idle
 system.cpu.rename.RENAME:LSQFullEvents             70                       # Number of times rename has blocked due to LSQ full
-system.cpu.rename.RENAME:RenameLookups          12854                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenameLookups          12837                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           10031                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedInsts           10018                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands         7485                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RenamedOperands         7477                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles               1746                       # Number of cycles rename is running
+system.cpu.rename.RENAME:RunCycles               1754                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles             682                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:SquashCycles             679                       # Number of cycles rename is squashing
 system.cpu.rename.RENAME:UnblockCycles            101                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps              3434                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:UndoneMaps              3426                       # Number of HB maps that are undone due to squashing
 system.cpu.rename.RENAME:serializeStallCycles          270                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts           26                       # count of serializing insts renamed
 system.cpu.rename.RENAME:skidInsts                380                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts           20                       # count of temporary serializing insts renamed
-system.cpu.timesIdled                              25                       # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.timesIdled                              26                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload.PROG:num_syscalls              17                       # Number of system calls
 ---------- End Simulation Statistics   ----------
--- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout
@ -6,9 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
-M5 compiled Jun 10 2007 14:06:20
+M5 compiled Jun 21 2007 21:25:27
-M5 started Sun Jun 10 14:22:32 2007
+M5 started Fri Jun 22 00:04:38 2007
-M5 executing on iceaxe
+M5 executing on zizzer.eecs.umich.edu
-command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 4588000 because target called exit()
+Exiting @ tick 4589500 because target called exit()
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini
@ -21,6 +21,7 @@ SQEntries=32
 SSITSize=1024
 activity=0
 backComSize=5
 cachePorts=200
 choiceCtrBits=2
 choicePredictorSize=8192
 clock=500
@ -74,6 +75,15 @@ renameToFetchDelay=1
 renameToIEWDelay=2
 renameToROBDelay=1
 renameWidth=8
 smtCommitPolicy=RoundRobin
 smtFetchPolicy=SingleThread
 smtIQPolicy=Partitioned
 smtIQThreshold=100
 smtLSQPolicy=Partitioned
 smtLSQThreshold=100
 smtNumFetchingThreads=1
 smtROBPolicy=Partitioned
 smtROBThreshold=100
 squashWidth=8
 system=system
 trapLatency=13
@ -86,6 +96,7 @@ icache_port=system.cpu.icache.cpu_side
 [system.cpu.dcache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@ -261,6 +272,7 @@ opLat=3
 [system.cpu.icache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@ -299,6 +311,7 @@ mem_side=system.cpu.toL2Bus.port[0]
 [system.cpu.l2cache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt
@ -8,9 +8,10 @@ global.BPredUnit.condIncorrect                    208                       # Nu
 global.BPredUnit.condPredicted                    376                       # Number of conditional branches predicted
 global.BPredUnit.lookups                          738                       # Number of BP lookups
 global.BPredUnit.usedRAS                          140                       # Number of times the RAS was used to get a target.
-host_inst_rate                                   8881                       # Simulator instruction rate (inst/s)
+host_inst_rate                                  39805                       # Simulator instruction rate (inst/s)
-host_seconds                                     0.27                       # Real time elapsed on the host
+host_mem_usage                                 153128                       # Number of bytes of host memory used
-host_tick_rate                                7632084                       # Simulator tick rate (ticks/s)
+host_seconds                                     0.06                       # Real time elapsed on the host
 host_tick_rate                               34110715                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads                  8                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores                 7                       # Number of conflicting stores.
 memdepunit.memDep.insertedLoads                   608                       # Number of loads inserted to the mem dependence unit.
@ -18,22 +19,22 @@ memdepunit.memDep.insertedStores                  357                       # Nu
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                        2387                       # Number of instructions simulated
 sim_seconds                                  0.000002                       # Number of seconds simulated
-sim_ticks                                     2053000                       # Number of ticks simulated
+sim_ticks                                     2055000                       # Number of ticks simulated
 system.cpu.commit.COM:branches                    396                       # Number of branches committed
 system.cpu.commit.COM:bw_lim_events                41                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples         3906                      
+system.cpu.commit.COM:committed_per_cycle.samples         3910                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0         2949   7549.92%           
+                               0         2950   7544.76%           
-                               1          266    681.00%           
+                               1          266    680.31%           
-                               2          333    852.53%           
+                               2          336    859.34%           
-                               3          131    335.38%           
+                               3          131    335.04%           
-                               4           74    189.45%           
+                               4           76    194.37%           
-                               5           64    163.85%           
+                               5           65    166.24%           
-                               6           29     74.24%           
+                               6           27     69.05%           
-                               7           19     48.64%           
+                               7           18     46.04%           
-                               8           41    104.97%           
+                               8           41    104.86%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
@ -48,17 +49,17 @@ system.cpu.commit.commitNonSpecStalls               4                       # Th
 system.cpu.commit.commitSquashedInsts             978                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                        2387                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                  2387                       # Number of Instructions Simulated
-system.cpu.cpi                               1.721408                       # CPI: Cycles Per Instruction
+system.cpu.cpi                               1.723083                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                         1.721408                       # CPI: Total CPI of All Threads
+system.cpu.cpi_total                         1.723083                       # CPI: Total CPI of All Threads
 system.cpu.dcache.ReadReq_accesses                514                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  5456.521739                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_miss_latency  5391.304348                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency  4737.288136                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  4669.491525                       # average ReadReq mshr miss latency
 system.cpu.dcache.ReadReq_hits                    445                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency         376500                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency         372000                       # number of ReadReq miss cycles
 system.cpu.dcache.ReadReq_miss_rate          0.134241                       # miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_misses                   69                       # number of ReadReq misses
 system.cpu.dcache.ReadReq_mshr_hits                10                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       279500                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency       275500                       # number of ReadReq MSHR miss cycles
 system.cpu.dcache.ReadReq_mshr_miss_rate     0.114786                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses              59                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses               294                       # number of WriteReq accesses(hits+misses)
@ -81,29 +82,29 @@ system.cpu.dcache.blocked_cycles_no_mshrs            0                       # n
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
 system.cpu.dcache.demand_accesses                 808                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  5564.285714                       # average overall miss latency
+system.cpu.dcache.demand_avg_miss_latency  5532.142857                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  4821.428571                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  4773.809524                       # average overall mshr miss latency
 system.cpu.dcache.demand_hits                     668                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency          779000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency          774500                       # number of demand (read+write) miss cycles
 system.cpu.dcache.demand_miss_rate           0.173267                       # miss rate for demand accesses
 system.cpu.dcache.demand_misses                   140                       # number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits                 56                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency       405000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency       401000                       # number of demand (read+write) MSHR miss cycles
 system.cpu.dcache.demand_mshr_miss_rate      0.103960                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses               84                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
 system.cpu.dcache.overall_accesses                808                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  5564.285714                       # average overall miss latency
+system.cpu.dcache.overall_avg_miss_latency  5532.142857                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  4821.428571                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  4773.809524                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_hits                    668                       # number of overall hits
-system.cpu.dcache.overall_miss_latency         779000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency         774500                       # number of overall miss cycles
 system.cpu.dcache.overall_miss_rate          0.173267                       # miss rate for overall accesses
 system.cpu.dcache.overall_misses                  140                       # number of overall misses
 system.cpu.dcache.overall_mshr_hits                56                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency       405000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency       401000                       # number of overall MSHR miss cycles
 system.cpu.dcache.overall_mshr_miss_rate     0.103960                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses              84                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@ -120,7 +121,7 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                     84                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                 51.851940                       # Cycle average of tags in use
+system.cpu.dcache.tagsinuse                 51.873008                       # Cycle average of tags in use
 system.cpu.dcache.total_refs                      668                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
@ -129,43 +130,43 @@ system.cpu.decode.DECODE:BranchMispred             81                       # Nu
 system.cpu.decode.DECODE:BranchResolved           123                       # Number of times decode resolved a branch
 system.cpu.decode.DECODE:DecodedInsts            4033                       # Number of instructions handled by decode
 system.cpu.decode.DECODE:IdleCycles              3045                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles                767                       # Number of cycles decode is running
+system.cpu.decode.DECODE:RunCycles                771                       # Number of cycles decode is running
 system.cpu.decode.DECODE:SquashCycles             202                       # Number of cycles decode is squashing
 system.cpu.decode.DECODE:SquashedInsts            298                       # Number of squashed instructions handled by decode
 system.cpu.fetch.Branches                         738                       # Number of branches that fetch encountered
 system.cpu.fetch.CacheLines                       654                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                          1440                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.Cycles                          1444                       # Number of cycles fetch has run and was not squashing or blocked
 system.cpu.fetch.IcacheSquashes                   120                       # Number of outstanding Icache misses that were squashed
 system.cpu.fetch.Insts                           4685                       # Number of instructions fetch has processed
 system.cpu.fetch.SquashCycles                     218                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.179606                       # Number of branch fetches per cycle
+system.cpu.fetch.branchRate                  0.179431                       # Number of branch fetches per cycle
 system.cpu.fetch.icacheStallCycles                654                       # Number of cycles fetch is stalled on an Icache miss
 system.cpu.fetch.predictedBranches                272                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        1.140180                       # Number of inst fetches per cycle
+system.cpu.fetch.rate                        1.139071                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples                4109                      
+system.cpu.fetch.rateDist.samples                4113                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0         3325   8091.99%           
+                               0         3325   8084.12%           
-                               1           32     77.88%           
+                               1           32     77.80%           
-                               2           74    180.09%           
+                               2           80    194.51%           
-                               3           53    128.99%           
+                               3           50    121.57%           
-                               4           99    240.93%           
+                               4           99    240.70%           
-                               5           49    119.25%           
+                               5           52    126.43%           
-                               6           38     92.48%           
+                               6           39     94.82%           
-                               7           35     85.18%           
+                               7           35     85.10%           
-                               8          404    983.21%           
+                               8          401    974.96%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 system.cpu.icache.ReadReq_accesses                654                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  5296.019900                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_miss_latency  5298.507463                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4553.763441                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4556.451613                       # average ReadReq mshr miss latency
 system.cpu.icache.ReadReq_hits                    453                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        1064500                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency        1065000                       # number of ReadReq miss cycles
 system.cpu.icache.ReadReq_miss_rate          0.307339                       # miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_misses                  201                       # number of ReadReq misses
 system.cpu.icache.ReadReq_mshr_hits                15                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency       847000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency       847500                       # number of ReadReq MSHR miss cycles
 system.cpu.icache.ReadReq_mshr_miss_rate     0.284404                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             186                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@ -177,29 +178,29 @@ system.cpu.icache.blocked_cycles_no_mshrs            0                       # n
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
 system.cpu.icache.demand_accesses                 654                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  5296.019900                       # average overall miss latency
+system.cpu.icache.demand_avg_miss_latency  5298.507463                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4553.763441                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4556.451613                       # average overall mshr miss latency
 system.cpu.icache.demand_hits                     453                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         1064500                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency         1065000                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_rate           0.307339                       # miss rate for demand accesses
 system.cpu.icache.demand_misses                   201                       # number of demand (read+write) misses
 system.cpu.icache.demand_mshr_hits                 15                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency       847000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency       847500                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_rate      0.284404                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              186                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
 system.cpu.icache.overall_accesses                654                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  5296.019900                       # average overall miss latency
+system.cpu.icache.overall_avg_miss_latency  5298.507463                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4553.763441                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4556.451613                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_hits                    453                       # number of overall hits
-system.cpu.icache.overall_miss_latency        1064500                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency        1065000                       # number of overall miss cycles
 system.cpu.icache.overall_miss_rate          0.307339                       # miss rate for overall accesses
 system.cpu.icache.overall_misses                  201                       # number of overall misses
 system.cpu.icache.overall_mshr_hits                15                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency       847000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency       847500                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_rate     0.284404                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             186                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@ -216,14 +217,14 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      0                       # number of replacements
 system.cpu.icache.sampled_refs                    186                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                106.237740                       # Cycle average of tags in use
+system.cpu.icache.tagsinuse                106.293956                       # Cycle average of tags in use
 system.cpu.icache.total_refs                      453                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.idleCycles                            2992                       # Total number of cycles that the CPU has spent unscheduled due to idling
 system.cpu.iew.EXEC:branches                      501                       # Number of branches executed
 system.cpu.iew.EXEC:nop                           234                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.727184                       # Inst execution rate
+system.cpu.iew.EXEC:rate                     0.726477                       # Inst execution rate
 system.cpu.iew.EXEC:refs                          878                       # number of memory reference insts executed
 system.cpu.iew.EXEC:stores                        333                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
@ -233,7 +234,7 @@ system.cpu.iew.WB:fanout                     0.799637                       # av
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:producers                      1321                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.709175                       # insts written-back per cycle
+system.cpu.iew.WB:rate                       0.708485                       # insts written-back per cycle
 system.cpu.iew.WB:sent                           2931                       # cumulative count of insts sent to commit
 system.cpu.iew.branchMispredicts                  135                       # Number of branch mispredicts detected at execute
 system.cpu.iew.iewBlockCycles                       0                       # Number of cycles IEW is blocking
@ -263,8 +264,8 @@ system.cpu.iew.lsq.thread.0.squashedStores           63                       #
 system.cpu.iew.memOrderViolationEvents             10                       # Number of memory order violations
 system.cpu.iew.predictedNotTakenIncorrect           98                       # Number of branches that were predicted not taken incorrectly
 system.cpu.iew.predictedTakenIncorrect             37                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc                               0.580920                       # IPC: Instructions Per Cycle
+system.cpu.ipc                               0.580355                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.580920                       # IPC: Total IPC of All Threads
+system.cpu.ipc_total                         0.580355                       # IPC: Total IPC of All Threads
 system.cpu.iq.ISSUE:FU_type_0                    3075                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                      No_OpClass            0      0.00%            # Type of FU issued
@ -301,21 +302,21 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                    InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples         4109                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples         4113                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0         2849   6933.56%           
+                               0         2848   6924.39%           
-                               1          475   1156.00%           
+                               1          479   1164.60%           
-                               2          270    657.09%           
+                               2          276    671.04%           
-                               3          217    528.11%           
+                               3          213    517.87%           
-                               4          159    386.96%           
+                               4          158    384.15%           
-                               5           86    209.30%           
+                               5           86    209.09%           
-                               6           34     82.75%           
+                               6           34     82.66%           
-                               7           13     31.64%           
+                               7           13     31.61%           
-                               8            6     14.60%           
+                               8            6     14.59%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
-system.cpu.iq.ISSUE:rate                     0.748357                       # Inst issue rate
+system.cpu.iq.ISSUE:rate                     0.747629                       # Inst issue rate
 system.cpu.iq.iqInstsAdded                       3330                       # Number of instructions added to the IQ (excludes non-spec)
 system.cpu.iq.iqInstsIssued                      3075                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded                   7                       # Number of non-speculative instructions added to the IQ
@ -323,9 +324,9 @@ system.cpu.iq.iqSquashedInstsExamined             790                       # Nu
 system.cpu.iq.iqSquashedNonSpecRemoved              3                       # Number of squashed non-spec instructions that were removed
 system.cpu.iq.iqSquashedOperandsExamined          409                       # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.l2cache.ReadReq_accesses               270                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency  4522.222222                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_miss_latency  4509.259259                       # average ReadReq miss latency
 system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2388.888889                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_miss_latency       1221000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency       1217500                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_misses                 270                       # number of ReadReq misses
 system.cpu.l2cache.ReadReq_mshr_miss_latency       645000                       # number of ReadReq MSHR miss cycles
@ -340,10 +341,10 @@ system.cpu.l2cache.blocked_cycles_no_mshrs            0                       #
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses                270                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency  4522.222222                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency  4509.259259                       # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency  2388.888889                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        1221000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        1217500                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  270                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
@ -354,11 +355,11 @@ system.cpu.l2cache.fast_writes                      0                       # nu
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses               270                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency  4522.222222                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency  4509.259259                       # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency  2388.888889                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       1221000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       1217500                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 270                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
@ -379,18 +380,18 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.sampled_refs                   270                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               158.236294                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               158.313436                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                             4109                       # number of cpu cycles simulated
+system.cpu.numCycles                             4113                       # number of cpu cycles simulated
 system.cpu.rename.RENAME:CommittedMaps           1768                       # Number of HB maps that are committed
 system.cpu.rename.RENAME:IdleCycles              3116                       # Number of cycles rename is idle
 system.cpu.rename.RENAME:LSQFullEvents              1                       # Number of times rename has blocked due to LSQ full
 system.cpu.rename.RENAME:RenameLookups           4416                       # Number of register rename lookups that rename has made
 system.cpu.rename.RENAME:RenamedInsts            3886                       # Number of instructions processed by rename
 system.cpu.rename.RENAME:RenamedOperands         2777                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles                696                       # Number of cycles rename is running
+system.cpu.rename.RENAME:RunCycles                700                       # Number of cycles rename is running
 system.cpu.rename.RENAME:SquashCycles             202                       # Number of cycles rename is squashing
 system.cpu.rename.RENAME:UnblockCycles              6                       # Number of cycles rename is unblocking
 system.cpu.rename.RENAME:UndoneMaps              1009                       # Number of HB maps that are undone due to squashing
--- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
+++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout
@ -6,9 +6,9 @@ The Regents of The University of Michigan
 All Rights Reserved
-M5 compiled Jun 10 2007 14:06:20
+M5 compiled Jun 21 2007 21:25:27
-M5 started Sun Jun 10 14:22:36 2007
+M5 started Fri Jun 22 00:04:44 2007
-M5 executing on iceaxe
+M5 executing on zizzer.eecs.umich.edu
-command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 2053000 because target called exit()
+Exiting @ tick 2055000 because target called exit()
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini
@ -21,6 +21,7 @@ SQEntries=32
 SSITSize=1024
 activity=0
 backComSize=5
 cachePorts=200
 choiceCtrBits=2
 choicePredictorSize=8192
 clock=500
@ -74,6 +75,15 @@ renameToFetchDelay=1
 renameToIEWDelay=2
 renameToROBDelay=1
 renameWidth=8
 smtCommitPolicy=RoundRobin
 smtFetchPolicy=SingleThread
 smtIQPolicy=Partitioned
 smtIQThreshold=100
 smtLSQPolicy=Partitioned
 smtLSQThreshold=100
 smtNumFetchingThreads=1
 smtROBPolicy=Partitioned
 smtROBThreshold=100
 squashWidth=8
 system=system
 trapLatency=13
@ -86,6 +96,7 @@ icache_port=system.cpu.icache.cpu_side
 [system.cpu.dcache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@ -261,6 +272,7 @@ opLat=3
 [system.cpu.icache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@ -299,6 +311,7 @@ mem_side=system.cpu.toL2Bus.port[0]
 [system.cpu.l2cache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt
@ -1,47 +1,48 @@
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                          674                       # Number of BTB hits
+global.BPredUnit.BTBHits                          696                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      3410                       # Number of BTB lookups
+global.BPredUnit.BTBLookups                      3414                       # Number of BTB lookups
-global.BPredUnit.RASInCorrect                     118                       # Number of incorrect RAS predictions.
+global.BPredUnit.RASInCorrect                     125                       # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect                   1115                       # Number of conditional branches incorrect
+global.BPredUnit.condIncorrect                   1124                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   2318                       # Number of conditional branches predicted
+global.BPredUnit.condPredicted                   2315                       # Number of conditional branches predicted
-global.BPredUnit.lookups                         3964                       # Number of BP lookups
+global.BPredUnit.lookups                         3940                       # Number of BP lookups
-global.BPredUnit.usedRAS                          532                       # Number of times the RAS was used to get a target.
+global.BPredUnit.usedRAS                          525                       # Number of times the RAS was used to get a target.
-host_inst_rate                                   8215                       # Simulator instruction rate (inst/s)
+host_inst_rate                                  52706                       # Simulator instruction rate (inst/s)
-host_seconds                                     1.37                       # Real time elapsed on the host
+host_mem_usage                                 154396                       # Number of bytes of host memory used
-host_tick_rate                                4009351                       # Simulator tick rate (ticks/s)
+host_seconds                                     0.21                       # Real time elapsed on the host
-memdepunit.memDep.conflictingLoads                 19                       # Number of conflicting loads.
+host_tick_rate                               25698682                       # Simulator tick rate (ticks/s)
-memdepunit.memDep.conflictingLoads                 18                       # Number of conflicting loads.
+memdepunit.memDep.conflictingLoads                 16                       # Number of conflicting loads.
-memdepunit.memDep.conflictingStores                54                       # Number of conflicting stores.
+memdepunit.memDep.conflictingLoads                 16                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores                53                       # Number of conflicting stores.
 memdepunit.memDep.conflictingStores                59                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                  1925                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                  1934                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedLoads                  1898                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                  1903                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 1088                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 1082                       # Number of stores inserted to the mem dependence unit.
 memdepunit.memDep.insertedStores                 1090                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                       11247                       # Number of instructions simulated
 sim_seconds                                  0.000005                       # Number of seconds simulated
-sim_ticks                                     5490000                       # Number of ticks simulated
+sim_ticks                                     5491500                       # Number of ticks simulated
 system.cpu.commit.COM:branches                   1724                       # Number of branches committed
 system.cpu.commit.COM:branches_0                  862                       # Number of branches committed
 system.cpu.commit.COM:branches_1                  862                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events               165                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events               168                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:bw_limited_0                  0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:bw_limited_1                  0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples        10929                      
+system.cpu.commit.COM:committed_per_cycle.samples        10926                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0         6410   5865.13%           
+                               0         6353   5814.57%           
-                               1         2019   1847.38%           
+                               1         2078   1901.89%           
-                               2          999    914.08%           
+                               2          996    911.59%           
-                               3          454    415.41%           
+                               3          472    432.00%           
-                               4          300    274.50%           
+                               4          296    270.91%           
-                               5          246    225.09%           
+                               5          241    220.57%           
-                               6          200    183.00%           
+                               6          192    175.73%           
-                               7          136    124.44%           
+                               7          130    118.98%           
-                               8          165    150.97%           
+                               8          168    153.76%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
@ -60,133 +61,133 @@ system.cpu.commit.COM:refs_1                     1791                       # Nu
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
 system.cpu.commit.COM:swp_count_0                   0                       # Number of s/w prefetches committed
 system.cpu.commit.COM:swp_count_1                   0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts               874                       # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts               885                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts          11281                       # The number of committed instructions
 system.cpu.commit.commitNonSpecStalls              34                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts            7769                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts            7777                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts_0                      5623                       # Number of Instructions Simulated
 system.cpu.committedInsts_1                      5624                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                 11247                       # Number of Instructions Simulated
-system.cpu.cpi_0                             1.952516                       # CPI: Cycles Per Instruction
+system.cpu.cpi_0                             1.952872                       # CPI: Cycles Per Instruction
-system.cpu.cpi_1                             1.952169                       # CPI: Cycles Per Instruction
+system.cpu.cpi_1                             1.952525                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                         0.976171                       # CPI: Total CPI of All Threads
+system.cpu.cpi_total                         0.976349                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               2969                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_accesses               2981                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_accesses_0             2969                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_accesses_0             2981                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency_0  7072.992701                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_miss_latency_0  7040.892193                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0  6972.361809                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0  6979.591837                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   2695                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits                   2712                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_hits_0                 2695                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits_0                 2712                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency        1938000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency        1894000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_latency_0      1938000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency_0      1894000                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate_0        0.092287                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_miss_rate_0        0.090238                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                  274                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_misses                  269                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_misses_0                274                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_misses_0                269                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits                75                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_hits                73                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_hits_0              75                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_hits_0              73                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency      1387500                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency      1368000                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_latency_0      1387500                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency_0      1368000                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate_0     0.067026                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_rate_0     0.065750                       # mshr miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_mshr_misses             199                       # number of ReadReq MSHR misses
+system.cpu.dcache.ReadReq_mshr_misses             196                       # number of ReadReq MSHR misses
-system.cpu.dcache.ReadReq_mshr_misses_0           199                       # number of ReadReq MSHR misses
+system.cpu.dcache.ReadReq_mshr_misses_0           196                       # number of ReadReq MSHR misses
 system.cpu.dcache.WriteReq_accesses              1624                       # number of WriteReq accesses(hits+misses)
 system.cpu.dcache.WriteReq_accesses_0            1624                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency_0  5352.409639                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency_0  5306.613226                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0  5859.589041                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0  5852.739726                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                  1126                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_hits                  1125                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_hits_0                1126                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_hits_0                1125                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       2665500                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency       2648000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_latency_0      2665500                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency_0      2648000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate_0       0.306650                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_miss_rate_0       0.307266                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                 498                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_misses                 499                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_misses_0               498                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_misses_0               499                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_hits              352                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_hits              353                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_hits_0            352                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_hits_0            353                       # number of WriteReq MSHR hits
-system.cpu.dcache.WriteReq_mshr_miss_latency       855500                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency       854500                       # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_latency_0       855500                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency_0       854500                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate_0     0.089901                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses            146                       # number of WriteReq MSHR misses
 system.cpu.dcache.WriteReq_mshr_misses_0          146                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  11.075362                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                  11.219298                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                4593                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses                4605                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_accesses_0              4593                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses_0              4605                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_accesses_1                 0                       # number of demand (read+write) accesses
 system.cpu.dcache.demand_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.dcache.demand_avg_miss_latency_0  5963.082902                       # average overall miss latency
+system.cpu.dcache.demand_avg_miss_latency_0  5914.062500                       # average overall miss latency
 system.cpu.dcache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency_0  6501.449275                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency_0  6498.538012                       # average overall mshr miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    3821                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits                    3837                       # number of demand (read+write) hits
-system.cpu.dcache.demand_hits_0                  3821                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits_0                  3837                       # number of demand (read+write) hits
 system.cpu.dcache.demand_hits_1                     0                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         4603500                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency         4542000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_latency_0       4603500                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency_0       4542000                       # number of demand (read+write) miss cycles
 system.cpu.dcache.demand_miss_latency_1             0                       # number of demand (read+write) miss cycles
 system.cpu.dcache.demand_miss_rate       <err: div-0>                       # miss rate for demand accesses
-system.cpu.dcache.demand_miss_rate_0         0.168082                       # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate_0         0.166775                       # miss rate for demand accesses
 system.cpu.dcache.demand_miss_rate_1     <err: div-0>                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   772                       # number of demand (read+write) misses
+system.cpu.dcache.demand_misses                   768                       # number of demand (read+write) misses
-system.cpu.dcache.demand_misses_0                 772                       # number of demand (read+write) misses
+system.cpu.dcache.demand_misses_0                 768                       # number of demand (read+write) misses
 system.cpu.dcache.demand_misses_1                   0                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                427                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_hits                426                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_hits_0              427                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_hits_0              426                       # number of demand (read+write) MSHR hits
 system.cpu.dcache.demand_mshr_hits_1                0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      2243000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency      2222500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_latency_0      2243000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency_0      2222500                       # number of demand (read+write) MSHR miss cycles
 system.cpu.dcache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu.dcache.demand_mshr_miss_rate  <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_miss_rate_0     0.075114                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate_0     0.074267                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.dcache.demand_mshr_misses              345                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_misses              342                       # number of demand (read+write) MSHR misses
-system.cpu.dcache.demand_mshr_misses_0            345                       # number of demand (read+write) MSHR misses
+system.cpu.dcache.demand_mshr_misses_0            342                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.demand_mshr_misses_1              0                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.mshr_cap_events_0                 0                       # number of times MSHR cap was activated
 system.cpu.dcache.mshr_cap_events_1                 0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               4593                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses               4605                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_accesses_0             4593                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses_0             4605                       # number of overall (read+write) accesses
 system.cpu.dcache.overall_accesses_1                0                       # number of overall (read+write) accesses
 system.cpu.dcache.overall_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.dcache.overall_avg_miss_latency_0  5963.082902                       # average overall miss latency
+system.cpu.dcache.overall_avg_miss_latency_0  5914.062500                       # average overall miss latency
 system.cpu.dcache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency_0  6501.449275                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency_0  6498.538012                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   3821                       # number of overall hits
+system.cpu.dcache.overall_hits                   3837                       # number of overall hits
-system.cpu.dcache.overall_hits_0                 3821                       # number of overall hits
+system.cpu.dcache.overall_hits_0                 3837                       # number of overall hits
 system.cpu.dcache.overall_hits_1                    0                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        4603500                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency        4542000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_latency_0      4603500                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency_0      4542000                       # number of overall miss cycles
 system.cpu.dcache.overall_miss_latency_1            0                       # number of overall miss cycles
 system.cpu.dcache.overall_miss_rate      <err: div-0>                       # miss rate for overall accesses
-system.cpu.dcache.overall_miss_rate_0        0.168082                       # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate_0        0.166775                       # miss rate for overall accesses
 system.cpu.dcache.overall_miss_rate_1    <err: div-0>                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  772                       # number of overall misses
+system.cpu.dcache.overall_misses                  768                       # number of overall misses
-system.cpu.dcache.overall_misses_0                772                       # number of overall misses
+system.cpu.dcache.overall_misses_0                768                       # number of overall misses
 system.cpu.dcache.overall_misses_1                  0                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits               427                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_hits               426                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_hits_0             427                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_hits_0             426                       # number of overall MSHR hits
 system.cpu.dcache.overall_mshr_hits_1               0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      2243000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency      2222500                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_latency_0      2243000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency_0      2222500                       # number of overall MSHR miss cycles
 system.cpu.dcache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
 system.cpu.dcache.overall_mshr_miss_rate <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_miss_rate_0     0.075114                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate_0     0.074267                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.dcache.overall_mshr_misses             345                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_misses             342                       # number of overall MSHR misses
-system.cpu.dcache.overall_mshr_misses_0           345                       # number of overall MSHR misses
+system.cpu.dcache.overall_mshr_misses_0           342                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_misses_1             0                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_latency_0            0                       # number of overall MSHR uncacheable cycles
@ -206,149 +207,149 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.replacements_0                    0                       # number of replacements
 system.cpu.dcache.replacements_1                    0                       # number of replacements
-system.cpu.dcache.sampled_refs                    345                       # Sample count of references to valid blocks.
+system.cpu.dcache.sampled_refs                    342                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                221.724795                       # Cycle average of tags in use
+system.cpu.dcache.tagsinuse                221.287284                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     3821                       # Total number of references to valid blocks.
+system.cpu.dcache.total_refs                     3837                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
 system.cpu.dcache.writebacks_0                      0                       # number of writebacks
 system.cpu.dcache.writebacks_1                      0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles           1857                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BlockedCycles           1876                       # Number of cycles decode is blocked
-system.cpu.decode.DECODE:BranchMispred            251                       # Number of times decode detected a branch misprediction
+system.cpu.decode.DECODE:BranchMispred            246                       # Number of times decode detected a branch misprediction
-system.cpu.decode.DECODE:BranchResolved           346                       # Number of times decode resolved a branch
+system.cpu.decode.DECODE:BranchResolved           345                       # Number of times decode resolved a branch
-system.cpu.decode.DECODE:DecodedInsts           21806                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:DecodedInsts           21769                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles             14535                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:IdleCycles             14522                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles               3658                       # Number of cycles decode is running
+system.cpu.decode.DECODE:RunCycles               3673                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles            1498                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:SquashCycles            1511                       # Number of cycles decode is squashing
-system.cpu.decode.DECODE:SquashedInsts            351                       # Number of squashed instructions handled by decode
+system.cpu.decode.DECODE:SquashedInsts            346                       # Number of squashed instructions handled by decode
 system.cpu.decode.DECODE:UnblockCycles            145                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                        3964                       # Number of branches that fetch encountered
+system.cpu.fetch.Branches                        3940                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                      2983                       # Number of cache lines fetched
+system.cpu.fetch.CacheLines                      3009                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                          6940                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.Cycles                          6972                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   525                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.IcacheSquashes                   537                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          24033                       # Number of instructions fetch has processed
+system.cpu.fetch.Insts                          23897                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                    1178                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.SquashCycles                    1189                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.361053                       # Number of branch fetches per cycle
+system.cpu.fetch.branchRate                  0.358802                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles               2983                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.icacheStallCycles               3009                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches               1206                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.predictedBranches               1221                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        2.188997                       # Number of inst fetches per cycle
+system.cpu.fetch.rate                        2.176213                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples               10979                      
+system.cpu.fetch.rateDist.samples               10981                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0         7023   6396.76%           
+                               0         7019   6391.95%           
-                               1          285    259.59%           
+                               1          293    266.82%           
-                               2          224    204.03%           
+                               2          225    204.90%           
-                               3          248    225.89%           
+                               3          260    236.77%           
-                               4          335    305.13%           
+                               4          345    314.18%           
-                               5          281    255.94%           
+                               5          288    262.27%           
-                               6          301    274.16%           
+                               6          304    276.84%           
-                               7          251    228.62%           
+                               7          246    224.02%           
-                               8         2031   1849.90%           
+                               8         2001   1822.24%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
-system.cpu.icache.ReadReq_accesses               2983                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_accesses               3009                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_accesses_0             2983                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_accesses_0             3009                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency_0  5910.313901                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_miss_latency_0  5911.144578                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency_0  5152.173913                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency_0  5119.774920                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   2314                       # number of ReadReq hits
+system.cpu.icache.ReadReq_hits                   2345                       # number of ReadReq hits
-system.cpu.icache.ReadReq_hits_0                 2314                       # number of ReadReq hits
+system.cpu.icache.ReadReq_hits_0                 2345                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        3954000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency        3925000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_latency_0      3954000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency_0      3925000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate_0        0.224271                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_miss_rate_0        0.220671                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  669                       # number of ReadReq misses
+system.cpu.icache.ReadReq_misses                  664                       # number of ReadReq misses
-system.cpu.icache.ReadReq_misses_0                669                       # number of ReadReq misses
+system.cpu.icache.ReadReq_misses_0                664                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits                48                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_hits                42                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_hits_0              48                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_hits_0              42                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency      3199500                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency      3184500                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_latency_0      3199500                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency_0      3184500                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate_0     0.208180                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_miss_rate_0     0.206713                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses             621                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_mshr_misses             622                       # number of ReadReq MSHR misses
-system.cpu.icache.ReadReq_mshr_misses_0           621                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_mshr_misses_0           622                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                   3.726248                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                   3.770096                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                2983                       # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses                3009                       # number of demand (read+write) accesses
-system.cpu.icache.demand_accesses_0              2983                       # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses_0              3009                       # number of demand (read+write) accesses
 system.cpu.icache.demand_accesses_1                 0                       # number of demand (read+write) accesses
 system.cpu.icache.demand_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.icache.demand_avg_miss_latency_0  5910.313901                       # average overall miss latency
+system.cpu.icache.demand_avg_miss_latency_0  5911.144578                       # average overall miss latency
 system.cpu.icache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.icache.demand_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency_0  5152.173913                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency_0  5119.774920                       # average overall mshr miss latency
 system.cpu.icache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    2314                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits                    2345                       # number of demand (read+write) hits
-system.cpu.icache.demand_hits_0                  2314                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits_0                  2345                       # number of demand (read+write) hits
 system.cpu.icache.demand_hits_1                     0                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         3954000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency         3925000                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_latency_0       3954000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency_0       3925000                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_latency_1             0                       # number of demand (read+write) miss cycles
 system.cpu.icache.demand_miss_rate       <err: div-0>                       # miss rate for demand accesses
-system.cpu.icache.demand_miss_rate_0         0.224271                       # miss rate for demand accesses
+system.cpu.icache.demand_miss_rate_0         0.220671                       # miss rate for demand accesses
 system.cpu.icache.demand_miss_rate_1     <err: div-0>                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   669                       # number of demand (read+write) misses
+system.cpu.icache.demand_misses                   664                       # number of demand (read+write) misses
-system.cpu.icache.demand_misses_0                 669                       # number of demand (read+write) misses
+system.cpu.icache.demand_misses_0                 664                       # number of demand (read+write) misses
 system.cpu.icache.demand_misses_1                   0                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                 48                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_hits                 42                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_hits_0               48                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_hits_0               42                       # number of demand (read+write) MSHR hits
 system.cpu.icache.demand_mshr_hits_1                0                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      3199500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency      3184500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_latency_0      3199500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency_0      3184500                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu.icache.demand_mshr_miss_rate  <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_miss_rate_0     0.208180                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate_0     0.206713                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses              621                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_misses              622                       # number of demand (read+write) MSHR misses
-system.cpu.icache.demand_mshr_misses_0            621                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_mshr_misses_0            622                       # number of demand (read+write) MSHR misses
 system.cpu.icache.demand_mshr_misses_1              0                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.mshr_cap_events_0                 0                       # number of times MSHR cap was activated
 system.cpu.icache.mshr_cap_events_1                 0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               2983                       # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses               3009                       # number of overall (read+write) accesses
-system.cpu.icache.overall_accesses_0             2983                       # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses_0             3009                       # number of overall (read+write) accesses
 system.cpu.icache.overall_accesses_1                0                       # number of overall (read+write) accesses
 system.cpu.icache.overall_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.icache.overall_avg_miss_latency_0  5910.313901                       # average overall miss latency
+system.cpu.icache.overall_avg_miss_latency_0  5911.144578                       # average overall miss latency
 system.cpu.icache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.icache.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency_0  5152.173913                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency_0  5119.774920                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   2314                       # number of overall hits
+system.cpu.icache.overall_hits                   2345                       # number of overall hits
-system.cpu.icache.overall_hits_0                 2314                       # number of overall hits
+system.cpu.icache.overall_hits_0                 2345                       # number of overall hits
 system.cpu.icache.overall_hits_1                    0                       # number of overall hits
-system.cpu.icache.overall_miss_latency        3954000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency        3925000                       # number of overall miss cycles
-system.cpu.icache.overall_miss_latency_0      3954000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency_0      3925000                       # number of overall miss cycles
 system.cpu.icache.overall_miss_latency_1            0                       # number of overall miss cycles
 system.cpu.icache.overall_miss_rate      <err: div-0>                       # miss rate for overall accesses
-system.cpu.icache.overall_miss_rate_0        0.224271                       # miss rate for overall accesses
+system.cpu.icache.overall_miss_rate_0        0.220671                       # miss rate for overall accesses
 system.cpu.icache.overall_miss_rate_1    <err: div-0>                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  669                       # number of overall misses
+system.cpu.icache.overall_misses                  664                       # number of overall misses
-system.cpu.icache.overall_misses_0                669                       # number of overall misses
+system.cpu.icache.overall_misses_0                664                       # number of overall misses
 system.cpu.icache.overall_misses_1                  0                       # number of overall misses
-system.cpu.icache.overall_mshr_hits                48                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_hits                42                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_hits_0              48                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_hits_0              42                       # number of overall MSHR hits
 system.cpu.icache.overall_mshr_hits_1               0                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      3199500                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency      3184500                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_latency_0      3199500                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency_0      3184500                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
 system.cpu.icache.overall_mshr_miss_rate <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_miss_rate_0     0.208180                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate_0     0.206713                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses             621                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_misses             622                       # number of overall MSHR misses
-system.cpu.icache.overall_mshr_misses_0           621                       # number of overall MSHR misses
+system.cpu.icache.overall_mshr_misses_0           622                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_misses_1             0                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_latency_0            0                       # number of overall MSHR uncacheable cycles
@ -368,104 +369,104 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      9                       # number of replacements
 system.cpu.icache.replacements_0                    9                       # number of replacements
 system.cpu.icache.replacements_1                    0                       # number of replacements
-system.cpu.icache.sampled_refs                    621                       # Sample count of references to valid blocks.
+system.cpu.icache.sampled_refs                    622                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.icache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.icache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                322.894952                       # Cycle average of tags in use
+system.cpu.icache.tagsinuse                323.196356                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     2314                       # Total number of references to valid blocks.
+system.cpu.icache.total_refs                     2345                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
 system.cpu.icache.writebacks_0                      0                       # number of writebacks
 system.cpu.icache.writebacks_1                      0                       # number of writebacks
-system.cpu.idleCycles                            1998                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.idleCycles                            2997                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     2367                       # Number of branches executed
+system.cpu.iew.EXEC:branches                     2377                       # Number of branches executed
-system.cpu.iew.EXEC:branches_0                   1185                       # Number of branches executed
+system.cpu.iew.EXEC:branches_0                   1192                       # Number of branches executed
-system.cpu.iew.EXEC:branches_1                   1182                       # Number of branches executed
+system.cpu.iew.EXEC:branches_1                   1185                       # Number of branches executed
-system.cpu.iew.EXEC:nop                            73                       # number of nop insts executed
+system.cpu.iew.EXEC:nop                            72                       # number of nop insts executed
 system.cpu.iew.EXEC:nop_0                          37                       # number of nop insts executed
-system.cpu.iew.EXEC:nop_1                          36                       # number of nop insts executed
+system.cpu.iew.EXEC:nop_1                          35                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     1.416158                       # Inst execution rate
+system.cpu.iew.EXEC:rate                     1.419725                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         4978                       # number of memory reference insts executed
+system.cpu.iew.EXEC:refs                         5002                       # number of memory reference insts executed
-system.cpu.iew.EXEC:refs_0                       2514                       # number of memory reference insts executed
+system.cpu.iew.EXEC:refs_0                       2507                       # number of memory reference insts executed
-system.cpu.iew.EXEC:refs_1                       2464                       # number of memory reference insts executed
+system.cpu.iew.EXEC:refs_1                       2495                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                       1867                       # Number of stores executed
+system.cpu.iew.EXEC:stores                       1874                       # Number of stores executed
-system.cpu.iew.EXEC:stores_0                      938                       # Number of stores executed
+system.cpu.iew.EXEC:stores_0                      933                       # Number of stores executed
-system.cpu.iew.EXEC:stores_1                      929                       # Number of stores executed
+system.cpu.iew.EXEC:stores_1                      941                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
 system.cpu.iew.EXEC:swp_0                           0                       # number of swp insts executed
 system.cpu.iew.EXEC:swp_1                           0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                     10219                       # num instructions consuming a value
+system.cpu.iew.WB:consumers                     10260                       # num instructions consuming a value
-system.cpu.iew.WB:consumers_0                    5113                       # num instructions consuming a value
+system.cpu.iew.WB:consumers_0                    5135                       # num instructions consuming a value
-system.cpu.iew.WB:consumers_1                    5106                       # num instructions consuming a value
+system.cpu.iew.WB:consumers_1                    5125                       # num instructions consuming a value
-system.cpu.iew.WB:count                         14974                       # cumulative count of insts written-back
+system.cpu.iew.WB:count                         14994                       # cumulative count of insts written-back
-system.cpu.iew.WB:count_0                        7532                       # cumulative count of insts written-back
+system.cpu.iew.WB:count_0                        7526                       # cumulative count of insts written-back
-system.cpu.iew.WB:count_1                        7442                       # cumulative count of insts written-back
+system.cpu.iew.WB:count_1                        7468                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     1.526960                       # average fanout of values written-back
+system.cpu.iew.WB:fanout                     1.530607                       # average fanout of values written-back
-system.cpu.iew.WB:fanout_0                   0.762957                       # average fanout of values written-back
+system.cpu.iew.WB:fanout_0                   0.763778                       # average fanout of values written-back
-system.cpu.iew.WB:fanout_1                   0.764003                       # average fanout of values written-back
+system.cpu.iew.WB:fanout_1                   0.766829                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_0                       0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_1                       0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:penalized_rate_0                  0                       # fraction of instructions written-back that wrote to 'other' IQ
 system.cpu.iew.WB:penalized_rate_1                  0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      7802                       # num instructions producing a value
+system.cpu.iew.WB:producers                      7852                       # num instructions producing a value
-system.cpu.iew.WB:producers_0                    3901                       # num instructions producing a value
+system.cpu.iew.WB:producers_0                    3922                       # num instructions producing a value
-system.cpu.iew.WB:producers_1                    3901                       # num instructions producing a value
+system.cpu.iew.WB:producers_1                    3930                       # num instructions producing a value
-system.cpu.iew.WB:rate                       1.363876                       # insts written-back per cycle
+system.cpu.iew.WB:rate                       1.365449                       # insts written-back per cycle
-system.cpu.iew.WB:rate_0                     0.686037                       # insts written-back per cycle
+system.cpu.iew.WB:rate_0                     0.685366                       # insts written-back per cycle
-system.cpu.iew.WB:rate_1                     0.677840                       # insts written-back per cycle
+system.cpu.iew.WB:rate_1                     0.680084                       # insts written-back per cycle
-system.cpu.iew.WB:sent                          15105                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent                          15132                       # cumulative count of insts sent to commit
-system.cpu.iew.WB:sent_0                         7590                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent_0                         7582                       # cumulative count of insts sent to commit
-system.cpu.iew.WB:sent_1                         7515                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent_1                         7550                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                  941                       # Number of branch mispredicts detected at execute
+system.cpu.iew.branchMispredicts                  958                       # Number of branch mispredicts detected at execute
-system.cpu.iew.iewBlockCycles                       7                       # Number of cycles IEW is blocking
+system.cpu.iew.iewBlockCycles                       6                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  3823                       # Number of dispatched load instructions
+system.cpu.iew.iewDispLoadInsts                  3837                       # Number of dispatched load instructions
 system.cpu.iew.iewDispNonSpecInsts                 42                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts               501                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispSquashedInsts               445                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 2178                       # Number of dispatched store instructions
+system.cpu.iew.iewDispStoreInsts                 2172                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts               19078                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewDispatchedInsts               19086                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  3111                       # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts                  3128                       # Number of load instructions executed
-system.cpu.iew.iewExecLoadInsts_0                1576                       # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts_0                1574                       # Number of load instructions executed
-system.cpu.iew.iewExecLoadInsts_1                1535                       # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts_1                1554                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts               864                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecSquashedInsts               852                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                 15548                       # Number of executed instructions
+system.cpu.iew.iewExecutedInsts                 15590                       # Number of executed instructions
 system.cpu.iew.iewIQFullEvents                      0                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                   1498                       # Number of cycles IEW is squashing
+system.cpu.iew.iewSquashCycles                   1511                       # Number of cycles IEW is squashing
 system.cpu.iew.iewUnblockCycles                     0                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.0.forwLoads              42                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.0.forwLoads              43                       # Number of loads that had data forwarded from stores
 system.cpu.iew.lsq.thread.0.ignoredResponses            4                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.0.memOrderViolation           63                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.0.memOrderViolation           64                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads          946                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedLoads          955                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores          276                       # Number of stores squashed
+system.cpu.iew.lsq.thread.0.squashedStores          270                       # Number of stores squashed
 system.cpu.iew.lsq.thread.1.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.1.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.1.forwLoads              38                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.1.forwLoads              42                       # Number of loads that had data forwarded from stores
-system.cpu.iew.lsq.thread.1.ignoredResponses            0                       # Number of memory responses ignored because the instruction is squashed
+system.cpu.iew.lsq.thread.1.ignoredResponses            2                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.1.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.1.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
-system.cpu.iew.lsq.thread.1.memOrderViolation           54                       # Number of memory ordering violations
+system.cpu.iew.lsq.thread.1.memOrderViolation           58                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.1.rescheduledLoads            1                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.1.squashedLoads          919                       # Number of loads squashed
+system.cpu.iew.lsq.thread.1.squashedLoads          924                       # Number of loads squashed
 system.cpu.iew.lsq.thread.1.squashedStores          278                       # Number of stores squashed
-system.cpu.iew.memOrderViolationEvents            117                       # Number of memory order violations
+system.cpu.iew.memOrderViolationEvents            122                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          761                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedNotTakenIncorrect          767                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect            180                       # Number of branches that were predicted taken incorrectly
+system.cpu.iew.predictedTakenIncorrect            191                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc_0                             0.512160                       # IPC: Instructions Per Cycle
+system.cpu.ipc_0                             0.512066                       # IPC: Instructions Per Cycle
-system.cpu.ipc_1                             0.512251                       # IPC: Instructions Per Cycle
+system.cpu.ipc_1                             0.512157                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         1.024410                       # IPC: Total IPC of All Threads
+system.cpu.ipc_total                         1.024224                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                    8232                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0                    8235                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
                      No_OpClass            2      0.02%            # Type of FU issued
-                          IntAlu         5551     67.43%            # Type of FU issued
+                          IntAlu         5567     67.60%            # Type of FU issued
                         IntMult            1      0.01%            # Type of FU issued
                          IntDiv            0      0.00%            # Type of FU issued
                        FloatAdd            2      0.02%            # Type of FU issued
@ -474,15 +475,15 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
                       FloatMult            0      0.00%            # Type of FU issued
                        FloatDiv            0      0.00%            # Type of FU issued
                       FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         1704     20.70%            # Type of FU issued
+                         MemRead         1702     20.67%            # Type of FU issued
-                        MemWrite          972     11.81%            # Type of FU issued
+                        MemWrite          961     11.67%            # Type of FU issued
                       IprAccess            0      0.00%            # Type of FU issued
                    InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:FU_type_1                    8180                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_1                    8207                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_1.start_dist
                      No_OpClass            2      0.02%            # Type of FU issued
-                          IntAlu         5536     67.68%            # Type of FU issued
+                          IntAlu         5547     67.59%            # Type of FU issued
                         IntMult            1      0.01%            # Type of FU issued
                          IntDiv            0      0.00%            # Type of FU issued
                        FloatAdd            2      0.02%            # Type of FU issued
@ -491,15 +492,15 @@ system.cpu.iq.ISSUE:FU_type_1.start_dist
                       FloatMult            0      0.00%            # Type of FU issued
                        FloatDiv            0      0.00%            # Type of FU issued
                       FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         1681     20.55%            # Type of FU issued
+                         MemRead         1690     20.59%            # Type of FU issued
-                        MemWrite          958     11.71%            # Type of FU issued
+                        MemWrite          965     11.76%            # Type of FU issued
                       IprAccess            0      0.00%            # Type of FU issued
                    InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_1.end_dist
-system.cpu.iq.ISSUE:FU_type                     16412                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type                     16442                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type.start_dist
                      No_OpClass            4      0.02%            # Type of FU issued
-                          IntAlu        11087     67.55%            # Type of FU issued
+                          IntAlu        11114     67.60%            # Type of FU issued
                         IntMult            2      0.01%            # Type of FU issued
                          IntDiv            0      0.00%            # Type of FU issued
                        FloatAdd            4      0.02%            # Type of FU issued
@ -508,20 +509,20 @@ system.cpu.iq.ISSUE:FU_type.start_dist
                       FloatMult            0      0.00%            # Type of FU issued
                        FloatDiv            0      0.00%            # Type of FU issued
                       FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         3385     20.63%            # Type of FU issued
+                         MemRead         3392     20.63%            # Type of FU issued
-                        MemWrite         1930     11.76%            # Type of FU issued
+                        MemWrite         1926     11.71%            # Type of FU issued
                       IprAccess            0      0.00%            # Type of FU issued
                    InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt                   180                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt                   189                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_cnt_0                  92                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt_0                  98                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_cnt_1                  88                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt_1                  91                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.010968                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate             0.011495                       # FU busy rate (busy events/executed inst)
-system.cpu.iq.ISSUE:fu_busy_rate_0           0.005606                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate_0           0.005960                       # FU busy rate (busy events/executed inst)
-system.cpu.iq.ISSUE:fu_busy_rate_1           0.005362                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate_1           0.005535                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
                      No_OpClass            0      0.00%            # attempts to use FU when none available
-                          IntAlu           16      8.89%            # attempts to use FU when none available
+                          IntAlu           14      7.41%            # attempts to use FU when none available
                         IntMult            0      0.00%            # attempts to use FU when none available
                          IntDiv            0      0.00%            # attempts to use FU when none available
                        FloatAdd            0      0.00%            # attempts to use FU when none available
@ -530,104 +531,104 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                       FloatMult            0      0.00%            # attempts to use FU when none available
                        FloatDiv            0      0.00%            # attempts to use FU when none available
                       FloatSqrt            0      0.00%            # attempts to use FU when none available
-                         MemRead           97     53.89%            # attempts to use FU when none available
+                         MemRead          107     56.61%            # attempts to use FU when none available
-                        MemWrite           67     37.22%            # attempts to use FU when none available
+                        MemWrite           68     35.98%            # attempts to use FU when none available
                       IprAccess            0      0.00%            # attempts to use FU when none available
                    InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples        10979                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples        10981                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0         4788   4361.05%           
+                               0         4775   4348.42%           
-                               1         1816   1654.07%           
+                               1         1817   1654.68%           
-                               2         1657   1509.24%           
+                               2         1638   1491.67%           
-                               3         1039    946.35%           
+                               3         1107   1008.10%           
-                               4          774    704.98%           
+                               4          745    678.44%           
-                               5          501    456.33%           
+                               5          490    446.23%           
-                               6          289    263.23%           
+                               6          287    261.36%           
-                               7           90     81.97%           
+                               7          100     91.07%           
-                               8           25     22.77%           
+                               8           22     20.03%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
-system.cpu.iq.ISSUE:rate                     1.494854                       # Inst issue rate
+system.cpu.iq.ISSUE:rate                     1.497314                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                      18963                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsAdded                      18972                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                     16412                       # Number of instructions issued
+system.cpu.iq.iqInstsIssued                     16442                       # Number of instructions issued
 system.cpu.iq.iqNonSpecInstsAdded                  42                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined            6896                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsExamined            6918                       # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued                34                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedInstsIssued                63                       # Number of squashed instructions issued
 system.cpu.iq.iqSquashedNonSpecRemoved              8                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined         4313                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.iq.iqSquashedOperandsExamined         4274                       # Number of squashed operands that are examined and possibly removed from graph
-system.cpu.l2cache.ReadReq_accesses               963                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_accesses               962                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_accesses_0             963                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_accesses_0             962                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency_0  5220.374220                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_miss_latency_0  5208.636837                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0  2725.051975                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0  2724.765869                       # average ReadReq mshr miss latency
 system.cpu.l2cache.ReadReq_hits                     1                       # number of ReadReq hits
 system.cpu.l2cache.ReadReq_hits_0                   1                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency       5022000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency       5005500                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_latency_0      5022000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency_0      5005500                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate_0       0.998962                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_miss_rate_0       0.998960                       # miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_misses                 962                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_misses                 961                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_misses_0               962                       # number of ReadReq misses
+system.cpu.l2cache.ReadReq_misses_0               961                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      2621500                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency      2618500                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_latency_0      2621500                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency_0      2618500                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate_0     0.998962                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_miss_rate_0     0.998960                       # mshr miss rate for ReadReq accesses
-system.cpu.l2cache.ReadReq_mshr_misses            962                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses            961                       # number of ReadReq MSHR misses
-system.cpu.l2cache.ReadReq_mshr_misses_0          962                       # number of ReadReq MSHR misses
+system.cpu.l2cache.ReadReq_mshr_misses_0          961                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.l2cache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                  0.001040                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                  0.001041                       # Average number of references to valid blocks.
 system.cpu.l2cache.blocked_no_mshrs                 0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_no_targets               0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.l2cache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses                963                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses                962                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_accesses_0              963                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_accesses_0              962                       # number of demand (read+write) accesses
 system.cpu.l2cache.demand_accesses_1                0                       # number of demand (read+write) accesses
 system.cpu.l2cache.demand_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.l2cache.demand_avg_miss_latency_0  5220.374220                       # average overall miss latency
+system.cpu.l2cache.demand_avg_miss_latency_0  5208.636837                       # average overall miss latency
 system.cpu.l2cache.demand_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency_0  2725.051975                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency_0  2724.765869                       # average overall mshr miss latency
 system.cpu.l2cache.demand_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      1                       # number of demand (read+write) hits
 system.cpu.l2cache.demand_hits_0                    1                       # number of demand (read+write) hits
 system.cpu.l2cache.demand_hits_1                    0                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency        5022000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency        5005500                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_latency_0      5022000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency_0      5005500                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_latency_1            0                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate      <err: div-0>                       # miss rate for demand accesses
-system.cpu.l2cache.demand_miss_rate_0        0.998962                       # miss rate for demand accesses
+system.cpu.l2cache.demand_miss_rate_0        0.998960                       # miss rate for demand accesses
 system.cpu.l2cache.demand_miss_rate_1    <err: div-0>                       # miss rate for demand accesses
-system.cpu.l2cache.demand_misses                  962                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses                  961                       # number of demand (read+write) misses
-system.cpu.l2cache.demand_misses_0                962                       # number of demand (read+write) misses
+system.cpu.l2cache.demand_misses_0                961                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_misses_1                  0                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
 system.cpu.l2cache.demand_mshr_hits_0               0                       # number of demand (read+write) MSHR hits
 system.cpu.l2cache.demand_mshr_hits_1               0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency      2621500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency      2618500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_latency_0      2621500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency_0      2618500                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_latency_1            0                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_miss_rate_0     0.998962                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_miss_rate_0     0.998960                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for demand accesses
-system.cpu.l2cache.demand_mshr_misses             962                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_misses             961                       # number of demand (read+write) MSHR misses
-system.cpu.l2cache.demand_mshr_misses_0           962                       # number of demand (read+write) MSHR misses
+system.cpu.l2cache.demand_mshr_misses_0           961                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.demand_mshr_misses_1             0                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
 system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
 system.cpu.l2cache.mshr_cap_events_0                0                       # number of times MSHR cap was activated
 system.cpu.l2cache.mshr_cap_events_1                0                       # number of times MSHR cap was activated
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses               963                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses               962                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_accesses_0             963                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_accesses_0             962                       # number of overall (read+write) accesses
 system.cpu.l2cache.overall_accesses_1               0                       # number of overall (read+write) accesses
 system.cpu.l2cache.overall_avg_miss_latency <err: div-0>                       # average overall miss latency
-system.cpu.l2cache.overall_avg_miss_latency_0  5220.374220                       # average overall miss latency
+system.cpu.l2cache.overall_avg_miss_latency_0  5208.636837                       # average overall miss latency
 system.cpu.l2cache.overall_avg_miss_latency_1 <err: div-0>                       # average overall miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency <err: div-0>                       # average overall mshr miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency_0  2725.051975                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency_0  2724.765869                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_miss_latency_1 <err: div-0>                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_0 <err: div-0>                       # average overall mshr uncacheable latency
@ -635,26 +636,26 @@ system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_1 <err: div-0>
 system.cpu.l2cache.overall_hits                     1                       # number of overall hits
 system.cpu.l2cache.overall_hits_0                   1                       # number of overall hits
 system.cpu.l2cache.overall_hits_1                   0                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency       5022000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency       5005500                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_latency_0      5022000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency_0      5005500                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_latency_1            0                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate     <err: div-0>                       # miss rate for overall accesses
-system.cpu.l2cache.overall_miss_rate_0       0.998962                       # miss rate for overall accesses
+system.cpu.l2cache.overall_miss_rate_0       0.998960                       # miss rate for overall accesses
 system.cpu.l2cache.overall_miss_rate_1   <err: div-0>                       # miss rate for overall accesses
-system.cpu.l2cache.overall_misses                 962                       # number of overall misses
+system.cpu.l2cache.overall_misses                 961                       # number of overall misses
-system.cpu.l2cache.overall_misses_0               962                       # number of overall misses
+system.cpu.l2cache.overall_misses_0               961                       # number of overall misses
 system.cpu.l2cache.overall_misses_1                 0                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
 system.cpu.l2cache.overall_mshr_hits_0              0                       # number of overall MSHR hits
 system.cpu.l2cache.overall_mshr_hits_1              0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency      2621500                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency      2618500                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_latency_0      2621500                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency_0      2618500                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_latency_1            0                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_miss_rate_0     0.998962                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_miss_rate_0     0.998960                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_miss_rate_1 <err: div-0>                       # mshr miss rate for overall accesses
-system.cpu.l2cache.overall_mshr_misses            962                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_misses            961                       # number of overall MSHR misses
-system.cpu.l2cache.overall_mshr_misses_0          962                       # number of overall MSHR misses
+system.cpu.l2cache.overall_mshr_misses_0          961                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_misses_1            0                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.l2cache.overall_mshr_uncacheable_latency_0            0                       # number of overall MSHR uncacheable cycles
@ -674,33 +675,33 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.replacements_0                   0                       # number of replacements
 system.cpu.l2cache.replacements_1                   0                       # number of replacements
-system.cpu.l2cache.sampled_refs                   962                       # Sample count of references to valid blocks.
+system.cpu.l2cache.sampled_refs                   961                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.l2cache.soft_prefetch_mshr_full_0            0                       # number of mshr full events for SW prefetching instrutions
 system.cpu.l2cache.soft_prefetch_mshr_full_1            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               545.133409                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               545.318204                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       1                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
 system.cpu.l2cache.writebacks_0                     0                       # number of writebacks
 system.cpu.l2cache.writebacks_1                     0                       # number of writebacks
-system.cpu.numCycles                            10979                       # number of cpu cycles simulated
+system.cpu.numCycles                            10981                       # number of cpu cycles simulated
-system.cpu.rename.RENAME:BlockCycles              614                       # Number of cycles rename is blocking
+system.cpu.rename.RENAME:BlockCycles              612                       # Number of cycles rename is blocking
 system.cpu.rename.RENAME:CommittedMaps           8102                       # Number of HB maps that are committed
-system.cpu.rename.RENAME:IdleCycles             14840                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:IdleCycles             14828                       # Number of cycles rename is idle
-system.cpu.rename.RENAME:LSQFullEvents            684                       # Number of times rename has blocked due to LSQ full
+system.cpu.rename.RENAME:LSQFullEvents            692                       # Number of times rename has blocked due to LSQ full
-system.cpu.rename.RENAME:RenameLookups          26359                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenameLookups          26356                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           20748                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedInsts           20731                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands        15612                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RenamedOperands        15606                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles               3480                       # Number of cycles rename is running
+system.cpu.rename.RENAME:RunCycles               3494                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles            1498                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:SquashCycles            1511                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:UnblockCycles            744                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UnblockCycles            761                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UndoneMaps              7510                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:UndoneMaps              7504                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:serializeStallCycles          517                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:serializeStallCycles          521                       # count of cycles rename stalled for serializing inst
 system.cpu.rename.RENAME:serializingInsts           48                       # count of serializing insts renamed
-system.cpu.rename.RENAME:skidInsts               2147                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:skidInsts               2159                       # count of insts added to the skid buffer
 system.cpu.rename.RENAME:tempSerializingInsts           37                       # count of temporary serializing insts renamed
-system.cpu.timesIdled                               2                       # Number of times that the entire CPU went into an idle state and unscheduled itself
+system.cpu.timesIdled                               3                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload0.PROG:num_syscalls             17                       # Number of system calls
 system.cpu.workload1.PROG:num_syscalls             17                       # Number of system calls
--- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
+++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout
@ -7,9 +7,9 @@ The Regents of The University of Michigan
 All Rights Reserved
-M5 compiled Jun 10 2007 14:06:20
+M5 compiled Jun 21 2007 21:25:27
-M5 started Sun Jun 10 14:22:38 2007
+M5 started Fri Jun 22 00:04:51 2007
-M5 executing on iceaxe
+M5 executing on zizzer.eecs.umich.edu
-command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing
+command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 5490000 because target called exit()
+Exiting @ tick 5491500 because target called exit()
--- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini
@ -21,6 +21,7 @@ SQEntries=32
 SSITSize=1024
 activity=0
 backComSize=5
 cachePorts=200
 choiceCtrBits=2
 choicePredictorSize=8192
 clock=500
@ -74,6 +75,15 @@ renameToFetchDelay=1
 renameToIEWDelay=2
 renameToROBDelay=1
 renameWidth=8
 smtCommitPolicy=RoundRobin
 smtFetchPolicy=SingleThread
 smtIQPolicy=Partitioned
 smtIQThreshold=100
 smtLSQPolicy=Partitioned
 smtLSQThreshold=100
 smtNumFetchingThreads=1
 smtROBPolicy=Partitioned
 smtROBThreshold=100
 squashWidth=8
 system=system
 trapLatency=13
@ -86,6 +96,7 @@ icache_port=system.cpu.icache.cpu_side
 [system.cpu.dcache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@ -99,7 +110,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@ -261,6 +272,7 @@ opLat=3
 [system.cpu.icache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@ -274,7 +286,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@ -299,6 +311,7 @@ mem_side=system.cpu.toL2Bus.port[0]
 [system.cpu.l2cache]
 type=BaseCache
 adaptive_compression=false
 addr_range=0:18446744073709551615
 assoc=2
 block_size=64
 compressed_bus=false
@ -312,7 +325,7 @@ prefetch_access=false
 prefetch_cache_check_push=true
 prefetch_data_accesses_only=false
 prefetch_degree=1
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_miss=false
 prefetch_past_page=false
 prefetch_policy=none
@ -366,7 +379,7 @@ bus_id=0
 clock=1000
 responder_set=false
 width=64
-port=system.physmem.port system.cpu.l2cache.mem_side
+port=system.physmem.port[0] system.cpu.l2cache.mem_side
 [system.physmem]
 type=PhysicalMemory
--- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out
@ -275,7 +275,7 @@ prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
@ -312,7 +312,7 @@ prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
@ -349,7 +349,7 @@ prefetch_access=false
 prefetcher_size=100
 prefetch_past_page=false
 prefetch_serial_squash=false
-prefetch_latency=10
+prefetch_latency=10000
 prefetch_degree=1
 prefetch_policy=none
 prefetch_cache_check_push=true
--- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt
@ -1,40 +1,40 @@
 ---------- Begin Simulation Statistics ----------
 global.BPredUnit.BTBCorrect                         0                       # Number of correct BTB predictions (this stat may not work properly.
-global.BPredUnit.BTBHits                         2726                       # Number of BTB hits
+global.BPredUnit.BTBHits                         2589                       # Number of BTB hits
-global.BPredUnit.BTBLookups                      7230                       # Number of BTB lookups
+global.BPredUnit.BTBLookups                      6396                       # Number of BTB lookups
 global.BPredUnit.RASInCorrect                       0                       # Number of incorrect RAS predictions.
-global.BPredUnit.condIncorrect                   2062                       # Number of conditional branches incorrect
+global.BPredUnit.condIncorrect                   2002                       # Number of conditional branches incorrect
-global.BPredUnit.condPredicted                   7954                       # Number of conditional branches predicted
+global.BPredUnit.condPredicted                   6955                       # Number of conditional branches predicted
-global.BPredUnit.lookups                         7954                       # Number of BP lookups
+global.BPredUnit.lookups                         6955                       # Number of BP lookups
 global.BPredUnit.usedRAS                            0                       # Number of times the RAS was used to get a target.
-host_inst_rate                                  37089                       # Simulator instruction rate (inst/s)
+host_inst_rate                                  33806                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 154932                       # Number of bytes of host memory used
+host_mem_usage                                 154936                       # Number of bytes of host memory used
-host_seconds                                     0.30                       # Real time elapsed on the host
+host_seconds                                     0.32                       # Real time elapsed on the host
-host_tick_rate                               53780846                       # Simulator tick rate (ticks/s)
+host_tick_rate                               48256964                       # Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads                 10                       # Number of conflicting loads.
 memdepunit.memDep.conflictingStores                 0                       # Number of conflicting stores.
-memdepunit.memDep.insertedLoads                  3198                       # Number of loads inserted to the mem dependence unit.
+memdepunit.memDep.insertedLoads                  2999                       # Number of loads inserted to the mem dependence unit.
-memdepunit.memDep.insertedStores                 2970                       # Number of stores inserted to the mem dependence unit.
+memdepunit.memDep.insertedStores                 2872                       # Number of stores inserted to the mem dependence unit.
 sim_freq                                 1000000000000                       # Frequency of simulated ticks
 sim_insts                                       10976                       # Number of instructions simulated
 sim_seconds                                  0.000016                       # Number of seconds simulated
-sim_ticks                                    15931500                       # Number of ticks simulated
+sim_ticks                                    15682500                       # Number of ticks simulated
 system.cpu.commit.COM:branches                   2152                       # Number of branches committed
-system.cpu.commit.COM:bw_lim_events               146                       # number cycles where commit BW limit reached
+system.cpu.commit.COM:bw_lim_events               199                       # number cycles where commit BW limit reached
 system.cpu.commit.COM:bw_limited                    0                       # number of insts not committed due to BW limits
 system.cpu.commit.COM:committed_per_cycle.start_dist                     # Number of insts commited each cycle
-system.cpu.commit.COM:committed_per_cycle.samples        28801                      
+system.cpu.commit.COM:committed_per_cycle.samples        28561                      
 system.cpu.commit.COM:committed_per_cycle.min_value            0                      
-                               0        23411   8128.54%           
+                               0        23237   8135.92%           
-                               1         2862    993.72%           
+                               1         2855    999.61%           
-                               2         1174    407.62%           
+                               2         1132    396.34%           
-                               3          608    211.10%           
+                               3          638    223.38%           
-                               4          359    124.65%           
+                               4          273     95.58%           
-                               5          123     42.71%           
+                               5          119     41.67%           
-                               6          103     35.76%           
+                               6           92     32.21%           
-                               7           15      5.21%           
+                               7           16      5.60%           
-                               8          146     50.69%           
+                               8          199     69.68%           
 system.cpu.commit.COM:committed_per_cycle.max_value            8                      
 system.cpu.commit.COM:committed_per_cycle.end_dist
@ -43,71 +43,71 @@ system.cpu.commit.COM:loads                      1462                       # Nu
 system.cpu.commit.COM:membars                       0                       # Number of memory barriers committed
 system.cpu.commit.COM:refs                       2760                       # Number of memory references committed
 system.cpu.commit.COM:swp_count                     0                       # Number of s/w prefetches committed
-system.cpu.commit.branchMispredicts              2062                       # The number of times a branch was mispredicted
+system.cpu.commit.branchMispredicts              2002                       # The number of times a branch was mispredicted
 system.cpu.commit.commitCommittedInsts          10976                       # The number of committed instructions
-system.cpu.commit.commitNonSpecStalls             327                       # The number of times commit has been forced to stall to communicate backwards
+system.cpu.commit.commitNonSpecStalls             329                       # The number of times commit has been forced to stall to communicate backwards
-system.cpu.commit.commitSquashedInsts           14297                       # The number of squashed insts skipped by commit
+system.cpu.commit.commitSquashedInsts           12659                       # The number of squashed insts skipped by commit
 system.cpu.committedInsts                       10976                       # Number of Instructions Simulated
 system.cpu.committedInsts_total                 10976                       # Number of Instructions Simulated
-system.cpu.cpi                               2.903061                       # CPI: Cycles Per Instruction
+system.cpu.cpi                               2.857598                       # CPI: Cycles Per Instruction
-system.cpu.cpi_total                         2.903061                       # CPI: Total CPI of All Threads
+system.cpu.cpi_total                         2.857598                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               2743                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_accesses               2313                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency  5392.857143                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_miss_latency  5451.807229                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency  4696.969697                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency  4719.696970                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   2659                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_hits                   2230                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency         453000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency         452500                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.030623                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_miss_rate          0.035884                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                   84                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_misses                   83                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits                18                       # number of ReadReq MSHR hits
+system.cpu.dcache.ReadReq_mshr_hits                17                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency       310000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency       311500                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.024061                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.028534                       # mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses              66                       # number of ReadReq MSHR misses
 system.cpu.dcache.SwapReq_accesses                  6                       # number of SwapReq accesses(hits+misses)
 system.cpu.dcache.SwapReq_hits                      6                       # number of SwapReq hits
 system.cpu.dcache.WriteReq_accesses              1292                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency         5505                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency  5522.613065                       # average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency  4802.325581                       # average WriteReq mshr miss latency
-system.cpu.dcache.WriteReq_hits                  1092                       # number of WriteReq hits
+system.cpu.dcache.WriteReq_hits                  1093                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       1101000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency       1099000                       # number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.154799                       # miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_miss_rate         0.154025                       # miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses                 200                       # number of WriteReq misses
+system.cpu.dcache.WriteReq_misses                 199                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_hits              114                       # number of WriteReq MSHR hits
+system.cpu.dcache.WriteReq_mshr_hits              113                       # number of WriteReq MSHR hits
 system.cpu.dcache.WriteReq_mshr_miss_latency       413000                       # number of WriteReq MSHR miss cycles
 system.cpu.dcache.WriteReq_mshr_miss_rate     0.066563                       # mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses             86                       # number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  24.717105                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                  21.901316                       # Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                4035                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_accesses                3605                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency  5471.830986                       # average overall miss latency
+system.cpu.dcache.demand_avg_miss_latency  5501.773050                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency  4756.578947                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency  4766.447368                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    3751                       # number of demand (read+write) hits
+system.cpu.dcache.demand_hits                    3323                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         1554000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency         1551500                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.070384                       # miss rate for demand accesses
+system.cpu.dcache.demand_miss_rate           0.078225                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                   284                       # number of demand (read+write) misses
+system.cpu.dcache.demand_misses                   282                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                132                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_hits                130                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency       723000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency       724500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.037670                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate      0.042164                       # mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses              152                       # number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               4035                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_accesses               3605                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency  5471.830986                       # average overall miss latency
+system.cpu.dcache.overall_avg_miss_latency  5501.773050                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency  4756.578947                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency  4766.447368                       # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   3751                       # number of overall hits
+system.cpu.dcache.overall_hits                   3323                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        1554000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency        1551500                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.070384                       # miss rate for overall accesses
+system.cpu.dcache.overall_miss_rate          0.078225                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                  284                       # number of overall misses
+system.cpu.dcache.overall_misses                  282                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits               132                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_hits               130                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency       723000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency       724500                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.037670                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate     0.042164                       # mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses             152                       # number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@ -123,85 +123,85 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.dcache.replacements                      0                       # number of replacements
 system.cpu.dcache.sampled_refs                    152                       # Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                113.439038                       # Cycle average of tags in use
+system.cpu.dcache.tagsinuse                113.060803                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     3757                       # Total number of references to valid blocks.
+system.cpu.dcache.total_refs                     3329                       # Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                        0                       # number of writebacks
-system.cpu.decode.DECODE:BlockedCycles           4602                       # Number of cycles decode is blocked
+system.cpu.decode.DECODE:BlockedCycles           3802                       # Number of cycles decode is blocked
-system.cpu.decode.DECODE:DecodedInsts           38937                       # Number of instructions handled by decode
+system.cpu.decode.DECODE:DecodedInsts           34098                       # Number of instructions handled by decode
-system.cpu.decode.DECODE:IdleCycles             16098                       # Number of cycles decode is idle
+system.cpu.decode.DECODE:IdleCycles             15413                       # Number of cycles decode is idle
-system.cpu.decode.DECODE:RunCycles               7883                       # Number of cycles decode is running
+system.cpu.decode.DECODE:RunCycles               9282                       # Number of cycles decode is running
-system.cpu.decode.DECODE:SquashCycles            3063                       # Number of cycles decode is squashing
+system.cpu.decode.DECODE:SquashCycles            2804                       # Number of cycles decode is squashing
-system.cpu.decode.DECODE:UnblockCycles            218                       # Number of cycles decode is unblocking
+system.cpu.decode.DECODE:UnblockCycles             64                       # Number of cycles decode is unblocking
-system.cpu.fetch.Branches                        7954                       # Number of branches that fetch encountered
+system.cpu.fetch.Branches                        6955                       # Number of branches that fetch encountered
-system.cpu.fetch.CacheLines                      4933                       # Number of cache lines fetched
+system.cpu.fetch.CacheLines                      4655                       # Number of cache lines fetched
-system.cpu.fetch.Cycles                         14166                       # Number of cycles fetch has run and was not squashing or blocked
+system.cpu.fetch.Cycles                         15062                       # Number of cycles fetch has run and was not squashing or blocked
-system.cpu.fetch.IcacheSquashes                   565                       # Number of outstanding Icache misses that were squashed
+system.cpu.fetch.IcacheSquashes                   489                       # Number of outstanding Icache misses that were squashed
-system.cpu.fetch.Insts                          44421                       # Number of instructions fetch has processed
+system.cpu.fetch.Insts                          38520                       # Number of instructions fetch has processed
-system.cpu.fetch.SquashCycles                    2121                       # Number of cycles fetch has spent squashing
+system.cpu.fetch.SquashCycles                    2061                       # Number of cycles fetch has spent squashing
-system.cpu.fetch.branchRate                  0.249623                       # Number of branch fetches per cycle
+system.cpu.fetch.branchRate                  0.221744                       # Number of branch fetches per cycle
-system.cpu.fetch.icacheStallCycles               4933                       # Number of cycles fetch is stalled on an Icache miss
+system.cpu.fetch.icacheStallCycles               4655                       # Number of cycles fetch is stalled on an Icache miss
-system.cpu.fetch.predictedBranches               2726                       # Number of branches that fetch has predicted taken
+system.cpu.fetch.predictedBranches               2589                       # Number of branches that fetch has predicted taken
-system.cpu.fetch.rate                        1.394081                       # Number of inst fetches per cycle
+system.cpu.fetch.rate                        1.228121                       # Number of inst fetches per cycle
 system.cpu.fetch.rateDist.start_dist                           # Number of instructions fetched each cycle (Total)
-system.cpu.fetch.rateDist.samples               31864                      
+system.cpu.fetch.rateDist.samples               31365                      
 system.cpu.fetch.rateDist.min_value                 0                      
-                               0        22632   7102.69%           
+                               0        20959   6682.29%           
-                               1         2187    686.35%           
+                               1         4502   1435.36%           
-                               2          562    176.37%           
+                               2          577    183.96%           
-                               3          869    272.72%           
+                               3          682    217.44%           
-                               4          521    163.51%           
+                               4          776    247.41%           
-                               5          770    241.65%           
+                               5          629    200.54%           
-                               6          886    278.06%           
+                               6          581    185.24%           
-                               7          243     76.26%           
+                               7          189     60.26%           
-                               8         3194   1002.39%           
+                               8         2470    787.50%           
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
-system.cpu.icache.ReadReq_accesses               4933                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_accesses               4655                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency  5310.666667                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_miss_latency  5308.823529                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency  4396.174863                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency  4382.513661                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   4558                       # number of ReadReq hits
+system.cpu.icache.ReadReq_hits                   4281                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency        1991500                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_latency        1985500                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.076019                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_miss_rate          0.080344                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  375                       # number of ReadReq misses
+system.cpu.icache.ReadReq_misses                  374                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits                 9                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_hits                 8                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency      1609000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_latency      1604000                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.074194                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_miss_rate     0.078625                       # mshr miss rate for ReadReq accesses
 system.cpu.icache.ReadReq_mshr_misses             366                       # number of ReadReq MSHR misses
 system.cpu.icache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
 system.cpu.icache.avg_blocked_cycles_no_targets <err: div-0>                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                  12.453552                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                  11.696721                       # Average number of references to valid blocks.
 system.cpu.icache.blocked_no_mshrs                  0                       # number of cycles access was blocked
 system.cpu.icache.blocked_no_targets                0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       # number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                4933                       # number of demand (read+write) accesses
+system.cpu.icache.demand_accesses                4655                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency  5310.666667                       # average overall miss latency
+system.cpu.icache.demand_avg_miss_latency  5308.823529                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency  4396.174863                       # average overall mshr miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency  4382.513661                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    4558                       # number of demand (read+write) hits
+system.cpu.icache.demand_hits                    4281                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency         1991500                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_latency         1985500                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.076019                       # miss rate for demand accesses
+system.cpu.icache.demand_miss_rate           0.080344                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   375                       # number of demand (read+write) misses
+system.cpu.icache.demand_misses                   374                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                  9                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_hits                  8                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency      1609000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_latency      1604000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.074194                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_rate      0.078625                       # mshr miss rate for demand accesses
 system.cpu.icache.demand_mshr_misses              366                       # number of demand (read+write) MSHR misses
 system.cpu.icache.fast_writes                       0                       # number of fast writes performed
 system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
 system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               4933                       # number of overall (read+write) accesses
+system.cpu.icache.overall_accesses               4655                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency  5310.666667                       # average overall miss latency
+system.cpu.icache.overall_avg_miss_latency  5308.823529                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency  4396.174863                       # average overall mshr miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency  4382.513661                       # average overall mshr miss latency
 system.cpu.icache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   4558                       # number of overall hits
+system.cpu.icache.overall_hits                   4281                       # number of overall hits
-system.cpu.icache.overall_miss_latency        1991500                       # number of overall miss cycles
+system.cpu.icache.overall_miss_latency        1985500                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.076019                       # miss rate for overall accesses
+system.cpu.icache.overall_miss_rate          0.080344                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  375                       # number of overall misses
+system.cpu.icache.overall_misses                  374                       # number of overall misses
-system.cpu.icache.overall_mshr_hits                 9                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_hits                 8                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency      1609000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_latency      1604000                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.074194                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_rate     0.078625                       # mshr miss rate for overall accesses
 system.cpu.icache.overall_mshr_misses             366                       # number of overall MSHR misses
 system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
 system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
@ -217,59 +217,59 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.icache.replacements                      1                       # number of replacements
 system.cpu.icache.sampled_refs                    366                       # Sample count of references to valid blocks.
 system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                233.760012                       # Cycle average of tags in use
+system.cpu.icache.tagsinuse                232.692086                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     4558                       # Total number of references to valid blocks.
+system.cpu.icache.total_refs                     4281                       # Total number of references to valid blocks.
 system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
 system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.idleCycles                             499                       # Total number of cycles that the CPU has spent unscheduled due to idling
+system.cpu.idleCycles                            1997                       # Total number of cycles that the CPU has spent unscheduled due to idling
-system.cpu.iew.EXEC:branches                     3548                       # Number of branches executed
+system.cpu.iew.EXEC:branches                     3040                       # Number of branches executed
 system.cpu.iew.EXEC:nop                             0                       # number of nop insts executed
-system.cpu.iew.EXEC:rate                     0.670318                       # Inst execution rate
+system.cpu.iew.EXEC:rate                     0.582082                       # Inst execution rate
-system.cpu.iew.EXEC:refs                         5385                       # number of memory reference insts executed
+system.cpu.iew.EXEC:refs                         4490                       # number of memory reference insts executed
-system.cpu.iew.EXEC:stores                       2502                       # Number of stores executed
+system.cpu.iew.EXEC:stores                       2077                       # Number of stores executed
 system.cpu.iew.EXEC:swp                             0                       # number of swp insts executed
-system.cpu.iew.WB:consumers                     10159                       # num instructions consuming a value
+system.cpu.iew.WB:consumers                      8997                       # num instructions consuming a value
-system.cpu.iew.WB:count                         20199                       # cumulative count of insts written-back
+system.cpu.iew.WB:count                         17565                       # cumulative count of insts written-back
-system.cpu.iew.WB:fanout                     0.790629                       # average fanout of values written-back
+system.cpu.iew.WB:fanout                     0.831833                       # average fanout of values written-back
 system.cpu.iew.WB:penalized                         0                       # number of instrctions required to write to 'other' IQ
 system.cpu.iew.WB:penalized_rate                    0                       # fraction of instructions written-back that wrote to 'other' IQ
-system.cpu.iew.WB:producers                      8032                       # num instructions producing a value
+system.cpu.iew.WB:producers                      7484                       # num instructions producing a value
-system.cpu.iew.WB:rate                       0.633913                       # insts written-back per cycle
+system.cpu.iew.WB:rate                       0.560019                       # insts written-back per cycle
-system.cpu.iew.WB:sent                          20448                       # cumulative count of insts sent to commit
+system.cpu.iew.WB:sent                          17724                       # cumulative count of insts sent to commit
-system.cpu.iew.branchMispredicts                 2568                       # Number of branch mispredicts detected at execute
+system.cpu.iew.branchMispredicts                 2199                       # Number of branch mispredicts detected at execute
 system.cpu.iew.iewBlockCycles                       0                       # Number of cycles IEW is blocking
-system.cpu.iew.iewDispLoadInsts                  3198                       # Number of dispatched load instructions
+system.cpu.iew.iewDispLoadInsts                  2999                       # Number of dispatched load instructions
-system.cpu.iew.iewDispNonSpecInsts                610                       # Number of dispatched non-speculative instructions
+system.cpu.iew.iewDispNonSpecInsts                609                       # Number of dispatched non-speculative instructions
-system.cpu.iew.iewDispSquashedInsts              2750                       # Number of squashed instructions skipped by dispatch
+system.cpu.iew.iewDispSquashedInsts              1287                       # Number of squashed instructions skipped by dispatch
-system.cpu.iew.iewDispStoreInsts                 2970                       # Number of dispatched store instructions
+system.cpu.iew.iewDispStoreInsts                 2872                       # Number of dispatched store instructions
-system.cpu.iew.iewDispatchedInsts               25274                       # Number of instructions dispatched to IQ
+system.cpu.iew.iewDispatchedInsts               23636                       # Number of instructions dispatched to IQ
-system.cpu.iew.iewExecLoadInsts                  2883                       # Number of load instructions executed
+system.cpu.iew.iewExecLoadInsts                  2413                       # Number of load instructions executed
-system.cpu.iew.iewExecSquashedInsts              1463                       # Number of squashed instructions skipped in execute
+system.cpu.iew.iewExecSquashedInsts              3118                       # Number of squashed instructions skipped in execute
-system.cpu.iew.iewExecutedInsts                 21359                       # Number of executed instructions
+system.cpu.iew.iewExecutedInsts                 18257                       # Number of executed instructions
 system.cpu.iew.iewIQFullEvents                      0                       # Number of times the IQ has become full, causing a stall
 system.cpu.iew.iewIdleCycles                        0                       # Number of cycles IEW is idle
 system.cpu.iew.iewLSQFullEvents                     0                       # Number of times the LSQ has become full, causing a stall
-system.cpu.iew.iewSquashCycles                   3063                       # Number of cycles IEW is squashing
+system.cpu.iew.iewSquashCycles                   2804                       # Number of cycles IEW is squashing
 system.cpu.iew.iewUnblockCycles                     0                       # Number of cycles IEW is unblocking
 system.cpu.iew.lsq.thread.0.blockedLoads            0                       # Number of blocked loads due to partial load-store forwarding
 system.cpu.iew.lsq.thread.0.cacheBlocked            0                       # Number of times an access to memory failed due to the cache being blocked
-system.cpu.iew.lsq.thread.0.forwLoads              48                       # Number of loads that had data forwarded from stores
+system.cpu.iew.lsq.thread.0.forwLoads              43                       # Number of loads that had data forwarded from stores
 system.cpu.iew.lsq.thread.0.ignoredResponses            8                       # Number of memory responses ignored because the instruction is squashed
 system.cpu.iew.lsq.thread.0.invAddrLoads            0                       # Number of loads ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.invAddrSwpfs            0                       # Number of software prefetches ignored due to an invalid address
 system.cpu.iew.lsq.thread.0.memOrderViolation           52                       # Number of memory ordering violations
 system.cpu.iew.lsq.thread.0.rescheduledLoads            0                       # Number of loads that were rescheduled
-system.cpu.iew.lsq.thread.0.squashedLoads         1736                       # Number of loads squashed
+system.cpu.iew.lsq.thread.0.squashedLoads         1537                       # Number of loads squashed
-system.cpu.iew.lsq.thread.0.squashedStores         1672                       # Number of stores squashed
+system.cpu.iew.lsq.thread.0.squashedStores         1574                       # Number of stores squashed
 system.cpu.iew.memOrderViolationEvents             52                       # Number of memory order violations
-system.cpu.iew.predictedNotTakenIncorrect          958                       # Number of branches that were predicted not taken incorrectly
+system.cpu.iew.predictedNotTakenIncorrect          682                       # Number of branches that were predicted not taken incorrectly
-system.cpu.iew.predictedTakenIncorrect           1610                       # Number of branches that were predicted taken incorrectly
+system.cpu.iew.predictedTakenIncorrect           1517                       # Number of branches that were predicted taken incorrectly
-system.cpu.ipc                               0.344464                       # IPC: Instructions Per Cycle
+system.cpu.ipc                               0.349944                       # IPC: Instructions Per Cycle
-system.cpu.ipc_total                         0.344464                       # IPC: Total IPC of All Threads
+system.cpu.ipc_total                         0.349944                       # IPC: Total IPC of All Threads
-system.cpu.iq.ISSUE:FU_type_0                   22822                       # Type of FU issued
+system.cpu.iq.ISSUE:FU_type_0                   21375                       # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.start_dist
-                          (null)         1826      8.00%            # Type of FU issued
+                      No_OpClass         1750      8.19%            # Type of FU issued
-                          IntAlu        15247     66.81%            # Type of FU issued
+                          IntAlu        14209     66.47%            # Type of FU issued
                         IntMult            0      0.00%            # Type of FU issued
                          IntDiv            0      0.00%            # Type of FU issued
                        FloatAdd            0      0.00%            # Type of FU issued
@ -278,16 +278,16 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist
                       FloatMult            0      0.00%            # Type of FU issued
                        FloatDiv            0      0.00%            # Type of FU issued
                       FloatSqrt            0      0.00%            # Type of FU issued
-                         MemRead         3042     13.33%            # Type of FU issued
+                         MemRead         2832     13.25%            # Type of FU issued
-                        MemWrite         2707     11.86%            # Type of FU issued
+                        MemWrite         2584     12.09%            # Type of FU issued
                       IprAccess            0      0.00%            # Type of FU issued
                    InstPrefetch            0      0.00%            # Type of FU issued
 system.cpu.iq.ISSUE:FU_type_0.end_dist
-system.cpu.iq.ISSUE:fu_busy_cnt                   190                       # FU busy when requested
+system.cpu.iq.ISSUE:fu_busy_cnt                   160                       # FU busy when requested
-system.cpu.iq.ISSUE:fu_busy_rate             0.008325                       # FU busy rate (busy events/executed inst)
+system.cpu.iq.ISSUE:fu_busy_rate             0.007485                       # FU busy rate (busy events/executed inst)
 system.cpu.iq.ISSUE:fu_full.start_dist
-                          (null)            0      0.00%            # attempts to use FU when none available
+                      No_OpClass            0      0.00%            # attempts to use FU when none available
-                          IntAlu           50     26.32%            # attempts to use FU when none available
+                          IntAlu           27     16.88%            # attempts to use FU when none available
                         IntMult            0      0.00%            # attempts to use FU when none available
                          IntDiv            0      0.00%            # attempts to use FU when none available
                        FloatAdd            0      0.00%            # attempts to use FU when none available
@ -296,41 +296,41 @@ system.cpu.iq.ISSUE:fu_full.start_dist
                       FloatMult            0      0.00%            # attempts to use FU when none available
                        FloatDiv            0      0.00%            # attempts to use FU when none available
                       FloatSqrt            0      0.00%            # attempts to use FU when none available
-                         MemRead           25     13.16%            # attempts to use FU when none available
+                         MemRead           23     14.37%            # attempts to use FU when none available
-                        MemWrite          115     60.53%            # attempts to use FU when none available
+                        MemWrite          110     68.75%            # attempts to use FU when none available
                       IprAccess            0      0.00%            # attempts to use FU when none available
                    InstPrefetch            0      0.00%            # attempts to use FU when none available
 system.cpu.iq.ISSUE:fu_full.end_dist
 system.cpu.iq.ISSUE:issued_per_cycle.start_dist                     # Number of insts issued each cycle
-system.cpu.iq.ISSUE:issued_per_cycle.samples        31864                      
+system.cpu.iq.ISSUE:issued_per_cycle.samples        31365                      
 system.cpu.iq.ISSUE:issued_per_cycle.min_value            0                      
-                               0        22879   7180.20%           
+                               0        21827   6959.03%           
-                               1         3824   1200.10%           
+                               1         4212   1342.90%           
-                               2         1304    409.24%           
+                               2         2084    664.43%           
-                               3         1251    392.61%           
+                               3         1568    499.92%           
-                               4         1252    392.92%           
+                               4          766    244.22%           
-                               5          751    235.69%           
+                               5          454    144.75%           
-                               6          414    129.93%           
+                               6          283     90.23%           
-                               7          122     38.29%           
+                               7          109     34.75%           
-                               8           67     21.03%           
+                               8           62     19.77%           
 system.cpu.iq.ISSUE:issued_per_cycle.max_value            8                      
 system.cpu.iq.ISSUE:issued_per_cycle.end_dist
-system.cpu.iq.ISSUE:rate                     0.716231                       # Inst issue rate
+system.cpu.iq.ISSUE:rate                     0.681492                       # Inst issue rate
-system.cpu.iq.iqInstsAdded                      24664                       # Number of instructions added to the IQ (excludes non-spec)
+system.cpu.iq.iqInstsAdded                      23027                       # Number of instructions added to the IQ (excludes non-spec)
-system.cpu.iq.iqInstsIssued                     22822                       # Number of instructions issued
+system.cpu.iq.iqInstsIssued                     21375                       # Number of instructions issued
-system.cpu.iq.iqNonSpecInstsAdded                 610                       # Number of non-speculative instructions added to the IQ
+system.cpu.iq.iqNonSpecInstsAdded                 609                       # Number of non-speculative instructions added to the IQ
-system.cpu.iq.iqSquashedInstsExamined           11119                       # Number of squashed instructions iterated over during squash; mainly for profiling
+system.cpu.iq.iqSquashedInstsExamined           10843                       # Number of squashed instructions iterated over during squash; mainly for profiling
-system.cpu.iq.iqSquashedInstsIssued                83                       # Number of squashed instructions issued
+system.cpu.iq.iqSquashedInstsIssued                99                       # Number of squashed instructions issued
-system.cpu.iq.iqSquashedNonSpecRemoved            283                       # Number of squashed non-spec instructions that were removed
+system.cpu.iq.iqSquashedNonSpecRemoved            280                       # Number of squashed non-spec instructions that were removed
-system.cpu.iq.iqSquashedOperandsExamined         5685                       # Number of squashed operands that are examined and possibly removed from graph
+system.cpu.iq.iqSquashedOperandsExamined         7823                       # Number of squashed operands that are examined and possibly removed from graph
 system.cpu.l2cache.ReadReq_accesses               514                       # number of ReadReq accesses(hits+misses)
 system.cpu.l2cache.ReadReq_avg_miss_latency  4458.171206                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2373.540856                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency  2375.486381                       # average ReadReq mshr miss latency
 system.cpu.l2cache.ReadReq_miss_latency       2291500                       # number of ReadReq miss cycles
 system.cpu.l2cache.ReadReq_miss_rate                1                       # miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_misses                 514                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency      1220000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency      1221000                       # number of ReadReq MSHR miss cycles
 system.cpu.l2cache.ReadReq_mshr_miss_rate            1                       # mshr miss rate for ReadReq accesses
 system.cpu.l2cache.ReadReq_mshr_misses            514                       # number of ReadReq MSHR misses
 system.cpu.l2cache.avg_blocked_cycles_no_mshrs <err: div-0>                       # average number of cycles each access was blocked
@ -343,13 +343,13 @@ system.cpu.l2cache.blocked_cycles_no_targets            0
 system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
 system.cpu.l2cache.demand_accesses                514                       # number of demand (read+write) accesses
 system.cpu.l2cache.demand_avg_miss_latency  4458.171206                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency  2373.540856                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency  2375.486381                       # average overall mshr miss latency
 system.cpu.l2cache.demand_hits                      0                       # number of demand (read+write) hits
 system.cpu.l2cache.demand_miss_latency        2291500                       # number of demand (read+write) miss cycles
 system.cpu.l2cache.demand_miss_rate                 1                       # miss rate for demand accesses
 system.cpu.l2cache.demand_misses                  514                       # number of demand (read+write) misses
 system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency      1220000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency      1221000                       # number of demand (read+write) MSHR miss cycles
 system.cpu.l2cache.demand_mshr_miss_rate            1                       # mshr miss rate for demand accesses
 system.cpu.l2cache.demand_mshr_misses             514                       # number of demand (read+write) MSHR misses
 system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
@ -357,14 +357,14 @@ system.cpu.l2cache.mshr_cap_events                  0                       # nu
 system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
 system.cpu.l2cache.overall_accesses               514                       # number of overall (read+write) accesses
 system.cpu.l2cache.overall_avg_miss_latency  4458.171206                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency  2373.540856                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency  2375.486381                       # average overall mshr miss latency
 system.cpu.l2cache.overall_avg_mshr_uncacheable_latency <err: div-0>                       # average overall mshr uncacheable latency
 system.cpu.l2cache.overall_hits                     0                       # number of overall hits
 system.cpu.l2cache.overall_miss_latency       2291500                       # number of overall miss cycles
 system.cpu.l2cache.overall_miss_rate                1                       # miss rate for overall accesses
 system.cpu.l2cache.overall_misses                 514                       # number of overall misses
 system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency      1220000                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency      1221000                       # number of overall MSHR miss cycles
 system.cpu.l2cache.overall_mshr_miss_rate            1                       # mshr miss rate for overall accesses
 system.cpu.l2cache.overall_mshr_misses            514                       # number of overall MSHR misses
 system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@ -381,26 +381,25 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss            0
 system.cpu.l2cache.replacements                     0                       # number of replacements
 system.cpu.l2cache.sampled_refs                   514                       # Sample count of references to valid blocks.
 system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               345.564898                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               344.125692                       # Cycle average of tags in use
 system.cpu.l2cache.total_refs                       0                       # Total number of references to valid blocks.
 system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
 system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                            31864                       # number of cpu cycles simulated
+system.cpu.numCycles                            31365                       # number of cpu cycles simulated
 system.cpu.rename.RENAME:CommittedMaps           9868                       # Number of HB maps that are committed
-system.cpu.rename.RENAME:IdleCycles             16082                       # Number of cycles rename is idle
+system.cpu.rename.RENAME:IdleCycles             16585                       # Number of cycles rename is idle
-system.cpu.rename.RENAME:RenameLookups          44650                       # Number of register rename lookups that rename has made
+system.cpu.rename.RENAME:RenameLookups          46161                       # Number of register rename lookups that rename has made
-system.cpu.rename.RENAME:RenamedInsts           29655                       # Number of instructions processed by rename
+system.cpu.rename.RENAME:RenamedInsts           26550                       # Number of instructions processed by rename
-system.cpu.rename.RENAME:RenamedOperands        24195                       # Number of destination operands rename has renamed
+system.cpu.rename.RENAME:RenamedOperands        21893                       # Number of destination operands rename has renamed
-system.cpu.rename.RENAME:RunCycles               7618                       # Number of cycles rename is running
+system.cpu.rename.RENAME:RunCycles               8196                       # Number of cycles rename is running
-system.cpu.rename.RENAME:SquashCycles            3063                       # Number of cycles rename is squashing
+system.cpu.rename.RENAME:SquashCycles            2804                       # Number of cycles rename is squashing
-system.cpu.rename.RENAME:SquashedInsts           8815                       # Number of squashed instructions processed by rename
+system.cpu.rename.RENAME:UnblockCycles            229                       # Number of cycles rename is unblocking
-system.cpu.rename.RENAME:UnblockCycles            684                       # Number of cycles rename is unblocking
+system.cpu.rename.RENAME:UndoneMaps             12025                       # Number of HB maps that are undone due to squashing
-system.cpu.rename.RENAME:UndoneMaps             14327                       # Number of HB maps that are undone due to squashing
+system.cpu.rename.RENAME:serializeStallCycles         3551                       # count of cycles rename stalled for serializing inst
-system.cpu.rename.RENAME:serializeStallCycles         3915                       # count of cycles rename stalled for serializing inst
+system.cpu.rename.RENAME:serializingInsts          628                       # count of serializing insts renamed
-system.cpu.rename.RENAME:serializingInsts          631                       # count of serializing insts renamed
+system.cpu.rename.RENAME:skidInsts               4297                       # count of insts added to the skid buffer
-system.cpu.rename.RENAME:skidInsts               4702                       # count of insts added to the skid buffer
+system.cpu.rename.RENAME:tempSerializingInsts          640                       # count of temporary serializing insts renamed
-system.cpu.rename.RENAME:tempSerializingInsts          623                       # count of temporary serializing insts renamed
+system.cpu.timesIdled                               3                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.timesIdled                               1                       # Number of times that the entire CPU went into an idle state and unscheduled itself
 system.cpu.workload.PROG:num_syscalls               8                       # Number of system calls
 ---------- End Simulation Statistics   ----------
--- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout
+++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout
@ -16,9 +16,9 @@ The Regents of The University of Michigan
 All Rights Reserved
-M5 compiled May 15 2007 13:02:31
+M5 compiled Jun 21 2007 21:15:48
-M5 started Tue May 15 17:00:06 2007
+M5 started Fri Jun 22 00:32:08 2007
 M5 executing on zizzer.eecs.umich.edu
 command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/02.insttest/sparc/linux/o3-timing tests/run.py quick/02.insttest/sparc/linux/o3-timing
 Global frequency set at 1000000000000 ticks per second
-Exiting @ tick 15931500 because target called exit()
+Exiting @ tick 15682500 because target called exit()