Large update of several parts of my code. The most notable change is the inclusion of a full-fledged load/store queue. At the moment it still has some issues running, but most of the code is hopefully close to the final version.

SConscript: arch/isa_parser.py: cpu/base_dyn_inst.cc: Remove OOO CPU stuff. arch/alpha/faults.hh: Add fake memory fault. This will be removed eventually. arch/alpha/isa_desc: Change EA comp and Mem accessor to be const StaticInstPtrs. cpu/base_dyn_inst.hh: Update read/write calls to use load queue and store queue indices. cpu/beta_cpu/alpha_dyn_inst.hh: Change to const StaticInst in the register accessors. cpu/beta_cpu/alpha_dyn_inst_impl.hh: Update syscall code with thread numbers. cpu/beta_cpu/alpha_full_cpu.hh: Alter some of the full system code so it will compile without errors. cpu/beta_cpu/alpha_full_cpu_builder.cc: Created a DerivAlphaFullCPU class so I can instantiate different CPUs that have different template parameters. cpu/beta_cpu/alpha_full_cpu_impl.hh: Update some of the full system code so it compiles. cpu/beta_cpu/alpha_params.hh: cpu/beta_cpu/fetch_impl.hh: Remove asid. cpu/beta_cpu/comm.hh: Remove global history field. cpu/beta_cpu/commit.hh: Comment out rename map. cpu/beta_cpu/commit_impl.hh: Update some of the full system code so it compiles. Also change it so that it handles memory instructions properly. cpu/beta_cpu/cpu_policy.hh: Removed IQ from the IEW template parameter to make it more uniform. cpu/beta_cpu/decode.hh: Add debug function. cpu/beta_cpu/decode_impl.hh: Slight updates for decode in the case where it causes a squash. cpu/beta_cpu/fetch.hh: cpu/beta_cpu/rob.hh: Comment out unnecessary code. cpu/beta_cpu/full_cpu.cc: Changed some of the full system code so it compiles. Updated exec contexts and so forth to hopefully make multithreading easier. cpu/beta_cpu/full_cpu.hh: Updated some of the full system code to make it compile. cpu/beta_cpu/iew.cc: Removed IQ from template parameter to IEW. cpu/beta_cpu/iew.hh: Removed IQ from template parameter to IEW. Updated IEW to recognize the Load/Store queue. cpu/beta_cpu/iew_impl.hh: New handling of memory instructions through the Load/Store queue. 
cpu/beta_cpu/inst_queue.hh: Updated comment. cpu/beta_cpu/inst_queue_impl.hh: Slightly different handling of memory instructions due to Load/Store queue. cpu/beta_cpu/regfile.hh: Updated full system code so it compiles. cpu/beta_cpu/rob_impl.hh: Moved some code around; no major functional changes. cpu/ooo_cpu/ooo_cpu.hh: Slight updates to OOO CPU; still does not work. cpu/static_inst.hh: Remove OOO CPU stuff. Change ea comp and mem acc to return const StaticInst. kern/kernel_stats.hh: Extra forward declares added due to compile error. --HG-- extra : convert_revision : 594a7cdbe57f6c2bda7d08856fcd864604a6238e
2005-05-03 10:56:47 -04:00 · 2005-05-03 10:56:47 -04:00 · 61d95de4c8
parent 6191d3e444
commit 61d95de4c8
34 changed files with 838 additions and 927 deletions
--- a/8
+++ b/8
@ -52,7 +52,6 @@ base_sources = Split('''
 	arch/alpha/full_cpu_exec.cc
 	arch/alpha/faults.cc
 	arch/alpha/isa_traits.cc
-        arch/alpha/ooo_cpu_exec.cc

 	base/circlebuf.cc
 	base/copyright.cc
@ -157,10 +156,6 @@ base_sources = Split('''
        cpu/full_cpu/iq/seznec/iq_seznec.cc
        cpu/full_cpu/iq/standard/iq_standard.cc
        cpu/inorder_cpu/inorder_cpu.cc
-        cpu/ooo_cpu/ea_list.cc
-        cpu/ooo_cpu/ooo_cpu.cc
-        cpu/ooo_cpu/ooo_dyn_inst.cc
-        cpu/ooo_cpu/ooo_sim_obj.cc
        cpu/sampling_cpu/sampling_cpu.cc
        cpu/simple_cpu/simple_cpu.cc
        cpu/trace/reader/mem_trace_reader.cc
@ -402,8 +397,7 @@ env.Command(Split('''arch/alpha/decoder.cc
 		     arch/alpha/fast_cpu_exec.cc
                     arch/alpha/simple_cpu_exec.cc
                     arch/alpha/inorder_cpu_exec.cc
-                     arch/alpha/full_cpu_exec.cc
-                     arch/alpha/ooo_cpu_exec.cc'''),
+                     arch/alpha/full_cpu_exec.cc'''),
            Split('''arch/alpha/isa_desc
 		     arch/isa_parser.py'''),
            '$SRCDIR/arch/isa_parser.py $SOURCE $TARGET.dir arch/alpha')
--- a/arch/alpha/faults.hh
+++ b/arch/alpha/faults.hh
@ -47,6 +47,7 @@ enum Fault {
    Fen_Fault,			// FP not-enabled fault
    Pal_Fault,			// call_pal S/W interrupt
    Integer_Overflow_Fault,
+    Fake_Mem_Fault,
    Num_Faults			// number of faults
 };

--- a/arch/alpha/isa_desc
+++ b/arch/alpha/isa_desc
@ -744,9 +744,9 @@ output header {{
 	/// Memory request flags.  See mem_req_base.hh.
        unsigned memAccessFlags;
 	/// Pointer to EAComp object.
-	StaticInstPtr<AlphaISA> eaCompPtr;
+	const StaticInstPtr<AlphaISA> eaCompPtr;
 	/// Pointer to MemAcc object.
-	StaticInstPtr<AlphaISA> memAccPtr;
+	const StaticInstPtr<AlphaISA> memAccPtr;

 	/// Constructor
 	Memory(const char *mnem, MachInst _machInst, OpClass __opClass,
@ -762,8 +762,8 @@ output header {{

      public:

-	StaticInstPtr<AlphaISA> &eaCompInst() { return eaCompPtr; }
-	StaticInstPtr<AlphaISA> &memAccInst() { return memAccPtr; }
+	const StaticInstPtr<AlphaISA> &eaCompInst() const { return eaCompPtr; }
+	const StaticInstPtr<AlphaISA> &memAccInst() const { return memAccPtr; }
    };

    /**
@ -2539,9 +2539,9 @@ decode OPCODE default Unknown::unknown() {
 		xc->syscall();
 	    }}, IsNonSpeculative);
 	    // Read uniq reg into ABI return value register (r0)
-	    0x9e: rduniq({{ R0 = Runiq; }}, IsNonSpeculative);
+	    0x9e: rduniq({{ R0 = Runiq; }});
 	    // Write uniq reg with value from ABI arg register (r16)
-	    0x9f: wruniq({{ Runiq = R16; }}, IsNonSpeculative);
+	    0x9f: wruniq({{ Runiq = R16; }});
 	}
    }
 #endif
--- a/arch/isa_parser.py
+++ b/arch/isa_parser.py
@ -642,9 +642,6 @@ CpuModel('FullCPU', 'full_cpu_exec.cc',
 CpuModel('AlphaFullCPU', 'alpha_full_cpu_exec.cc',
         '#include "cpu/beta_cpu/alpha_dyn_inst.hh"',
         { 'CPU_exec_context': 'AlphaDynInst<AlphaSimpleImpl>' })
-CpuModel('OoOCPU', 'ooo_cpu_exec.cc',
-         '#include "cpu/ooo_cpu/ooo_dyn_inst.hh"',
-         { 'CPU_exec_context': 'OoODynInst<OoOImpl>' })

 # Expand template with CPU-specific references into a dictionary with
 # an entry for each CPU model name.  The entry key is the model name
--- a/cpu/base_dyn_inst.cc
+++ b/cpu/base_dyn_inst.cc
@ -43,8 +43,6 @@
 #include "cpu/base_dyn_inst.hh"
 #include "cpu/beta_cpu/alpha_impl.hh"
 #include "cpu/beta_cpu/alpha_full_cpu.hh"
-#include "cpu/ooo_cpu/ooo_impl.hh"
-#include "cpu/ooo_cpu/ooo_cpu.hh"

 using namespace std;

@ -384,14 +382,9 @@ BaseDynInst<Impl>::eaSrcsReady()

 // Forward declaration...
 template class BaseDynInst<AlphaSimpleImpl>;
-template class BaseDynInst<OoOImpl>;

 template <>
 int
 BaseDynInst<AlphaSimpleImpl>::instcount = 0;

-template <>
-int
-BaseDynInst<OoOImpl>::instcount = 0;
-
 #endif // __CPU_BASE_DYN_INST_CC__
--- a/cpu/base_dyn_inst.hh
+++ b/cpu/base_dyn_inst.hh
@ -404,6 +404,10 @@ class BaseDynInst : public FastAlloc, public RefCounted
    const Addr &getEA() const { return instEffAddr; }
    bool doneEACalc() { return eaCalcDone; }
    bool eaSrcsReady();
+
+  public:
+    int16_t lqIdx;
+    int16_t sqIdx;
 };

 template<class Impl>
@ -419,6 +423,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
    // Record key MemReq parameters so we can generate another one
    // just like it for the timing access without calling translate()
    // again (which might mess up the TLB).
+    // Do I ever really need this? -KTL 3/05
    effAddr = req->vaddr;
    physEffAddr = req->paddr;
    memReqFlags = req->flags;
@ -433,7 +438,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
 #endif

    if (fault == No_Fault) {
-        fault = cpu->read(req, data);
+        fault = cpu->read(req, data, lqIdx);
    }
    else {
        // Return a fixed value to keep simulation deterministic even
@ -459,8 +464,8 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
        traceData->setData(data);
    }

-    storeSize = sizeof(T);
-    storeData = data;
+//    storeSize = sizeof(T);
+//    storeData = data;

    MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags);

@ -485,7 +490,7 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
 #endif

    if (fault == No_Fault) {
-        fault = cpu->write(req, data);
+        fault = cpu->write(req, data, sqIdx);
    }

    if (res) {
--- a/cpu/beta_cpu/alpha_dyn_inst.hh
+++ b/cpu/beta_cpu/alpha_dyn_inst.hh
@ -47,11 +47,10 @@ class AlphaDynInst : public BaseDynInst<Impl>
    /** BaseDynInst constructor given a static inst pointer. */
    AlphaDynInst(StaticInstPtr<AlphaISA> &_staticInst);

-    /** Executes the instruction.  Why the hell did I put this here? */
+    /** Executes the instruction.*/
    Fault execute()
    {
-        this->fault = this->staticInst->execute(this, this->traceData);
-        return this->fault;
+        return this->fault = this->staticInst->execute(this, this->traceData);
    }

  public:
@ -105,47 +104,47 @@ class AlphaDynInst : public BaseDynInst<Impl>
    // storage (which is pretty hard to imagine they would have reason
    // to do).

-    uint64_t readIntReg(StaticInst<ISA> *si, int idx)
+    uint64_t readIntReg(const StaticInst<ISA> *si, int idx)
    {
        return this->cpu->readIntReg(_srcRegIdx[idx]);
    }

-    float readFloatRegSingle(StaticInst<ISA> *si, int idx)
+    float readFloatRegSingle(const StaticInst<ISA> *si, int idx)
    {
        return this->cpu->readFloatRegSingle(_srcRegIdx[idx]);
    }

-    double readFloatRegDouble(StaticInst<ISA> *si, int idx)
+    double readFloatRegDouble(const StaticInst<ISA> *si, int idx)
    {
        return this->cpu->readFloatRegDouble(_srcRegIdx[idx]);
    }

-    uint64_t readFloatRegInt(StaticInst<ISA> *si, int idx)
+    uint64_t readFloatRegInt(const StaticInst<ISA> *si, int idx)
    {
        return this->cpu->readFloatRegInt(_srcRegIdx[idx]);
    }
    /** @todo: Make results into arrays so they can handle multiple dest
     *  registers.
     */
-    void setIntReg(StaticInst<ISA> *si, int idx, uint64_t val)
+    void setIntReg(const StaticInst<ISA> *si, int idx, uint64_t val)
    {
        this->cpu->setIntReg(_destRegIdx[idx], val);
        this->instResult.integer = val;
    }

-    void setFloatRegSingle(StaticInst<ISA> *si, int idx, float val)
+    void setFloatRegSingle(const StaticInst<ISA> *si, int idx, float val)
    {
        this->cpu->setFloatRegSingle(_destRegIdx[idx], val);
        this->instResult.fp = val;
    }

-    void setFloatRegDouble(StaticInst<ISA> *si, int idx, double val)
+    void setFloatRegDouble(const StaticInst<ISA> *si, int idx, double val)
    {
        this->cpu->setFloatRegDouble(_destRegIdx[idx], val);
        this->instResult.dbl = val;
    }

-    void setFloatRegInt(StaticInst<ISA> *si, int idx, uint64_t val)
+    void setFloatRegInt(const StaticInst<ISA> *si, int idx, uint64_t val)
    {
        this->cpu->setFloatRegInt(_destRegIdx[idx], val);
        this->instResult.integer = val;
--- a/cpu/beta_cpu/alpha_dyn_inst_impl.hh
+++ b/cpu/beta_cpu/alpha_dyn_inst_impl.hh
@ -129,7 +129,8 @@ template <class Impl>
 void
 AlphaDynInst<Impl>::syscall()
 {
-    this->cpu->syscall();
+    this->cpu->syscall(this->threadNumber);
+//    this->cpu->syscall();
 }
 #endif

--- a/cpu/beta_cpu/alpha_full_cpu.hh
+++ b/cpu/beta_cpu/alpha_full_cpu.hh
@ -28,8 +28,6 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
    void regStats();

 #ifdef FULL_SYSTEM
-    bool inPalMode();
-
    //Note that the interrupt stuff from the base CPU might be somewhat
    //ISA specific (ie NumInterruptLevels).  These functions might not
    //be needed in FullCPU though.
@ -106,13 +104,16 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
    }

 #ifdef FULL_SYSTEM
-    uint64_t *getIPR();
+    uint64_t *getIpr();
    uint64_t readIpr(int idx, Fault &fault);
    Fault setIpr(int idx, uint64_t val);
    int readIntrFlag();
    void setIntrFlag(int val);
    Fault hwrei();
-    bool inPalMode();
+    bool inPalMode() { return AlphaISA::PcPAL(this->regFile.readPC()); }
+    bool inPalMode(uint64_t PC)
+    { return AlphaISA::PcPAL(PC); }
+
    void trap(Fault fault);
    bool simPalCheck(int palFunc);

@ -153,7 +154,7 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
        }
    }

-    void syscall();
+    void syscall(short thread_num);
    void squashStages();

 #endif
@ -168,11 +169,13 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
    // Not sure this is used anywhere.
    void intr_post(RegFile *regs, Fault fault, Addr pc);
    // Actually used within exec files.  Implement properly.
-    void swap_palshadow(RegFile *regs, bool use_shadow);
+    void swapPALShadow(bool use_shadow);
    // Called by CPU constructor.  Can implement as I please.
    void initCPU(RegFile *regs);
    // Called by initCPU.  Implement as I please.
    void initIPRs(RegFile *regs);
+
+    void halt() { panic("Halt not implemented!\n"); }
 #endif


@ -193,6 +196,11 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
        return error;
    }

+    template <class T>
+    Fault read(MemReqPtr &req, T &data, int load_idx)
+    {
+        return this->iew.ldstQueue.read(req, data, load_idx);
+    }

    template <class T>
    Fault write(MemReqPtr &req, T &data)
@ -218,7 +226,7 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
                        std::cerr << "Warning: "
                                  << req->xc->storeCondFailures
                                  << " consecutive store conditional failures "
-                                  << "on cpu " << cpu_id
+                                  << "on cpu " << this->cpu_id
                                  << std::endl;
                    }
                    return No_Fault;
@ -232,8 +240,8 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
        // and all other stores (WH64?).  Unsuccessful Store
        // Conditionals would have returned above, and wouldn't fall
        // through.
-        for (int i = 0; i < system->execContexts.size(); i++){
-            cregs = &system->execContexts[i]->regs.miscRegs;
+        for (int i = 0; i < this->system->execContexts.size(); i++){
+            cregs = &this->system->execContexts[i]->regs.miscRegs;
            if ((cregs->lock_addr & ~0xf) == (req->paddr & ~0xf)) {
                cregs->lock_flag = false;
            }
@ -244,6 +252,12 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
        return this->mem->write(req, (T)htoa(data));
    }

+    template <class T>
+    Fault write(MemReqPtr &req, T &data, int store_idx)
+    {
+        return this->iew.ldstQueue.write(req, data, store_idx);
+    }
+
 };

 #endif // __CPU_BETA_CPU_ALPHA_FULL_CPU_HH__
--- a/cpu/beta_cpu/alpha_full_cpu_builder.cc
+++ b/cpu/beta_cpu/alpha_full_cpu_builder.cc
@ -33,8 +33,17 @@
 #include "mem/functional_mem/functional_memory.hh"
 #endif // FULL_SYSTEM

-BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU)
+class DerivAlphaFullCPU : public AlphaFullCPU<AlphaSimpleImpl>
+{
+  public:
+    DerivAlphaFullCPU(AlphaSimpleParams p)
+        : AlphaFullCPU<AlphaSimpleImpl>(p)
+    { }
+};

+BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+
+    Param<int> cycle_time;
    Param<int> numThreads;

 #ifdef FULL_SYSTEM
@ -44,8 +53,6 @@ SimObjectParam<AlphaDTB *> dtb;
 Param<int> mult;
 #else
 SimObjectVectorParam<Process *> workload;
-SimObjectParam<Process *> process;
-Param<short> asid;
 #endif // FULL_SYSTEM
 SimObjectParam<FunctionalMemory *> mem;

@ -120,23 +127,25 @@ Param<unsigned> numROBEntries;

 Param<unsigned> instShiftAmt;

-Param<bool> defReg;
+Param<bool> defer_registration;

-END_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU)
+Param<bool> function_trace;
+Param<Tick> function_trace_start;

-BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU)
+END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)

+BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+
+    INIT_PARAM(cycle_time, "cpu cycle time"),
    INIT_PARAM(numThreads, "number of HW thread contexts"),

 #ifdef FULL_SYSTEM
    INIT_PARAM(system, "System object"),
    INIT_PARAM(itb, "Instruction translation buffer"),
    INIT_PARAM(dtb, "Data translation buffer"),
-    INIT_PARAM_DFLT(mult, "System clock multiplier", 1),
+    INIT_PARAM(mult, "System clock multiplier"),
 #else
    INIT_PARAM(workload, "Processes to run"),
-    INIT_PARAM_DFLT(process, "Process to run", NULL),
-    INIT_PARAM(asid, "Address space ID"),
 #endif // FULL_SYSTEM

    INIT_PARAM_DFLT(mem, "Memory", NULL),
@ -230,14 +239,16 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU)
    INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),

    INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
+    INIT_PARAM(defer_registration, "defer system registration (for sampling)"),

-    INIT_PARAM(defReg, "Defer registration")
+    INIT_PARAM(function_trace, "Enable function trace"),
+    INIT_PARAM(function_trace_start, "Cycle to start function trace")

-END_INIT_SIM_OBJECT_PARAMS(BaseFullCPU)
+END_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)

-CREATE_SIM_OBJECT(BaseFullCPU)
+CREATE_SIM_OBJECT(DerivAlphaFullCPU)
 {
-    AlphaFullCPU<AlphaSimpleImpl> *cpu;
+    DerivAlphaFullCPU *cpu;

 #ifdef FULL_SYSTEM
    if (mult != 1)
@ -255,30 +266,21 @@ CREATE_SIM_OBJECT(BaseFullCPU)
        fatal("Must specify at least one workload!");
    }

-    Process *actual_process;
-
-    if (process == NULL) {
-        actual_process = workload[0];
-    } else {
-        actual_process = process;
-    }
-
 #endif

    AlphaSimpleParams params;

+    params.cycleTime = cycle_time;
+
    params.name = getInstanceName();
    params.numberOfThreads = actual_num_threads;

 #ifdef FULL_SYSTEM
-    params._system = system;
+    params.system = system;
    params.itb = itb;
    params.dtb = dtb;
-    params.freq = ticksPerSecond * mult;
 #else
    params.workload = workload;
-    params.process = actual_process;
-    params.asid = asid;
 #endif // FULL_SYSTEM

    params.mem = mem;
@ -356,12 +358,15 @@ CREATE_SIM_OBJECT(BaseFullCPU)

    params.instShiftAmt = 2;

-    params.defReg = defReg;
+    params.defReg = defer_registration;

-    cpu = new AlphaFullCPU<AlphaSimpleImpl>(params);
+    params.functionTrace = function_trace;
+    params.functionTraceStart = function_trace_start;
+
+    cpu = new DerivAlphaFullCPU(params);

    return cpu;
 }

-REGISTER_SIM_OBJECT("AlphaFullCPU", BaseFullCPU)
+REGISTER_SIM_OBJECT("DerivAlphaFullCPU", DerivAlphaFullCPU)

--- a/cpu/beta_cpu/alpha_full_cpu_impl.hh
+++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh
@ -12,6 +12,14 @@
 #include "cpu/beta_cpu/alpha_params.hh"
 #include "cpu/beta_cpu/comm.hh"

+#ifdef FULL_SYSTEM
+#include "arch/alpha/osfpal.hh"
+#include "arch/alpha/isa_traits.hh"
+//#include "arch/alpha/ev5.hh"
+
+//using namespace EV5;
+#endif
+
 template <class Impl>
 AlphaFullCPU<Impl>::AlphaFullCPU(Params &params)
    : FullBetaCPU<Impl>(params)
@ -42,9 +50,12 @@ AlphaFullCPU<Impl>::regStats()

 #ifndef FULL_SYSTEM

+// Will probably need to know which thread is calling syscall
+// Will need to pass that information in to the DynInst when it is constructed,
+// so that this call can be made with the proper thread number.
 template <class Impl>
 void
-AlphaFullCPU<Impl>::syscall()
+AlphaFullCPU<Impl>::syscall(short thread_num)
 {
    DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n");

@ -60,7 +71,8 @@ AlphaFullCPU<Impl>::syscall()
    // Copy over all important state to xc once all the unrolling is done.
    copyToXC();

-    this->process->syscall(this->xc);
+    this->thread[0]->syscall();
+//    this->thread[thread_num]->syscall();

    // Copy over all important state back to CPU.
    copyFromXC();
@ -102,6 +114,8 @@ AlphaFullCPU<Impl>::squashStages()
    this->iew.squash();
    this->iewQueue.advance();
    this->iewQueue.advance();
+    // Needs to tell the LSQ to write back all of its data
+    this->iew.lsqWriteback();

    this->rob.squash(rob_head);
    this->commit.setSquashing();
@ -203,390 +217,35 @@ template <class Impl>
 uint64_t *
 AlphaFullCPU<Impl>::getIpr()
 {
-    return regFile.getIpr();
+    return this->regFile.getIpr();
 }

 template <class Impl>
 uint64_t
 AlphaFullCPU<Impl>::readIpr(int idx, Fault &fault)
 {
-    uint64_t *ipr = getIpr();
-    uint64_t retval = 0;	// return value, default 0
-
-    switch (idx) {
-      case AlphaISA::IPR_PALtemp0:
-      case AlphaISA::IPR_PALtemp1:
-      case AlphaISA::IPR_PALtemp2:
-      case AlphaISA::IPR_PALtemp3:
-      case AlphaISA::IPR_PALtemp4:
-      case AlphaISA::IPR_PALtemp5:
-      case AlphaISA::IPR_PALtemp6:
-      case AlphaISA::IPR_PALtemp7:
-      case AlphaISA::IPR_PALtemp8:
-      case AlphaISA::IPR_PALtemp9:
-      case AlphaISA::IPR_PALtemp10:
-      case AlphaISA::IPR_PALtemp11:
-      case AlphaISA::IPR_PALtemp12:
-      case AlphaISA::IPR_PALtemp13:
-      case AlphaISA::IPR_PALtemp14:
-      case AlphaISA::IPR_PALtemp15:
-      case AlphaISA::IPR_PALtemp16:
-      case AlphaISA::IPR_PALtemp17:
-      case AlphaISA::IPR_PALtemp18:
-      case AlphaISA::IPR_PALtemp19:
-      case AlphaISA::IPR_PALtemp20:
-      case AlphaISA::IPR_PALtemp21:
-      case AlphaISA::IPR_PALtemp22:
-      case AlphaISA::IPR_PALtemp23:
-      case AlphaISA::IPR_PAL_BASE:
-
-      case AlphaISA::IPR_IVPTBR:
-      case AlphaISA::IPR_DC_MODE:
-      case AlphaISA::IPR_MAF_MODE:
-      case AlphaISA::IPR_ISR:
-      case AlphaISA::IPR_EXC_ADDR:
-      case AlphaISA::IPR_IC_PERR_STAT:
-      case AlphaISA::IPR_DC_PERR_STAT:
-      case AlphaISA::IPR_MCSR:
-      case AlphaISA::IPR_ASTRR:
-      case AlphaISA::IPR_ASTER:
-      case AlphaISA::IPR_SIRR:
-      case AlphaISA::IPR_ICSR:
-      case AlphaISA::IPR_ICM:
-      case AlphaISA::IPR_DTB_CM:
-      case AlphaISA::IPR_IPLR:
-      case AlphaISA::IPR_INTID:
-      case AlphaISA::IPR_PMCTR:
-        // no side-effect
-        retval = ipr[idx];
-        break;
-
-      case AlphaISA::IPR_CC:
-        retval |= ipr[idx] & ULL(0xffffffff00000000);
-        retval |= curTick  & ULL(0x00000000ffffffff);
-        break;
-
-      case AlphaISA::IPR_VA:
-        retval = ipr[idx];
-        break;
-
-      case AlphaISA::IPR_VA_FORM:
-      case AlphaISA::IPR_MM_STAT:
-      case AlphaISA::IPR_IFAULT_VA_FORM:
-      case AlphaISA::IPR_EXC_MASK:
-      case AlphaISA::IPR_EXC_SUM:
-        retval = ipr[idx];
-        break;
-
-      case AlphaISA::IPR_DTB_PTE:
-        {
-            AlphaISA::PTE &pte = dtb->index(!misspeculating());
-
-            retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32;
-            retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8;
-            retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12;
-            retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1;
-            retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2;
-            retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4;
-            retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57;
-        }
-        break;
-
-        // write only registers
-      case AlphaISA::IPR_HWINT_CLR:
-      case AlphaISA::IPR_SL_XMIT:
-      case AlphaISA::IPR_DC_FLUSH:
-      case AlphaISA::IPR_IC_FLUSH:
-      case AlphaISA::IPR_ALT_MODE:
-      case AlphaISA::IPR_DTB_IA:
-      case AlphaISA::IPR_DTB_IAP:
-      case AlphaISA::IPR_ITB_IA:
-      case AlphaISA::IPR_ITB_IAP:
-        fault = Unimplemented_Opcode_Fault;
-        break;
-
-      default:
-        // invalid IPR
-        fault = Unimplemented_Opcode_Fault;
-        break;
-    }
-
-    return retval;
+    return this->regFile.readIpr(idx, fault);
 }

 template <class Impl>
 Fault
 AlphaFullCPU<Impl>::setIpr(int idx, uint64_t val)
 {
-    uint64_t *ipr = getIpr();
-    uint64_t old;
-
-    if (misspeculating())
-        return No_Fault;
-
-    switch (idx) {
-      case AlphaISA::IPR_PALtemp0:
-      case AlphaISA::IPR_PALtemp1:
-      case AlphaISA::IPR_PALtemp2:
-      case AlphaISA::IPR_PALtemp3:
-      case AlphaISA::IPR_PALtemp4:
-      case AlphaISA::IPR_PALtemp5:
-      case AlphaISA::IPR_PALtemp6:
-      case AlphaISA::IPR_PALtemp7:
-      case AlphaISA::IPR_PALtemp8:
-      case AlphaISA::IPR_PALtemp9:
-      case AlphaISA::IPR_PALtemp10:
-      case AlphaISA::IPR_PALtemp11:
-      case AlphaISA::IPR_PALtemp12:
-      case AlphaISA::IPR_PALtemp13:
-      case AlphaISA::IPR_PALtemp14:
-      case AlphaISA::IPR_PALtemp15:
-      case AlphaISA::IPR_PALtemp16:
-      case AlphaISA::IPR_PALtemp17:
-      case AlphaISA::IPR_PALtemp18:
-      case AlphaISA::IPR_PALtemp19:
-      case AlphaISA::IPR_PALtemp20:
-      case AlphaISA::IPR_PALtemp21:
-      case AlphaISA::IPR_PALtemp22:
-      case AlphaISA::IPR_PAL_BASE:
-      case AlphaISA::IPR_IC_PERR_STAT:
-      case AlphaISA::IPR_DC_PERR_STAT:
-      case AlphaISA::IPR_PMCTR:
-        // write entire quad w/ no side-effect
-        ipr[idx] = val;
-        break;
-
-      case AlphaISA::IPR_CC_CTL:
-        // This IPR resets the cycle counter.  We assume this only
-        // happens once... let's verify that.
-        assert(ipr[idx] == 0);
-        ipr[idx] = 1;
-        break;
-
-      case AlphaISA::IPR_CC:
-        // This IPR only writes the upper 64 bits.  It's ok to write
-        // all 64 here since we mask out the lower 32 in rpcc (see
-        // isa_desc).
-        ipr[idx] = val;
-        break;
-
-      case AlphaISA::IPR_PALtemp23:
-        // write entire quad w/ no side-effect
-        old = ipr[idx];
-        ipr[idx] = val;
-        kernelStats.context(old, val);
-        break;
-
-      case AlphaISA::IPR_DTB_PTE:
-        // write entire quad w/ no side-effect, tag is forthcoming
-        ipr[idx] = val;
-        break;
-
-      case AlphaISA::IPR_EXC_ADDR:
-        // second least significant bit in PC is always zero
-        ipr[idx] = val & ~2;
-        break;
-
-      case AlphaISA::IPR_ASTRR:
-      case AlphaISA::IPR_ASTER:
-        // only write least significant four bits - privilege mask
-        ipr[idx] = val & 0xf;
-        break;
-
-      case AlphaISA::IPR_IPLR:
-#ifdef DEBUG
-        if (break_ipl != -1 && break_ipl == (val & 0x1f))
-            debug_break();
-#endif
-
-        // only write least significant five bits - interrupt level
-        ipr[idx] = val & 0x1f;
-        kernelStats.swpipl(ipr[idx]);
-        break;
-
-      case AlphaISA::IPR_DTB_CM:
-        kernelStats.mode((val & 0x18) != 0);
-
-      case AlphaISA::IPR_ICM:
-        // only write two mode bits - processor mode
-        ipr[idx] = val & 0x18;
-        break;
-
-      case AlphaISA::IPR_ALT_MODE:
-        // only write two mode bits - processor mode
-        ipr[idx] = val & 0x18;
-        break;
-
-      case AlphaISA::IPR_MCSR:
-        // more here after optimization...
-        ipr[idx] = val;
-        break;
-
-      case AlphaISA::IPR_SIRR:
-        // only write software interrupt mask
-        ipr[idx] = val & 0x7fff0;
-        break;
-
-      case AlphaISA::IPR_ICSR:
-        ipr[idx] = val & ULL(0xffffff0300);
-        break;
-
-      case AlphaISA::IPR_IVPTBR:
-      case AlphaISA::IPR_MVPTBR:
-        ipr[idx] = val & ULL(0xffffffffc0000000);
-        break;
-
-      case AlphaISA::IPR_DC_TEST_CTL:
-        ipr[idx] = val & 0x1ffb;
-        break;
-
-      case AlphaISA::IPR_DC_MODE:
-      case AlphaISA::IPR_MAF_MODE:
-        ipr[idx] = val & 0x3f;
-        break;
-
-      case AlphaISA::IPR_ITB_ASN:
-        ipr[idx] = val & 0x7f0;
-        break;
-
-      case AlphaISA::IPR_DTB_ASN:
-        ipr[idx] = val & ULL(0xfe00000000000000);
-        break;
-
-      case AlphaISA::IPR_EXC_SUM:
-      case AlphaISA::IPR_EXC_MASK:
-        // any write to this register clears it
-        ipr[idx] = 0;
-        break;
-
-      case AlphaISA::IPR_INTID:
-      case AlphaISA::IPR_SL_RCV:
-      case AlphaISA::IPR_MM_STAT:
-      case AlphaISA::IPR_ITB_PTE_TEMP:
-      case AlphaISA::IPR_DTB_PTE_TEMP:
-        // read-only registers
-        return Unimplemented_Opcode_Fault;
-
-      case AlphaISA::IPR_HWINT_CLR:
-      case AlphaISA::IPR_SL_XMIT:
-      case AlphaISA::IPR_DC_FLUSH:
-      case AlphaISA::IPR_IC_FLUSH:
-        // the following are write only
-        ipr[idx] = val;
-        break;
-
-      case AlphaISA::IPR_DTB_IA:
-        // really a control write
-        ipr[idx] = 0;
-
-        dtb->flushAll();
-        break;
-
-      case AlphaISA::IPR_DTB_IAP:
-        // really a control write
-        ipr[idx] = 0;
-
-        dtb->flushProcesses();
-        break;
-
-      case AlphaISA::IPR_DTB_IS:
-        // really a control write
-        ipr[idx] = val;
-
-        dtb->flushAddr(val, DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]));
-        break;
-
-      case AlphaISA::IPR_DTB_TAG: {
-          struct AlphaISA::PTE pte;
-
-          // FIXME: granularity hints NYI...
-          if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0)
-              panic("PTE GH field != 0");
-
-          // write entire quad
-          ipr[idx] = val;
-
-          // construct PTE for new entry
-          pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]);
-          pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]);
-          pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]);
-          pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]);
-          pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]);
-          pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]);
-          pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]);
-
-          // insert new TAG/PTE value into data TLB
-          dtb->insert(val, pte);
-      }
-        break;
-
-      case AlphaISA::IPR_ITB_PTE: {
-          struct AlphaISA::PTE pte;
-
-          // FIXME: granularity hints NYI...
-          if (ITB_PTE_GH(val) != 0)
-              panic("PTE GH field != 0");
-
-          // write entire quad
-          ipr[idx] = val;
-
-          // construct PTE for new entry
-          pte.ppn = ITB_PTE_PPN(val);
-          pte.xre = ITB_PTE_XRE(val);
-          pte.xwe = 0;
-          pte.fonr = ITB_PTE_FONR(val);
-          pte.fonw = ITB_PTE_FONW(val);
-          pte.asma = ITB_PTE_ASMA(val);
-          pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]);
-
-          // insert new TAG/PTE value into data TLB
-          itb->insert(ipr[AlphaISA::IPR_ITB_TAG], pte);
-      }
-        break;
-
-      case AlphaISA::IPR_ITB_IA:
-        // really a control write
-        ipr[idx] = 0;
-
-        itb->flushAll();
-        break;
-
-      case AlphaISA::IPR_ITB_IAP:
-        // really a control write
-        ipr[idx] = 0;
-
-        itb->flushProcesses();
-        break;
-
-      case AlphaISA::IPR_ITB_IS:
-        // really a control write
-        ipr[idx] = val;
-
-        itb->flushAddr(val, ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]));
-        break;
-
-      default:
-        // invalid IPR
-        return Unimplemented_Opcode_Fault;
-    }
-
-    // no error...
-    return No_Fault;
-
+    return this->regFile.setIpr(idx, val);
 }

 template <class Impl>
 int
 AlphaFullCPU<Impl>::readIntrFlag()
 {
-    return regs.intrflag;
+    return this->regFile.readIntrFlag();
 }

 template <class Impl>
 void
 AlphaFullCPU<Impl>::setIntrFlag(int val)
 {
-    regs.intrflag = val;
+    this->regFile.setIntrFlag(val);
 }

 // Can force commit stage to squash and stuff.
@ -596,36 +255,27 @@ AlphaFullCPU<Impl>::hwrei()
 {
    uint64_t *ipr = getIpr();

-    if (!PC_PAL(regs.pc))
+    if (!inPalMode())
        return Unimplemented_Opcode_Fault;

    setNextPC(ipr[AlphaISA::IPR_EXC_ADDR]);

-    if (!misspeculating()) {
-        kernelStats.hwrei();
+//    kernelStats.hwrei();

-        if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0)
-            AlphaISA::swap_palshadow(&regs, false);
+    if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0)
+//        AlphaISA::swap_palshadow(&regs, false);

-        AlphaISA::check_interrupts = true;
-    }
+    this->checkInterrupts = true;

    // FIXME: XXX check for interrupts? XXX
    return No_Fault;
 }

-template <class Impl>
-bool
-AlphaFullCPU<Impl>::inPalMode()
-{
-    return PC_PAL(readPC());
-}
-
 template <class Impl>
 bool
 AlphaFullCPU<Impl>::simPalCheck(int palFunc)
 {
-    kernelStats.callpal(palFunc);
+//    kernelStats.callpal(palFunc);

    switch (palFunc) {
      case PAL::halt:
@ -636,7 +286,7 @@ AlphaFullCPU<Impl>::simPalCheck(int palFunc)

      case PAL::bpt:
      case PAL::bugchk:
-        if (system->breakpoint())
+        if (this->system->breakpoint())
            return false;
        break;
    }
@ -651,21 +301,22 @@ template <class Impl>
 void
 AlphaFullCPU<Impl>::trap(Fault fault)
 {
-    uint64_t PC = commit.readPC();
+    // Keep in mind that a trap may be initiated by fetch if there's a TLB
+    // miss
+    uint64_t PC = this->commit.readCommitPC();

    DPRINTF(Fault, "Fault %s\n", FaultName(fault));
-    Stats::recordEvent(csprintf("Fault %s", FaultName(fault)));
+    this->recordEvent(csprintf("Fault %s", FaultName(fault)));

-    assert(!misspeculating());
-    kernelStats.fault(fault);
+//    kernelStats.fault(fault);

    if (fault == Arithmetic_Fault)
        panic("Arithmetic traps are unimplemented!");

-    AlphaISA::InternalProcReg *ipr = getIpr();
+    typename AlphaISA::InternalProcReg *ipr = getIpr();

    // exception restart address - Get the commit PC
-    if (fault != Interrupt_Fault || !PC_PAL(PC))
+    if (fault != Interrupt_Fault || !inPalMode(PC))
        ipr[AlphaISA::IPR_EXC_ADDR] = PC;

    if (fault == Pal_Fault || fault == Arithmetic_Fault /* ||
@ -674,11 +325,12 @@ AlphaFullCPU<Impl>::trap(Fault fault)
        ipr[AlphaISA::IPR_EXC_ADDR] += 4;
    }

-    if (!PC_PAL(PC))
-        AlphaISA::swap_palshadow(&regs, true);
+    if (!inPalMode(PC))
+        swapPALShadow(true);

-    setPC( ipr[AlphaISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault] );
-    setNextPC(PC + sizeof(MachInst));
+    this->regFile.setPC( ipr[AlphaISA::IPR_PAL_BASE] +
+                         AlphaISA::fault_addr[fault] );
+    this->regFile.setNextPC(PC + sizeof(MachInst));
 }

 template <class Impl>
@ -694,7 +346,7 @@ AlphaFullCPU<Impl>::processInterrupts()
 // same logical index.
 template <class Impl>
 void
-AlphaFullCPU<Impl>::swap_palshadow(RegFile *regs, bool use_shadow)
+AlphaFullCPU<Impl>::swapPALShadow(bool use_shadow)
 {
    if (palShadowEnabled == use_shadow)
        panic("swap_palshadow: wrong PAL shadow state");
@ -703,6 +355,7 @@ AlphaFullCPU<Impl>::swap_palshadow(RegFile *regs, bool use_shadow)

    // Will have to lookup in rename map to get physical registers, then
    // swap.
+/*
    for (int i = 0; i < AlphaISA::NumIntRegs; i++) {
        if (reg_redir[i]) {
            AlphaISA::IntReg temp = regs->intRegFile[i];
@ -710,6 +363,7 @@ AlphaFullCPU<Impl>::swap_palshadow(RegFile *regs, bool use_shadow)
            regs->palregs[i] = temp;
        }
    }
+*/
 }

 #endif // FULL_SYSTEM
--- a/cpu/beta_cpu/alpha_params.hh
+++ b/cpu/beta_cpu/alpha_params.hh
@ -20,12 +20,12 @@ class MemInterface;
 class AlphaSimpleParams : public BaseFullCPU::Params
 {
  public:
+
 #ifdef FULL_SYSTEM
    AlphaITB *itb; AlphaDTB *dtb;
 #else
    std::vector<Process *> workload;
    Process *process;
-    short asid;
 #endif // FULL_SYSTEM

    FunctionalMemory *mem;
--- a/cpu/beta_cpu/comm.hh
+++ b/cpu/beta_cpu/comm.hh
@ -50,7 +50,6 @@ struct SimpleIEWSimpleCommit {
    bool branchTaken;
    uint64_t mispredPC;
    uint64_t nextPC;
-    unsigned globalHist;
    InstSeqNum squashedSeqNum;
 };

@ -78,7 +77,6 @@ struct TimeBufStruct {
        bool branchTaken;
        uint64_t mispredPC;
        uint64_t nextPC;
-        unsigned globalHist;
    };

    decodeComm decodeInfo;
@ -113,12 +111,11 @@ struct TimeBufStruct {
        bool branchTaken;
        uint64_t mispredPC;
        uint64_t nextPC;
-        unsigned globalHist;

        // Think of better names here.
        // Will need to be a variety of sizes...
        // Maybe make it a vector, that way only need one object.
-        std::vector<PhysRegIndex> freeRegs;
+//        std::vector<PhysRegIndex> freeRegs;

        bool robSquashing;

@ -129,7 +126,7 @@ struct TimeBufStruct {

        // Extra bits of information so that the LDSTQ only updates when it
        // needs to.
-        bool commitIsStore;
+//        bool commitIsStore;
        bool commitIsLoad;

        // Communication specifically to the IQ to tell the IQ that it can
--- a/cpu/beta_cpu/commit.hh
+++ b/cpu/beta_cpu/commit.hh
@ -113,9 +113,6 @@ class SimpleCommit
    /** Pointer to FullCPU. */
    FullCPU *cpu;

-    /** Pointer to the rename map.  DO NOT USE if possible. */
-//    typename Impl::CPUPol::RenameMap *renameMap;
-
    //Store buffer interface?  Will need to move committed stores to the
    //store buffer

--- a/cpu/beta_cpu/commit_impl.hh
+++ b/cpu/beta_cpu/commit_impl.hh
@ -166,9 +166,9 @@ SimpleCommit<Impl>::commit()
    // hwrei() is what resets the PC to the place where instruction execution
    // beings again.
 #ifdef FULL_SYSTEM
-    if (ISA::check_interrupts &&
+    if (//checkInterrupts &&
        cpu->check_interrupts() &&
-        !xc->inPalMode()) {
+        !cpu->inPalMode(readCommitPC())) {
        // Will need to squash all instructions currently in flight and have
        // the interrupt handler restart at the last non-committed inst.
        // Most of that can be handled through the trap() function.  The
@ -215,8 +215,6 @@ SimpleCommit<Impl>::commit()

        toIEW->commitInfo.mispredPC = fromIEW->mispredPC;

-        toIEW->commitInfo.globalHist = fromIEW->globalHist;
-
        if (toIEW->commitInfo.branchMispredict) {
            ++branchMispredicts;
        }
@ -257,6 +255,9 @@ SimpleCommit<Impl>::commitInsts()
    // Can't commit and squash things at the same time...
    ////////////////////////////////////

+    if (rob->isEmpty())
+        return;
+
    DynInstPtr head_inst = rob->readHeadInst();

    unsigned num_committed = 0;
@ -275,9 +276,11 @@ SimpleCommit<Impl>::commitInsts()
        if (head_inst->isSquashed()) {
            // Hack to avoid the instruction being retired (and deleted) if
            // it hasn't been through the IEW stage yet.
+/*
            if (!head_inst->isExecuted()) {
                break;
            }
+*/

            DPRINTF(Commit, "Commit: Retiring squashed instruction from "
                    "ROB.\n");
@ -341,7 +344,7 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
        // and committed this instruction.
        cpu->funcExeInst--;

-        if (head_inst->isStore() || head_inst->isNonSpeculative()) {
+        if (head_inst->isNonSpeculative()) {
-            DPRINTF(Commit, "Commit: Encountered a store or non-speculative "
+            DPRINTF(Commit, "Commit: Encountered a non-speculative "
                    "instruction at the head of the ROB, PC %#x.\n",
                    head_inst->readPC());
    }

    // Check if the instruction caused a fault.  If so, trap.
-    if (head_inst->getFault() != No_Fault) {
+    Fault inst_fault = head_inst->getFault();
+
+    if (inst_fault != No_Fault && inst_fault != Fake_Mem_Fault) {
        if (!head_inst->isNop()) {
 #ifdef FULL_SYSTEM
-            cpu->trap(fault);
+            cpu->trap(inst_fault);
 #else // !FULL_SYSTEM
-            panic("fault (%d) detected @ PC %08p", head_inst->getFault(),
+            panic("fault (%d) detected @ PC %08p", inst_fault,
                  head_inst->PC);
 #endif // FULL_SYSTEM
        }
@ -390,7 +395,7 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
    // Check if we're really ready to commit.  If not then return false.
    // I'm pretty sure all instructions should be able to commit if they've
    // reached this far.  For now leave this in as a check.
-    if(!rob->isHeadReady()) {
+    if (!rob->isHeadReady()) {
        panic("Commit: Unable to commit head instruction!\n");
        return false;
    }
@ -413,17 +418,7 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
        ++commitCommittedBranches;
    }

-
 #if 0
-    // Check if the instruction has a destination register.
-    // If so add the previous physical register of its logical register's
-    // destination to the free list through the time buffer.
-    for (int i = 0; i < head_inst->numDestRegs(); i++)
-    {
-        toIEW->commitInfo.freeRegs.push_back(head_inst->prevDestRegIdx(i));
-    }
-#endif
-
    // Explicit communication back to the LDSTQ that a load has been committed
    // and can be removed from the LDSTQ.  Stores don't need this because
    // the LDSTQ will already have been told that a store has reached the head
@ -436,6 +431,7 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
            ++commitCommittedLoads;
        }
    }
+#endif

    // Now that the instruction is going to be committed, finalize its
    // trace data.
@ -487,7 +483,7 @@ SimpleCommit<Impl>::markCompletedInsts()
    // Grab completed insts out of the IEW instruction queue, and mark
    // instructions completed within the ROB.
    for (int inst_num = 0;
-         inst_num < iewWidth && fromIEW->insts[inst_num];
+         inst_num < fromIEW->size && fromIEW->insts[inst_num];
         ++inst_num)
    {
        DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n",
--- a/cpu/beta_cpu/cpu_policy.hh
+++ b/cpu/beta_cpu/cpu_policy.hh
@ -34,7 +34,7 @@ struct SimpleCPUPolicy
    typedef SimpleFetch<Impl> Fetch;
    typedef SimpleDecode<Impl> Decode;
    typedef SimpleRename<Impl> Rename;
-    typedef SimpleIEW<Impl, IQ> IEW;
+    typedef SimpleIEW<Impl> IEW;
    typedef SimpleCommit<Impl> Commit;

    /** The struct for communication between fetch and decode. */
--- a/cpu/beta_cpu/decode.hh
+++ b/cpu/beta_cpu/decode.hh
@ -68,12 +68,16 @@ class SimpleDecode
    void squash();

  private:
+    inline bool fetchInstsValid();
+
    void block();

    inline void unblock();

    void squash(DynInstPtr &inst);

+    void dumpFetchQueue();
+
    // Interfaces to objects outside of decode.
    /** CPU interface. */
    FullCPU *cpu;
--- a/cpu/beta_cpu/decode_impl.hh
+++ b/cpu/beta_cpu/decode_impl.hh
@ -98,6 +98,13 @@ SimpleDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
    fromFetch = fetchQueue->getWire(-fetchToDecodeDelay);
 }

+template<class Impl>
+inline bool
+SimpleDecode<Impl>::fetchInstsValid()
+{
+    return fromFetch->size > 0;
+}
+
 template<class Impl>
 void
 SimpleDecode<Impl>::block()
@ -156,14 +163,14 @@ SimpleDecode<Impl>::squash(DynInstPtr &inst)
    // Set status to squashing.
    _status = Squashing;

-    // Maybe advance the time buffer?  Not sure what to do in the normal
-    // case.
-
    // Clear the skid buffer in case it has any data in it.
-    while (!skidBuffer.empty())
-    {
+    while (!skidBuffer.empty()) {
        skidBuffer.pop();
    }
+
+    // Squash instructions up until this one
+    // Slightly unrealistic!
+    cpu->removeInstsUntil(inst->seqNum);
 }

 template<class Impl>
@ -205,7 +212,7 @@ SimpleDecode<Impl>::tick()
        if (_status == Unblocking) {
            ++decodeUnblockCycles;

-            if (fromFetch->size > 0) {
+            if (fetchInstsValid()) {
                // Add the current inputs to the skid buffer so they can be
                // reprocessed when this stage unblocks.
                skidBuffer.push(*fromFetch);
@ -216,7 +223,7 @@ SimpleDecode<Impl>::tick()
    } else if (_status == Blocked) {
        ++decodeBlockedCycles;

-        if (fromFetch->size > 0) {
+        if (fetchInstsValid()) {
            block();
        }

@ -240,12 +247,12 @@ SimpleDecode<Impl>::tick()
            squash();
        }
    } else if (_status == Squashing) {
-        ++decodeSquashCycles;
-
        if (!fromCommit->commitInfo.squash &&
            !fromCommit->commitInfo.robSquashing) {
            _status = Running;
        } else if (fromCommit->commitInfo.squash) {
+            ++decodeSquashCycles;
+
            squash();
        }
    }
@ -264,8 +271,7 @@ SimpleDecode<Impl>::decode()
    // Check time buffer if being told to stall.
    if (fromRename->renameInfo.stall ||
        fromIEW->iewInfo.stall ||
-        fromCommit->commitInfo.stall)
-    {
+        fromCommit->commitInfo.stall) {
        block();
        return;
    }
@ -273,7 +279,7 @@ SimpleDecode<Impl>::decode()
    // Check fetch queue to see if instructions are available.
    // If no available instructions, do nothing, unless this stage is
    // currently unblocking.
-    if (fromFetch->size == 0 && _status != Unblocking) {
+    if (!fetchInstsValid() && _status != Unblocking) {
        DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n");
        // Should I change the status to idle?
        ++decodeIdleCycles;
@ -286,7 +292,7 @@ SimpleDecode<Impl>::decode()
    unsigned to_rename_index = 0;

    int insts_available = _status == Unblocking ?
-        skidBuffer.front().size :
+        skidBuffer.front().size - numInst :
        fromFetch->size;

    // Debug block...
@ -308,8 +314,8 @@ SimpleDecode<Impl>::decode()
    }
 #endif

-     while (insts_available > 0)
-     {
+    while (insts_available > 0)
+    {
        DPRINTF(Decode, "Decode: Sending instruction to rename.\n");

        inst = _status == Unblocking ? skidBuffer.front().insts[numInst] :
@ -331,6 +337,16 @@ SimpleDecode<Impl>::decode()
            continue;
        }

+
+        // Also check if instructions have no source registers.  Mark
+        // them as ready to issue at any time.  Not sure if this check
+        // should exist here or at a later stage; however it doesn't matter
+        // too much for function correctness.
+        // Isn't this handled by the inst queue?
+        if (inst->numSrcRegs() == 0) {
+            inst->setCanIssue();
+        }
+
        // This current instruction is valid, so add it into the decode
        // queue.  The next instruction may not be valid, so check to
        // see if branches were predicted correctly.
@ -369,16 +385,6 @@ SimpleDecode<Impl>::decode()
        // addr (either the immediate, or the branch PC + 4) and redirect
        // fetch if it's incorrect.

-
-        // Also check if instructions have no source registers.  Mark
-        // them as ready to issue at any time.  Not sure if this check
-        // should exist here or at a later stage; however it doesn't matter
-        // too much for function correctness.
-        // Isn't this handled by the inst queue?
-        if (inst->numSrcRegs() == 0) {
-            inst->setCanIssue();
-        }
-
        // Increment which instruction we're looking at.
        ++numInst;
        ++to_rename_index;
--- a/cpu/beta_cpu/fetch.hh
+++ b/cpu/beta_cpu/fetch.hh
@ -74,7 +74,6 @@ class SimpleFetch

    void processCacheCompletion();

-//  private:
    // Figure out PC vs next PC and how it should be updated
    void squash(const Addr &new_PC);

@ -93,9 +92,6 @@ class SimpleFetch
     */
    bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC);

-    // Might not want this function...
-//    inline void recordGlobalHist(DynInstPtr &inst);
-
    /**
     * Fetches the cache line that contains fetch_PC.  Returns any
     * fault that happened.  Puts the data into the class variable
@ -184,9 +180,6 @@ class SimpleFetch
    /** Mask to get a cache block's address. */
    Addr cacheBlkMask;

-    /** The instruction being fetched. */
-//    MachInst inst;
-
    /** The cache line being fetched. */
    uint8_t *cacheData;

--- a/cpu/beta_cpu/fetch_impl.hh
+++ b/cpu/beta_cpu/fetch_impl.hh
@ -44,6 +44,8 @@ SimpleFetch<Impl>::SimpleFetch(Params &params)
      commitToFetchDelay(params.commitToFetchDelay),
      fetchWidth(params.fetchWidth)
 {
+    DPRINTF(Fetch, "Fetch: Fetch constructor called\n");
+
    // Set status to idle.
    _status = Idle;

@ -52,7 +54,7 @@ SimpleFetch<Impl>::SimpleFetch(Params &params)
    // Not sure of this parameter.  I think it should be based on the
    // thread number.
 #ifndef FULL_SYSTEM
-    memReq->asid = params.asid;
+    memReq->asid = 0;
 #else
    memReq->asid = 0;
 #endif // FULL_SYSTEM
@ -163,21 +165,10 @@ SimpleFetch<Impl>::processCacheCompletion()
    // to return.
    // Can keep track of how many cache accesses go unused due to
    // misspeculation here.
-    // How to handle an outstanding miss which gets cancelled due to squash,
-    // then a new icache miss gets scheduled?
    if (_status == IcacheMissStall)
        _status = IcacheMissComplete;
 }

-#if 0
-template <class Impl>
-inline void
-SimpleFetch<Impl>::recordGlobalHist(DynInstPtr &inst)
-{
-    inst->setGlobalHist(branchPred.BPReadGlobalHist());
-}
-#endif
-
 template <class Impl>
 bool
 SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
@ -311,7 +302,6 @@ SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC,
    // Tell the CPU to remove any instructions that are in flight between
    // fetch and decode.
    cpu->removeInstsUntil(seq_num);
-
 }

 template <class Impl>
@ -428,7 +418,9 @@ SimpleFetch<Impl>::tick()
        // Switch status to running
        _status = Running;

-        ++fetchSquashCycles;
+        ++fetchCycles;
+
+        fetch();
    } else if (_status != IcacheMissStall) {
        DPRINTF(Fetch, "Fetch: Running stage.\n");

--- a/cpu/beta_cpu/full_cpu.cc
+++ b/cpu/beta_cpu/full_cpu.cc
@ -16,7 +16,7 @@
 using namespace std;

 BaseFullCPU::BaseFullCPU(Params &params)
-    : BaseCPU(&params)
+    : BaseCPU(&params), cpu_id(0)
 {
 }

@ -82,15 +82,14 @@ FullBetaCPU<Impl>::FullBetaCPU(Params &params)

 #ifdef FULL_SYSTEM
      system(params.system),
-      memCtrl(system->memCtrl),
+      memCtrl(system->memctrl),
      physmem(system->physmem),
      itb(params.itb),
      dtb(params.dtb),
      mem(params.mem),
 #else
-      process(params.process),
-      asid(params.asid),
-      mem(process->getMemory()),
+      // Hardcoded for a single thread!!
+      mem(params.workload[0]->getMemory()),
 #endif // FULL_SYSTEM

      icacheInterface(params.icacheInterface),
@ -100,20 +99,40 @@ FullBetaCPU<Impl>::FullBetaCPU(Params &params)
      funcExeInst(0)
 {
    _status = Idle;
+
+#ifndef FULL_SYSTEM
+    thread.resize(this->number_of_threads);
+#endif
+
+    for (int i = 0; i < this->number_of_threads; ++i) {
 #ifdef FULL_SYSTEM
-    xc = new ExecContext(this, 0, system, itb, dtb, mem);
+        assert(i == 0);
+        system->execContexts[i] =
+            new ExecContext(this, i, system, itb, dtb, mem);

-    // initialize CPU, including PC
-    TheISA::initCPU(&xc->regs);
+        // initialize CPU, including PC
+        TheISA::initCPU(&system->execContexts[i]->regs);
+        execContexts.push_back(system->execContexts[i]);
 #else
-    DPRINTF(FullCPU, "FullCPU: Process's starting PC is %#x, process is %#x",
-            process->prog_entry, process);
-    xc = new ExecContext(this, /* thread_num */ 0, process, /* asid */ 0);
-
-    assert(process->getMemory() != NULL);
-    assert(mem != NULL);
+        if (i < params.workload.size()) {
+            DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, "
+                    "process is %#x",
+                    i, params.workload[i]->prog_entry, params.workload[i]);
+            assert(params.workload[i]->getMemory() != NULL);
+            thread[i] = new ExecContext(this, i, params.workload[i], i);
+        }
+        assert(mem != NULL);
+        execContexts.push_back(thread[i]);
 #endif // !FULL_SYSTEM
-    execContexts.push_back(xc);
+    }
+
+    // Note that this is a hack so that my code which still uses xc-> will
+    // still work.  I should remove this eventually
+#ifdef FULL_SYSTEM
+    xc = system->execContexts[0];
+#else
+    xc = thread[0];
+#endif

    // The stages also need their CPU pointer setup.  However this must be
    // done at the upper level CPU because they have pointers to the upper
@ -202,29 +221,33 @@ FullBetaCPU<Impl>::init()

        // Need to do a copy of the xc->regs into the CPU's regfile so
        // that it can start properly.
-
+#ifdef FULL_SYSTEM
+        ExecContext *src_xc = system->execContexts[0];
+#else
+        ExecContext *src_xc = thread[0];
+#endif
        // First loop through the integer registers.
        for (int i = 0; i < Impl::ISA::NumIntRegs; ++i)
        {
-            regFile.intRegFile[i] = xc->regs.intRegFile[i];
+            regFile.intRegFile[i] = src_xc->regs.intRegFile[i];
        }

        // Then loop through the floating point registers.
        for (int i = 0; i < Impl::ISA::NumFloatRegs; ++i)
        {
-            regFile.floatRegFile[i].d = xc->regs.floatRegFile.d[i];
-            regFile.floatRegFile[i].q = xc->regs.floatRegFile.q[i];
+            regFile.floatRegFile[i].d = src_xc->regs.floatRegFile.d[i];
+            regFile.floatRegFile[i].q = src_xc->regs.floatRegFile.q[i];
        }

        // Then loop through the misc registers.
-        regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr;
-        regFile.miscRegs.uniq = xc->regs.miscRegs.uniq;
-        regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag;
-        regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr;
+        regFile.miscRegs.fpcr = src_xc->regs.miscRegs.fpcr;
+        regFile.miscRegs.uniq = src_xc->regs.miscRegs.uniq;
+        regFile.miscRegs.lock_flag = src_xc->regs.miscRegs.lock_flag;
+        regFile.miscRegs.lock_addr = src_xc->regs.miscRegs.lock_addr;

        // Then finally set the PC and the next PC.
-        regFile.pc = xc->regs.pc;
-        regFile.npc = xc->regs.npc;
+        regFile.pc = src_xc->regs.pc;
+        regFile.npc = src_xc->regs.npc;
    }
 }

@ -277,13 +300,13 @@ FullBetaCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)

    // Set all status's to active, schedule the
    // CPU's tick event.
-    tickEvent.schedule(curTick);
    for (int i = 0; i < execContexts.size(); ++i) {
-        execContexts[i]->activate();
+        ExecContext *xc = execContexts[i];
+        if (xc->status() == ExecContext::Active && _status != Running) {
+            _status = Running;
+            tickEvent.schedule(curTick);
+        }
    }
-
-    // Switch out the other CPU.
-    oldCPU->switchOut();
 }

 template <class Impl>
@ -463,6 +486,7 @@ FullBetaCPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num)
                inst_to_delete->seqNum, inst_to_delete->readPC());

        // Remove the instruction from the list.
+        instList.back() = NULL;
        instList.pop_back();

        // Mark it as squashed.
--- a/cpu/beta_cpu/full_cpu.hh
+++ b/cpu/beta_cpu/full_cpu.hh
@ -5,11 +5,12 @@
 //itself properly.  Constructor.  Derived alpha class.  Threads!
 // Avoid running stages and advancing queues if idle/stalled.

-#ifndef __SIMPLE_FULL_CPU_HH__
-#define __SIMPLE_FULL_CPU_HH__
+#ifndef __CPU_BETA_CPU_FULL_CPU_HH__
+#define __CPU_BETA_CPU_FULL_CPU_HH__

 #include <iostream>
 #include <list>
+#include <vector>

 #include "cpu/beta_cpu/comm.hh"

@ -20,6 +21,11 @@
 #include "cpu/beta_cpu/cpu_policy.hh"
 #include "sim/process.hh"

+#ifdef FULL_SYSTEM
+#include "arch/alpha/ev5.hh"
+using namespace EV5;
+#endif
+
 class FunctionalMemory;
 class Process;

@ -34,6 +40,9 @@ class BaseFullCPU : public BaseCPU
 #else
    BaseFullCPU(Params &params);
 #endif // FULL_SYSTEM
+
+  private:
+    int cpu_id;
 };

 template <class Impl>
@ -41,6 +50,7 @@ class FullBetaCPU : public BaseFullCPU
 {
  public:
    //Put typedefs from the Impl here.
+    typedef typename Impl::ISA ISA;
    typedef typename Impl::CPUPol CPUPolicy;
    typedef typename Impl::Params Params;
    typedef typename Impl::DynInstPtr DynInstPtr;
@ -114,19 +124,21 @@ class FullBetaCPU : public BaseFullCPU
    bool validDataAddr(Addr addr) { return true; }

    /** Get instruction asid. */
-    int getInstAsid() { return ITB_ASN_ASN(regs.ipr[ISA::IPR_ITB_ASN]); }
+    int getInstAsid()
+    { return ITB_ASN_ASN(regFile.getIpr()[ISA::IPR_ITB_ASN]); }

    /** Get data asid. */
-    int getDataAsid() { return DTB_ASN_ASN(regs.ipr[ISA::IPR_DTB_ASN]); }
+    int getDataAsid()
+    { return DTB_ASN_ASN(regFile.getIpr()[ISA::IPR_DTB_ASN]); }
 #else
    bool validInstAddr(Addr addr)
-    { return process->validInstAddr(addr); }
+    { return thread[0]->validInstAddr(addr); }

    bool validDataAddr(Addr addr)
-    { return process->validDataAddr(addr); }
+    { return thread[0]->validDataAddr(addr); }

-    int getInstAsid() { return asid; }
-    int getDataAsid() { return asid; }
+    int getInstAsid() { return thread[0]->asid; }
+    int getDataAsid() { return thread[0]->asid; }

 #endif

@ -284,7 +296,14 @@ class FullBetaCPU : public BaseFullCPU
    ExecContext *xc;

    /** Temporary function to get pointer to exec context. */
-    ExecContext *xcBase() { return xc; }
+    ExecContext *xcBase()
+    {
+#ifdef FULL_SYSTEM
+        return system->execContexts[0];
+#else
+        return thread[0];
+#endif
+    }

    InstSeqNum globalSeqNum;

@ -299,12 +318,7 @@ class FullBetaCPU : public BaseFullCPU

 //    SWContext *swCtx;
 #else
-    Process *process;
-
-    // Address space ID.  Note that this is used for TIMING cache
-    // simulation only; all functional memory accesses should use
-    // one of the FunctionalMemory pointers above.
-    short asid;
+    std::vector<ExecContext *> thread;
 #endif

    FunctionalMemory *mem;
--- a/cpu/beta_cpu/iew.cc
+++ b/cpu/beta_cpu/iew.cc
@ -4,4 +4,4 @@
 #include "cpu/beta_cpu/iew_impl.hh"
 #include "cpu/beta_cpu/inst_queue.hh"

-template class SimpleIEW<AlphaSimpleImpl, AlphaSimpleImpl::CPUPol::IQ>;
+template class SimpleIEW<AlphaSimpleImpl>;
--- a/cpu/beta_cpu/iew.hh
+++ b/cpu/beta_cpu/iew.hh
@ -14,7 +14,7 @@
 //Can IEW even stall?  Space should be available/allocated already...maybe
 //if there's not enough write ports on the ROB or waiting for CDB
 //arbitration.
-template<class Impl, class IQ>
+template<class Impl>
 class SimpleIEW
 {
  private:
@ -25,6 +25,7 @@ class SimpleIEW
    typedef typename Impl::FullCPU FullCPU;
    typedef typename Impl::Params Params;

+    typedef typename CPUPol::IQ IQ;
    typedef typename CPUPol::RenameMap RenameMap;
    typedef typename CPUPol::LDSTQ LDSTQ;

@ -33,6 +34,7 @@ class SimpleIEW
    typedef typename CPUPol::RenameStruct RenameStruct;
    typedef typename CPUPol::IssueStruct IssueStruct;

+    friend class Impl::FullCPU;
  public:
    enum Status {
        Running,
@ -49,15 +51,17 @@ class SimpleIEW
    Status _wbStatus;

  public:
-    void squash();
+    class WritebackEvent : public Event {
+      private:
+        DynInstPtr inst;
+        SimpleIEW<Impl> *iewStage;

-    void squashDueToBranch(DynInstPtr &inst);
+      public:
+        WritebackEvent(DynInstPtr &_inst, SimpleIEW<Impl> *_iew);

-    void squashDueToMem(DynInstPtr &inst);
-
-    void block();
-
-    inline void unblock();
+        virtual void process();
+        virtual const char *description();
+    };

  public:
    SimpleIEW(Params &params);
@ -74,17 +78,30 @@ class SimpleIEW

    void setRenameMap(RenameMap *rm_ptr);

+    void squash();
+
+    void squashDueToBranch(DynInstPtr &inst);
+
+    void squashDueToMem(DynInstPtr &inst);
+
+    void block();
+
+    inline void unblock();
+
    void wakeDependents(DynInstPtr &inst);

-    void tick();
-
-    void iew();
+    void instToCommit(DynInstPtr &inst);

  private:
    void dispatchInsts();

    void executeInsts();

+  public:
+    void tick();
+
+    void iew();
+
    //Interfaces to objects inside and outside of IEW.
    /** Time buffer interface. */
    TimeBuffer<TimeStruct> *timeBuffer;
@ -121,11 +138,18 @@ class SimpleIEW
    /** Skid buffer between rename and IEW. */
    std::queue<RenameStruct> skidBuffer;

+  protected:
    /** Instruction queue. */
    IQ instQueue;

    LDSTQ ldstQueue;

+#ifndef FULL_SYSTEM
+  public:
+    void lsqWriteback();
+#endif
+
+  private:
    /** Pointer to rename map.  Might not want this stage to directly
     *  access this though...
     */
--- a/cpu/beta_cpu/iew_impl.hh
+++ b/cpu/beta_cpu/iew_impl.hh
@ -12,8 +12,36 @@
 #include "base/timebuf.hh"
 #include "cpu/beta_cpu/iew.hh"

-template<class Impl, class IQ>
-SimpleIEW<Impl, IQ>::SimpleIEW(Params &params)
+template<class Impl>
+SimpleIEW<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst,
+                                                SimpleIEW<Impl> *_iew)
+    : Event(&mainEventQueue, CPU_Tick_Pri), inst(_inst), iewStage(_iew)
+{
+    this->setFlags(Event::AutoDelete);
+}
+
+template<class Impl>
+void
+SimpleIEW<Impl>::WritebackEvent::process()
+{
+    DPRINTF(IEW, "IEW: WRITEBACK EVENT!!!!\n");
+
+    // Need to insert instruction into queue to commit
+    iewStage->instToCommit(inst);
+    // Need to execute second half of the instruction, do actual writing to
+    // registers and such
+    inst->execute();
+}
+
+template<class Impl>
+const char *
+SimpleIEW<Impl>::WritebackEvent::description()
+{
+    return "LSQ writeback event";
+}
+
+template<class Impl>
+SimpleIEW<Impl>::SimpleIEW(Params &params)
    : // Just make this time buffer really big for now
      issueToExecQueue(5, 5),
      instQueue(params),
@ -36,11 +64,13 @@ SimpleIEW<Impl, IQ>::SimpleIEW(Params &params)

    // Instruction queue needs the queue between issue and execute.
    instQueue.setIssueToExecuteQueue(&issueToExecQueue);
+
+    ldstQueue.setIEW(this);
 }

-template <class Impl, class IQ>
+template <class Impl>
 void
-SimpleIEW<Impl, IQ>::regStats()
+SimpleIEW<Impl>::regStats()
 {
    instQueue.regStats();

@ -111,9 +141,9 @@ SimpleIEW<Impl, IQ>::regStats()
        .desc("Number of branches that were predicted taken incorrectly");
 }

-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::setCPU(FullCPU *cpu_ptr)
+SimpleIEW<Impl>::setCPU(FullCPU *cpu_ptr)
 {
    DPRINTF(IEW, "IEW: Setting CPU pointer.\n");
    cpu = cpu_ptr;
@ -122,9 +152,9 @@ SimpleIEW<Impl, IQ>::setCPU(FullCPU *cpu_ptr)
    ldstQueue.setCPU(cpu_ptr);
 }

-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+SimpleIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
    DPRINTF(IEW, "IEW: Setting time buffer pointer.\n");
    timeBuffer = tb_ptr;
@ -139,9 +169,9 @@ SimpleIEW<Impl, IQ>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
    instQueue.setTimeBuffer(tb_ptr);
 }

-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
+SimpleIEW<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
 {
    DPRINTF(IEW, "IEW: Setting rename queue pointer.\n");
    renameQueue = rq_ptr;
@ -150,9 +180,9 @@ SimpleIEW<Impl, IQ>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
    fromRename = renameQueue->getWire(-renameToIEWDelay);
 }

-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
+SimpleIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
 {
    DPRINTF(IEW, "IEW: Setting IEW queue pointer.\n");
    iewQueue = iq_ptr;
@ -161,63 +191,17 @@ SimpleIEW<Impl, IQ>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
    toCommit = iewQueue->getWire(0);
 }

-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::setRenameMap(RenameMap *rm_ptr)
+SimpleIEW<Impl>::setRenameMap(RenameMap *rm_ptr)
 {
    DPRINTF(IEW, "IEW: Setting rename map pointer.\n");
    renameMap = rm_ptr;
 }

-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::wakeDependents(DynInstPtr &inst)
-{
-    instQueue.wakeDependents(inst);
-}
-
-template<class Impl, class IQ>
-void
-SimpleIEW<Impl, IQ>::block()
-{
-    DPRINTF(IEW, "IEW: Blocking.\n");
-    // Set the status to Blocked.
-    _status = Blocked;
-
-    // Add the current inputs to the skid buffer so they can be
-    // reprocessed when this stage unblocks.
-    skidBuffer.push(*fromRename);
-
-    // Note that this stage only signals previous stages to stall when
-    // it is the cause of the stall originates at this stage.  Otherwise
-    // the previous stages are expected to check all possible stall signals.
-}
-
-template<class Impl, class IQ>
-inline void
-SimpleIEW<Impl, IQ>::unblock()
-{
-    // Check if there's information in the skid buffer.  If there is, then
-    // set status to unblocking, otherwise set it directly to running.
-    DPRINTF(IEW, "IEW: Reading instructions out of the skid "
-            "buffer.\n");
-    // Remove the now processed instructions from the skid buffer.
-    skidBuffer.pop();
-
-    // If there's still information in the skid buffer, then
-    // continue to tell previous stages to stall.  They will be
-    // able to restart once the skid buffer is empty.
-    if (!skidBuffer.empty()) {
-        toRename->iewInfo.stall = true;
-    } else {
-        DPRINTF(IEW, "IEW: Stage is done unblocking.\n");
-        _status = Running;
-    }
-}
-
-template<class Impl, class IQ>
-void
-SimpleIEW<Impl, IQ>::squash()
+SimpleIEW<Impl>::squash()
 {
    DPRINTF(IEW, "IEW: Squashing all instructions.\n");
    _status = Squashing;
@ -229,9 +213,9 @@ SimpleIEW<Impl, IQ>::squash()
    ldstQueue.squash(fromCommit->commitInfo.doneSeqNum);
 }

-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::squashDueToBranch(DynInstPtr &inst)
+SimpleIEW<Impl>::squashDueToBranch(DynInstPtr &inst)
 {
    DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
            inst->PC);
@ -251,9 +235,9 @@ SimpleIEW<Impl, IQ>::squashDueToBranch(DynInstPtr &inst)
        (inst->readPC() + sizeof(MachInst));
 }

-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::squashDueToMem(DynInstPtr &inst)
+SimpleIEW<Impl>::squashDueToMem(DynInstPtr &inst)
 {
    DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
            inst->PC);
@ -268,9 +252,63 @@ SimpleIEW<Impl, IQ>::squashDueToMem(DynInstPtr &inst)
    toCommit->nextPC = inst->readNextPC();
 }

-template <class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::dispatchInsts()
+SimpleIEW<Impl>::block()
+{
+    DPRINTF(IEW, "IEW: Blocking.\n");
+    // Set the status to Blocked.
+    _status = Blocked;
+
+    // Add the current inputs to the skid buffer so they can be
+    // reprocessed when this stage unblocks.
+    skidBuffer.push(*fromRename);
+
+    // Note that this stage only signals previous stages to stall when
+    // the stall originates at this stage.  Otherwise the previous
+    // stages are expected to check all possible stall signals.
+}
+
+template<class Impl>
+inline void
+SimpleIEW<Impl>::unblock()
+{
+    // Check if there's information in the skid buffer.  If there is, then
+    // set status to unblocking, otherwise set it directly to running.
+    DPRINTF(IEW, "IEW: Reading instructions out of the skid "
+            "buffer.\n");
+    // Remove the now processed instructions from the skid buffer.
+    skidBuffer.pop();
+
+    // If there's still information in the skid buffer, then
+    // continue to tell previous stages to stall.  They will be
+    // able to restart once the skid buffer is empty.
+    if (!skidBuffer.empty()) {
+        toRename->iewInfo.stall = true;
+    } else {
+        DPRINTF(IEW, "IEW: Stage is done unblocking.\n");
+        _status = Running;
+    }
+}
+
+template<class Impl>
+void
+SimpleIEW<Impl>::wakeDependents(DynInstPtr &inst)
+{
+    instQueue.wakeDependents(inst);
+}
+
+
+template<class Impl>
+void
+SimpleIEW<Impl>::instToCommit(DynInstPtr &inst)
+{
+
+}
+
+template <class Impl>
+void
+SimpleIEW<Impl>::dispatchInsts()
 {
    ////////////////////////////////////////
    // DISPATCH/ISSUE stage
@ -329,14 +367,14 @@ SimpleIEW<Impl, IQ>::dispatchInsts()
                // a signal to this stage to issue and execute that
                // store.  Change to be a bit that says the instruction
                // has extra work to do at commit.
-                inst->setCanCommit();
+//                inst->setCanCommit();

-                instQueue.insertNonSpec(inst);
+//                instQueue.insertNonSpec(inst);

                ++iewDispStoreInsts;
-                ++iewDispNonSpecInsts;
+//                ++iewDispNonSpecInsts;

-                continue;
+//                continue;
            } else if (inst->isNonSpeculative()) {
                DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction "
                        "encountered, skipping.\n");
@ -385,9 +423,9 @@ SimpleIEW<Impl, IQ>::dispatchInsts()
    }
 }

-template <class Impl, class IQ>
+template <class Impl>
 void
-SimpleIEW<Impl, IQ>::executeInsts()
+SimpleIEW<Impl>::executeInsts()
 {
    ////////////////////////////////////////
    //EXECUTE/WRITEBACK stage
@ -403,6 +441,8 @@ SimpleIEW<Impl, IQ>::executeInsts()

    int fu_usage = 0;
    bool fetch_redirect = false;
+    int inst_slot = 0;
+    int time_slot = 0;

    // Execute/writeback any instructions that are available.
    for (int inst_num = 0;
@ -452,7 +492,7 @@ SimpleIEW<Impl, IQ>::executeInsts()

                ++iewExecLoadInsts;
            } else if (inst->isStore()) {
-                ldstQueue.executeStore();
+                ldstQueue.executeStore(inst);

                ++iewExecStoreInsts;
            } else {
@ -473,9 +513,23 @@ SimpleIEW<Impl, IQ>::executeInsts()
        // For now naively assume that all instructions take one cycle.
        // Otherwise would have to look into the time buffer based on the
        // latency of the instruction.
+        (*iewQueue)[time_slot].insts[inst_slot];
+        while ((*iewQueue)[time_slot].insts[inst_slot]) {
+            if (inst_slot < issueWidth) {
+                ++inst_slot;
+            } else {
+                ++time_slot;
+                inst_slot = 0;
+            }
+
+            assert(time_slot < 5);
+        }
+
+        // May actually have to work this out, especially with loads and stores

        // Add finished instruction to queue to commit.
-        toCommit->insts[inst_num] = inst;
+        (*iewQueue)[time_slot].insts[inst_slot] = inst;
+        (*iewQueue)[time_slot].size++;

        // Check if branch was correct.  This check happens after the
        // instruction is added to the queue because even if the branch
@ -518,9 +572,9 @@ SimpleIEW<Impl, IQ>::executeInsts()
    }
 }

-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::tick()
+SimpleIEW<Impl>::tick()
 {
    // Considering putting all the state-determining stuff in this section.

@ -594,14 +648,20 @@ SimpleIEW<Impl, IQ>::tick()
    // Write back number of free IQ entries here.
    toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries();

+    ldstQueue.writebackStores();
+
    // Check the committed load/store signals to see if there's a load
    // or store to commit.  Also check if it's being told to execute a
    // nonspeculative instruction.
-    if (fromCommit->commitInfo.commitIsStore) {
+    // This is pretty inefficient...
+//    if (0/*fromCommit->commitInfo.commitIsStore*/) {
+    if (!fromCommit->commitInfo.squash &&
+        !fromCommit->commitInfo.robSquashing) {
        ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum);
-    } else if (fromCommit->commitInfo.commitIsLoad) {
+//    } else if (fromCommit->commitInfo.commitIsLoad) {
        ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum);
    }
+//    }

    if (fromCommit->commitInfo.nonSpecSeqNum != 0) {
        instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum);
@ -611,9 +671,9 @@ SimpleIEW<Impl, IQ>::tick()
            instQueue.numFreeEntries());
 }

-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::iew()
+SimpleIEW<Impl>::iew()
 {
    // Might want to put all state checks in the tick() function.
    // Check if being told to stall from commit.
@ -663,3 +723,12 @@ SimpleIEW<Impl, IQ>::iew()
    // Not the best place for it, but this works (hopefully).
    issueToExecQueue.advance();
 }
+
+#ifndef FULL_SYSTEM
+template<class Impl>
+void
+SimpleIEW<Impl>::lsqWriteback()
+{
+    ldstQueue.writebackAllInsts();
+}
+#endif
--- a/cpu/beta_cpu/inst_queue.hh
+++ b/cpu/beta_cpu/inst_queue.hh
@ -174,7 +174,7 @@ class InstructionQueue
     *  once the IQ gets a signal from commit.  While it's redundant to
     *  have the key be a part of the value (the sequence number is stored
     *  inside of DynInst), when these instructions are woken up only
-     *  the sequence number will be available.  Thus it is necessary to be
+     *  the sequence number will be available.  Thus it is most efficient to be
     *  able to search by the sequence number alone.
     */
    std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
--- a/cpu/beta_cpu/inst_queue_impl.hh
+++ b/cpu/beta_cpu/inst_queue_impl.hh
@ -31,8 +31,6 @@ InstructionQueue<Impl>::InstructionQueue(Params &params)
      numPhysFloatRegs(params.numPhysFloatRegs),
      commitToIEWDelay(params.commitToIEWDelay)
 {
-    DPRINTF(IQ, "IQ: Int width is %i.\n", params.executeIntWidth);
-
    // Initialize the number of free IQ entries.
    freeEntries = numEntries;

@ -291,10 +289,6 @@ InstructionQueue<Impl>::insertNonSpec(DynInstPtr &inst)
    // Decrease the number of free entries.
    --freeEntries;

-    // Look through its source registers (physical regs), and mark any
-    // dependencies.
-//    addToDependents(inst);
-
    // Have this instruction set itself as the producer of its destination
    // register(s).
    createDependency(inst);
@ -568,15 +562,20 @@ InstructionQueue<Impl>::scheduleReadyInsts()
            break;

          case Squashed:
-            issuing_inst = squashed_head_inst;
+//            issuing_inst = squashed_head_inst;
+            assert(0 && "Squashed insts should not issue any more!");
            squashedInsts.pop();
+            // Set the squashed instruction as able to commit so that commit
+            // can just drop it from the ROB.  This is a bit faked.
            ++squashed_issued;
+            ++freeEntries;
+
            DPRINTF(IQ, "IQ: Issuing squashed instruction PC %#x.\n",
-                    issuing_inst->readPC());
+                    squashed_head_inst->readPC());
            break;
        }

-        if (list_with_oldest != None) {
+        if (list_with_oldest != None && list_with_oldest != Squashed) {
            i2e_info->insts[total_issued] = issuing_inst;
            i2e_info->size++;

@ -641,8 +640,10 @@ InstructionQueue<Impl>::squash()
    // Setup the squash iterator to point to the tail.
    squashIt = tail;

-    // Call doSquash.
-    doSquash();
+    // Call doSquash if there are insts in the IQ
+    if (freeEntries != numEntries) {
+        doSquash();
+    }

    // Also tell the memory dependence unit to squash.
    memDepUnit.squash(squashedSeqNum);
@ -672,12 +673,12 @@ InstructionQueue<Impl>::doSquash()
            // Remove the instruction from the dependency list.
            // Hack for now: These below don't add themselves to the
            // dependency list, so don't try to remove them.
-            if (!squashed_inst->isNonSpeculative() &&
-                !squashed_inst->isStore()) {
-                int8_t total_src_regs = squashed_inst->numSrcRegs();
+            if (!squashed_inst->isNonSpeculative()/* &&
+                                                     !squashed_inst->isStore()*/
+                ) {

                for (int src_reg_idx = 0;
-                     src_reg_idx < total_src_regs;
+                     src_reg_idx < squashed_inst->numSrcRegs();
                     src_reg_idx++)
                {
                    PhysRegIndex src_reg =
@ -699,6 +700,8 @@ InstructionQueue<Impl>::doSquash()

                // Might want to remove producers as well.
            } else {
+                nonSpecInsts[squashed_inst->seqNum] = NULL;
+
                nonSpecInsts.erase(squashed_inst->seqNum);

                ++iqSquashedNonSpecRemoved;
@ -709,7 +712,11 @@ InstructionQueue<Impl>::doSquash()
            // Mark it as squashed within the IQ.
            squashed_inst->setSquashedInIQ();

-            squashedInsts.push(squashed_inst);
+//            squashedInsts.push(squashed_inst);
+            squashed_inst->setIssued();
+            squashed_inst->setCanCommit();
+
+            ++freeEntries;

            DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n",
                    squashed_inst->readPC());
@ -718,6 +725,13 @@ InstructionQueue<Impl>::doSquash()
        --squashIt;
        ++iqSquashedInstsExamined;
    }
+
+    assert(freeEntries <= numEntries);
+
+    if (freeEntries == numEntries) {
+        tail = cpu->instList.end();
+    }
+
 }

 template <class Impl>
@ -739,8 +753,6 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
    //Look at the physical destination register of the DynInst
    //and look it up on the dependency graph.  Then mark as ready
    //any instructions within the instruction queue.
-    int8_t total_dest_regs = completed_inst->numDestRegs();
-
    DependencyEntry *curr;

    // Tell the memory dependence unit to wake any dependents on this
@ -751,7 +763,7 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
    }

    for (int dest_reg_idx = 0;
-         dest_reg_idx < total_dest_regs;
+         dest_reg_idx < completed_inst->numDestRegs();
         dest_reg_idx++)
    {
        PhysRegIndex dest_reg =
@ -759,7 +771,7 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)

        // Special case of uniq or control registers.  They are not
        // handled by the IQ and thus have no dependency graph entry.
-        // @todo Figure out a cleaner way to handle thie.
+        // @todo Figure out a cleaner way to handle this.
        if (dest_reg >= numPhysRegs) {
            continue;
        }
@ -789,6 +801,8 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)

            DependencyEntry::mem_alloc_counter--;

+            curr->inst = NULL;
+
            delete curr;
        }

@ -874,7 +888,10 @@ InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst)

        dependGraph[dest_reg].inst = new_inst;

-        assert(!dependGraph[dest_reg].next);
+        if (dependGraph[dest_reg].next) {
+            dumpDependGraph();
+            panic("IQ: Dependency graph not empty!");
+        }

        // Mark the scoreboard to say it's not yet ready.
        regScoreboard[dest_reg] = false;
@ -929,36 +946,12 @@ InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)

    --mem_alloc_counter;

+    // Could push this off to the destructor of DependencyEntry
+    curr->inst = NULL;
+
    delete curr;
 }

-template <class Impl>
-void
-InstructionQueue<Impl>::dumpDependGraph()
-{
-    DependencyEntry *curr;
-
-    for (int i = 0; i < numPhysRegs; ++i)
-    {
-        curr = &dependGraph[i];
-
-        if (curr->inst) {
-            cprintf("dependGraph[%i]: producer: %#x consumer: ", i,
-                    curr->inst->readPC());
-        } else {
-            cprintf("dependGraph[%i]: No producer. consumer: ", i);
-        }
-
-        while (curr->next != NULL) {
-            curr = curr->next;
-
-            cprintf("%#x ", curr->inst->readPC());
-        }
-
-        cprintf("\n");
-    }
-}
-
 template <class Impl>
 void
 InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
@ -1024,6 +1017,12 @@ InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
    }
 }

+/*
+ * Caution, this function must not be called prior to tail being updated at
+ * least once, otherwise it will fail the assertion.  This is because
+ * instList.begin() actually changes upon the insertion of an element into the
+ * list when the list is empty.
+ */
 template <class Impl>
 int
 InstructionQueue<Impl>::countInsts()
@ -1031,6 +1030,9 @@ InstructionQueue<Impl>::countInsts()
    ListIt count_it = cpu->instList.begin();
    int total_insts = 0;

+    if (tail == cpu->instList.end())
+        return 0;
+
    while (count_it != tail) {
        if (!(*count_it)->isIssued()) {
            ++total_insts;
@ -1051,6 +1053,33 @@ InstructionQueue<Impl>::countInsts()
    return total_insts;
 }

+template <class Impl>
+void
+InstructionQueue<Impl>::dumpDependGraph()
+{
+    DependencyEntry *curr;
+
+    for (int i = 0; i < numPhysRegs; ++i)
+    {
+        curr = &dependGraph[i];
+
+        if (curr->inst) {
+            cprintf("dependGraph[%i]: producer: %#x consumer: ", i,
+                    curr->inst->readPC());
+        } else {
+            cprintf("dependGraph[%i]: No producer. consumer: ", i);
+        }
+
+        while (curr->next != NULL) {
+            curr = curr->next;
+
+            cprintf("%#x ", curr->inst->readPC());
+        }
+
+        cprintf("\n");
+    }
+}
+
 template <class Impl>
 void
 InstructionQueue<Impl>::dumpLists()
--- a/cpu/beta_cpu/regfile.hh
+++ b/cpu/beta_cpu/regfile.hh
@ -1,18 +1,26 @@
-#ifndef __REGFILE_HH__
-#define __REGFILE_HH__
+#ifndef __CPU_BETA_CPU_REGFILE_HH__
+#define __CPU_BETA_CPU_REGFILE_HH__

 // @todo: Destructor

 #include "arch/alpha/isa_traits.hh"
+#include "base/trace.hh"
 #include "cpu/beta_cpu/comm.hh"

-#include "base/trace.hh"
+#ifdef FULL_SYSTEM
+#include "kern/kernel_stats.hh"
+#include "arch/alpha/ev5.hh"
+
+using namespace EV5;
+#endif

 // This really only depends on the ISA, and not the Impl.  It might be nicer
 // to see if I can make it depend on nothing...
 // Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA,
 // and should go in the AlphaFullCPU.

+extern void debug_break();
+
 template <class Impl>
 class PhysRegFile
 {
@ -27,6 +35,7 @@ class PhysRegFile
    //be private eventually with some accessor functions.
  public:
    typedef typename Impl::ISA ISA;
+    typedef typename Impl::FullCPU FullCPU;

    PhysRegFile(unsigned _numPhysicalIntRegs,
                unsigned _numPhysicalFloatRegs);
@ -177,6 +186,7 @@ class PhysRegFile
 #ifdef FULL_SYSTEM
    uint64_t readIpr(int idx, Fault &fault);
    Fault setIpr(int idx, uint64_t val);
+    InternalProcReg *getIpr() { return ipr; }
    int readIntrFlag() { return intrflag; }
    void setIntrFlag(int val) { intrflag = val; }
 #endif
@ -196,7 +206,21 @@ class PhysRegFile
    Addr pc;            // program counter
    Addr npc;            // next-cycle program counter

+#ifdef FULL_SYSTEM
  private:
+    // This is ISA specific stuff; remove it eventually once ISAImpl is used
+    IntReg palregs[NumIntRegs];	// PAL shadow registers
+    InternalProcReg ipr[NumInternalProcRegs]; // internal processor regs
+    int intrflag;			// interrupt flag
+    bool pal_shadow;		// using pal_shadow registers
+#endif
+
+  private:
+    FullCPU *cpu;
+
+  public:
+    void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; }
+
    unsigned numPhysicalIntRegs;
    unsigned numPhysicalFloatRegs;
 };
@ -269,46 +293,42 @@ PhysRegFile<Impl>::readIpr(int idx, Fault &fault)
      case ISA::IPR_IPLR:
      case ISA::IPR_INTID:
      case ISA::IPR_PMCTR:
-    // no side-effect
-    retval = ipr[idx];
-    break;
+        // no side-effect
+        retval = ipr[idx];
+        break;

      case ISA::IPR_CC:
-    retval |= ipr[idx] & ULL(0xffffffff00000000);
-    retval |= curTick  & ULL(0x00000000ffffffff);
-    break;
+        retval |= ipr[idx] & ULL(0xffffffff00000000);
+        retval |= curTick  & ULL(0x00000000ffffffff);
+        break;

      case ISA::IPR_VA:
-    // SFX: unlocks interrupt status registers
-    retval = ipr[idx];
-
-        if (!misspeculating())
-            regs.intrlock = false;
-    break;
+        retval = ipr[idx];
+        break;

      case ISA::IPR_VA_FORM:
      case ISA::IPR_MM_STAT:
      case ISA::IPR_IFAULT_VA_FORM:
      case ISA::IPR_EXC_MASK:
      case ISA::IPR_EXC_SUM:
-    retval = ipr[idx];
-    break;
+        retval = ipr[idx];
+        break;

      case ISA::IPR_DTB_PTE:
-    {
-        ISA::PTE &pte = dtb->index(!misspeculating());
+        {
+            typename ISA::PTE &pte = cpu->dtb->index(1);

-        retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32;
-        retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8;
-        retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12;
-        retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1;
-        retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2;
-        retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4;
-        retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57;
-    }
-    break;
+            retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32;
+            retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8;
+            retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12;
+            retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1;
+            retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2;
+            retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4;
+            retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57;
+        }
+        break;

-    // write only registers
+        // write only registers
      case ISA::IPR_HWINT_CLR:
      case ISA::IPR_SL_XMIT:
      case ISA::IPR_DC_FLUSH:
@ -318,22 +338,19 @@ PhysRegFile<Impl>::readIpr(int idx, Fault &fault)
      case ISA::IPR_DTB_IAP:
      case ISA::IPR_ITB_IA:
      case ISA::IPR_ITB_IAP:
-    fault = Unimplemented_Opcode_Fault;
-    break;
+        fault = Unimplemented_Opcode_Fault;
+        break;

      default:
-    // invalid IPR
-    fault = Unimplemented_Opcode_Fault;
-    break;
+        // invalid IPR
+        fault = Unimplemented_Opcode_Fault;
+        break;
    }

    return retval;
 }

-#ifdef DEBUG
-// Cause the simulator to break when changing to the following IPL
-int break_ipl = -1;
-#endif
+extern int break_ipl;

 template <class Impl>
 Fault
@ -341,9 +358,6 @@ PhysRegFile<Impl>::setIpr(int idx, uint64_t val)
 {
    uint64_t old;

-    if (misspeculating())
-    return No_Fault;
-
    switch (idx) {
      case ISA::IPR_PALtemp0:
      case ISA::IPR_PALtemp1:
@ -372,222 +386,225 @@ PhysRegFile<Impl>::setIpr(int idx, uint64_t val)
      case ISA::IPR_IC_PERR_STAT:
      case ISA::IPR_DC_PERR_STAT:
      case ISA::IPR_PMCTR:
-    // write entire quad w/ no side-effect
-    ipr[idx] = val;
-    break;
+        // write entire quad w/ no side-effect
+        ipr[idx] = val;
+        break;

      case ISA::IPR_CC_CTL:
-    // This IPR resets the cycle counter.  We assume this only
-    // happens once... let's verify that.
-    assert(ipr[idx] == 0);
-    ipr[idx] = 1;
-    break;
+        // This IPR resets the cycle counter.  We assume this only
+        // happens once... let's verify that.
+        assert(ipr[idx] == 0);
+        ipr[idx] = 1;
+        break;

      case ISA::IPR_CC:
-    // This IPR only writes the upper 64 bits.  It's ok to write
-    // all 64 here since we mask out the lower 32 in rpcc (see
-    // isa_desc).
-    ipr[idx] = val;
-    break;
+        // This IPR only writes the upper 32 bits.  It's ok to write
+        // all 64 here since we mask out the lower 32 in rpcc (see
+        // isa_desc).
+        ipr[idx] = val;
+        break;

      case ISA::IPR_PALtemp23:
-    // write entire quad w/ no side-effect
-    old = ipr[idx];
-    ipr[idx] = val;
-    kernelStats.context(old, val);
-    break;
+        // write entire quad w/ no side-effect
+        old = ipr[idx];
+        ipr[idx] = val;
+//        kernelStats.context(old, val);
+        break;

      case ISA::IPR_DTB_PTE:
-    // write entire quad w/ no side-effect, tag is forthcoming
-    ipr[idx] = val;
-    break;
+        // write entire quad w/ no side-effect, tag is forthcoming
+        ipr[idx] = val;
+        break;

      case ISA::IPR_EXC_ADDR:
-    // second least significant bit in PC is always zero
-    ipr[idx] = val & ~2;
-    break;
+        // second least significant bit in PC is always zero
+        ipr[idx] = val & ~2;
+        break;

      case ISA::IPR_ASTRR:
      case ISA::IPR_ASTER:
-    // only write least significant four bits - privilege mask
-    ipr[idx] = val & 0xf;
-    break;
+        // only write least significant four bits - privilege mask
+        ipr[idx] = val & 0xf;
+        break;

      case ISA::IPR_IPLR:
 #ifdef DEBUG
-    if (break_ipl != -1 && break_ipl == (val & 0x1f))
-        debug_break();
+        if (break_ipl != -1 && break_ipl == (val & 0x1f))
+            debug_break();
 #endif

-    // only write least significant five bits - interrupt level
-    ipr[idx] = val & 0x1f;
-    kernelStats.swpipl(ipr[idx]);
-    break;
+        // only write least significant five bits - interrupt level
+        ipr[idx] = val & 0x1f;
+//        kernelStats.swpipl(ipr[idx]);
+        break;

      case ISA::IPR_DTB_CM:
-    kernelStats.mode((val & 0x18) != 0);
+//        if (val & 0x18)
+//            kernelStats->mode(Kernel::user);
+//        else
+//            kernelStats->mode(Kernel::kernel);

      case ISA::IPR_ICM:
-    // only write two mode bits - processor mode
-    ipr[idx] = val & 0x18;
-    break;
+        // only write two mode bits - processor mode
+        ipr[idx] = val & 0x18;
+        break;

      case ISA::IPR_ALT_MODE:
-    // only write two mode bits - processor mode
-    ipr[idx] = val & 0x18;
-    break;
+        // only write two mode bits - processor mode
+        ipr[idx] = val & 0x18;
+        break;

      case ISA::IPR_MCSR:
-    // more here after optimization...
-    ipr[idx] = val;
-    break;
+        // more here after optimization...
+        ipr[idx] = val;
+        break;

      case ISA::IPR_SIRR:
-    // only write software interrupt mask
-    ipr[idx] = val & 0x7fff0;
-    break;
+        // only write software interrupt mask
+        ipr[idx] = val & 0x7fff0;
+        break;

      case ISA::IPR_ICSR:
-    ipr[idx] = val & ULL(0xffffff0300);
-    break;
+        ipr[idx] = val & ULL(0xffffff0300);
+        break;

      case ISA::IPR_IVPTBR:
      case ISA::IPR_MVPTBR:
-    ipr[idx] = val & ULL(0xffffffffc0000000);
-    break;
+        ipr[idx] = val & ULL(0xffffffffc0000000);
+        break;

      case ISA::IPR_DC_TEST_CTL:
-    ipr[idx] = val & 0x1ffb;
-    break;
+        ipr[idx] = val & 0x1ffb;
+        break;

      case ISA::IPR_DC_MODE:
      case ISA::IPR_MAF_MODE:
-    ipr[idx] = val & 0x3f;
-    break;
+        ipr[idx] = val & 0x3f;
+        break;

      case ISA::IPR_ITB_ASN:
-    ipr[idx] = val & 0x7f0;
-    break;
+        ipr[idx] = val & 0x7f0;
+        break;

      case ISA::IPR_DTB_ASN:
-    ipr[idx] = val & ULL(0xfe00000000000000);
-    break;
+        ipr[idx] = val & ULL(0xfe00000000000000);
+        break;

      case ISA::IPR_EXC_SUM:
      case ISA::IPR_EXC_MASK:
-    // any write to this register clears it
-    ipr[idx] = 0;
-    break;
+        // any write to this register clears it
+        ipr[idx] = 0;
+        break;

      case ISA::IPR_INTID:
      case ISA::IPR_SL_RCV:
      case ISA::IPR_MM_STAT:
      case ISA::IPR_ITB_PTE_TEMP:
      case ISA::IPR_DTB_PTE_TEMP:
-    // read-only registers
-    return Unimplemented_Opcode_Fault;
+        // read-only registers
+        return Unimplemented_Opcode_Fault;

      case ISA::IPR_HWINT_CLR:
      case ISA::IPR_SL_XMIT:
      case ISA::IPR_DC_FLUSH:
      case ISA::IPR_IC_FLUSH:
-    // the following are write only
-    ipr[idx] = val;
-    break;
+        // the following are write only
+        ipr[idx] = val;
+        break;

      case ISA::IPR_DTB_IA:
-    // really a control write
-    ipr[idx] = 0;
+        // really a control write
+        ipr[idx] = 0;

-    dtb->flushAll();
-    break;
+        cpu->dtb->flushAll();
+        break;

      case ISA::IPR_DTB_IAP:
-    // really a control write
-    ipr[idx] = 0;
+        // really a control write
+        ipr[idx] = 0;

-    dtb->flushProcesses();
-    break;
+        cpu->dtb->flushProcesses();
+        break;

      case ISA::IPR_DTB_IS:
-    // really a control write
-    ipr[idx] = val;
+        // really a control write
+        ipr[idx] = val;

-    dtb->flushAddr(val, DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]));
-    break;
+        cpu->dtb->flushAddr(val, DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]));
+        break;

      case ISA::IPR_DTB_TAG: {
-      struct ISA::PTE pte;
+          struct ISA::PTE pte;

-      // FIXME: granularity hints NYI...
-      if (DTB_PTE_GH(ipr[ISA::IPR_DTB_PTE]) != 0)
-          panic("PTE GH field != 0");
+          // FIXME: granularity hints NYI...
+          if (DTB_PTE_GH(ipr[ISA::IPR_DTB_PTE]) != 0)
+              panic("PTE GH field != 0");

-      // write entire quad
-      ipr[idx] = val;
+          // write entire quad
+          ipr[idx] = val;

-      // construct PTE for new entry
-      pte.ppn = DTB_PTE_PPN(ipr[ISA::IPR_DTB_PTE]);
-      pte.xre = DTB_PTE_XRE(ipr[ISA::IPR_DTB_PTE]);
-      pte.xwe = DTB_PTE_XWE(ipr[ISA::IPR_DTB_PTE]);
-      pte.fonr = DTB_PTE_FONR(ipr[ISA::IPR_DTB_PTE]);
-      pte.fonw = DTB_PTE_FONW(ipr[ISA::IPR_DTB_PTE]);
-      pte.asma = DTB_PTE_ASMA(ipr[ISA::IPR_DTB_PTE]);
-      pte.asn = DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]);
+          // construct PTE for new entry
+          pte.ppn = DTB_PTE_PPN(ipr[ISA::IPR_DTB_PTE]);
+          pte.xre = DTB_PTE_XRE(ipr[ISA::IPR_DTB_PTE]);
+          pte.xwe = DTB_PTE_XWE(ipr[ISA::IPR_DTB_PTE]);
+          pte.fonr = DTB_PTE_FONR(ipr[ISA::IPR_DTB_PTE]);
+          pte.fonw = DTB_PTE_FONW(ipr[ISA::IPR_DTB_PTE]);
+          pte.asma = DTB_PTE_ASMA(ipr[ISA::IPR_DTB_PTE]);
+          pte.asn = DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]);

-      // insert new TAG/PTE value into data TLB
-      dtb->insert(val, pte);
+          // insert new TAG/PTE value into data TLB
+          cpu->dtb->insert(val, pte);
      }
-    break;
+        break;

      case ISA::IPR_ITB_PTE: {
-      struct ISA::PTE pte;
+          struct ISA::PTE pte;

-      // FIXME: granularity hints NYI...
-      if (ITB_PTE_GH(val) != 0)
-          panic("PTE GH field != 0");
+          // FIXME: granularity hints NYI...
+          if (ITB_PTE_GH(val) != 0)
+              panic("PTE GH field != 0");

-      // write entire quad
-      ipr[idx] = val;
+          // write entire quad
+          ipr[idx] = val;

-      // construct PTE for new entry
-      pte.ppn = ITB_PTE_PPN(val);
-      pte.xre = ITB_PTE_XRE(val);
-      pte.xwe = 0;
-      pte.fonr = ITB_PTE_FONR(val);
-      pte.fonw = ITB_PTE_FONW(val);
-      pte.asma = ITB_PTE_ASMA(val);
-      pte.asn = ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]);
+          // construct PTE for new entry
+          pte.ppn = ITB_PTE_PPN(val);
+          pte.xre = ITB_PTE_XRE(val);
+          pte.xwe = 0;
+          pte.fonr = ITB_PTE_FONR(val);
+          pte.fonw = ITB_PTE_FONW(val);
+          pte.asma = ITB_PTE_ASMA(val);
+          pte.asn = ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]);

-      // insert new TAG/PTE value into data TLB
-      itb->insert(ipr[ISA::IPR_ITB_TAG], pte);
+          // insert new TAG/PTE value into instruction TLB
+          cpu->itb->insert(ipr[ISA::IPR_ITB_TAG], pte);
      }
-    break;
+        break;

      case ISA::IPR_ITB_IA:
-    // really a control write
-    ipr[idx] = 0;
+        // really a control write
+        ipr[idx] = 0;

-    itb->flushAll();
-    break;
+        cpu->itb->flushAll();
+        break;

      case ISA::IPR_ITB_IAP:
-    // really a control write
-    ipr[idx] = 0;
+        // really a control write
+        ipr[idx] = 0;

-    itb->flushProcesses();
-    break;
+        cpu->itb->flushProcesses();
+        break;

      case ISA::IPR_ITB_IS:
-    // really a control write
-    ipr[idx] = val;
+        // really a control write
+        ipr[idx] = val;

-    itb->flushAddr(val, ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]));
-    break;
+        cpu->itb->flushAddr(val, ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]));
+        break;

      default:
-    // invalid IPR
-    return Unimplemented_Opcode_Fault;
+        // invalid IPR
+        return Unimplemented_Opcode_Fault;
    }

    // no error...
@ -596,4 +613,4 @@ PhysRegFile<Impl>::setIpr(int idx, uint64_t val)

 #endif // #ifdef FULL_SYSTEM

-#endif // __REGFILE_HH__
+#endif // __CPU_BETA_CPU_REGFILE_HH__
--- a/cpu/beta_cpu/rob.hh
+++ b/cpu/beta_cpu/rob.hh
@ -10,8 +10,6 @@
 #include <utility>
 #include <vector>

-//#include "arch/alpha/isa_traits.hh"
-
 /**
 * ROB class.  Uses the instruction list that exists within the CPU to
 * represent the ROB.  This class doesn't contain that list, but instead
--- a/cpu/beta_cpu/rob_impl.hh
+++ b/cpu/beta_cpu/rob_impl.hh
@ -1,5 +1,5 @@
-#ifndef __ROB_IMPL_HH__
-#define __ROB_IMPL_HH__
+#ifndef __CPU_BETA_CPU_ROB_IMPL_HH__
+#define __CPU_BETA_CPU_ROB_IMPL_HH__

 #include "cpu/beta_cpu/rob.hh"

@ -107,10 +107,8 @@ ROB<Impl>::retireHead()
    assert(numInstsInROB == countInsts());
    assert(numInstsInROB > 0);

-    DynInstPtr head_inst;
-
    // Get the head ROB instruction.
-    head_inst = cpu->instList.front();
+    DynInstPtr head_inst = cpu->instList.front();

    // Make certain this can retire.
    assert(head_inst->readyToCommit());
@ -126,11 +124,10 @@ ROB<Impl>::retireHead()
    // A special case is needed if the instruction being retired is the
    // only instruction in the ROB; otherwise the tail iterator will become
    // invalidated.
-    if (tail == cpu->instList.begin()) {
-        cpu->removeFrontInst(head_inst);
+    cpu->removeFrontInst(head_inst);
+
+    if (numInstsInROB == 0) {
        tail = cpu->instList.end();
-    } else {
-        cpu->removeFrontInst(head_inst);
    }
 }

@ -283,4 +280,4 @@ ROB<Impl>::readTailSeqNum()
    return (*tail)->seqNum;
 }

-#endif // __ROB_IMPL_HH__
+#endif // __CPU_BETA_CPU_ROB_IMPL_HH__
--- a/cpu/ooo_cpu/ooo_cpu.hh
+++ b/cpu/ooo_cpu/ooo_cpu.hh
@ -122,7 +122,7 @@ class OoOCPU : public BaseCPU
    enum Status {
        Running,
        Idle,
-        IcacheMissStall,
+        IcacheMiss,
        IcacheMissComplete,
        DcacheMissStall,
        SwitchedOut
@ -161,6 +161,8 @@ class OoOCPU : public BaseCPU

    virtual ~OoOCPU();

+    void init();
+
  private:
    void copyFromXC();

@ -203,14 +205,21 @@ class OoOCPU : public BaseCPU
    // Will need to create a cache completion event upon any memory miss.
    ICacheCompletionEvent iCacheCompletionEvent;

+    class DCacheCompletionEvent;
+
+    typedef typename
+    std::list<DCacheCompletionEvent>::iterator DCacheCompEventIt;
+
    class DCacheCompletionEvent : public Event
    {
      private:
        OoOCPU *cpu;
        DynInstPtr inst;
+        DCacheCompEventIt dcceIt;

      public:
-        DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst);
+        DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst,
+                              DCacheCompEventIt &_dcceIt);

        virtual void process();
        virtual const char *description();
@ -218,6 +227,11 @@ class OoOCPU : public BaseCPU

    friend class DCacheCompletionEvent;

+  protected:
+    std::list<DCacheCompletionEvent> dCacheCompList;
+    DCacheCompEventIt dcceIt;
+
+  private:
    Status status() const { return _status; }

    virtual void activateContext(int thread_num, int delay);
@ -260,6 +274,8 @@ class OoOCPU : public BaseCPU

    void processICacheCompletion();

+  public:
+
    virtual void serialize(std::ostream &os);
    virtual void unserialize(Checkpoint *cp, const std::string &section);

@ -350,7 +366,7 @@ class OoOCPU : public BaseCPU

    void commitHeadInst();

-    bool grabInst();
+    bool getOneInst();

    Fault fetchCacheLine();

@ -471,6 +487,7 @@ class OoOCPU : public BaseCPU
    // ROB tracking stuff.
    DynInstPtr robHeadPtr;
    DynInstPtr robTailPtr;
+    unsigned robSize;
    unsigned robInsts;

    // List of outstanding EA instructions.
@ -545,10 +562,8 @@ OoOCPU<Impl>::read(Addr addr, T &data, unsigned flags, DynInstPtr inst)
        /*MemAccessResult result = */dcacheInterface->access(readReq);

        if (dcacheInterface->doEvents()) {
-            readReq->completionEvent = new DCacheCompletionEvent(this, inst);
-            lastDcacheStall = curTick;
-            unscheduleTickEvent();
-            _status = DcacheMissStall;
+            readReq->completionEvent = new DCacheCompletionEvent(this, inst,
+                                                                 dcceIt);
        }
    }

@ -579,7 +594,7 @@ OoOCPU<Impl>::write(T data, Addr addr, unsigned flags,
    writeReq->reset(addr, sizeof(T), flags);

    // translate to physical address
-    Fault fault = xc->translateDataWriteReq(writeReq);
+    Fault fault = translateDataWriteReq(writeReq);

    // do functional access
    if (fault == No_Fault)
@ -593,10 +608,8 @@ OoOCPU<Impl>::write(T data, Addr addr, unsigned flags,
        /*MemAccessResult result = */dcacheInterface->access(writeReq);

        if (dcacheInterface->doEvents()) {
-            writeReq->completionEvent = new DCacheCompletionEvent(this, inst);
-            lastDcacheStall = curTick;
-            unscheduleTickEvent();
-            _status = DcacheMissStall;
+            writeReq->completionEvent = new DCacheCompletionEvent(this, inst,
+                                                                  dcceIt);
        }
    }

--- a/cpu/static_inst.hh
+++ b/cpu/static_inst.hh
@ -41,16 +41,12 @@

 // forward declarations
 struct AlphaSimpleImpl;
-struct OoOImpl;
 class ExecContext;
 class DynInst;

 template <class Impl>
 class AlphaDynInst;

-template <class Impl>
-class OoODynInst;
-
 class FastCPU;
 class SimpleCPU;
 class InorderCPU;
@ -260,7 +256,7 @@ class StaticInst : public StaticInstBase
     * obtain the dependence info (numSrcRegs and srcRegIdx[]) for
     * just the EA computation.
     */
-    virtual
+    virtual const
    StaticInstPtr<ISA> &eaCompInst() const { return nullStaticInstPtr; }

    /**
@ -269,7 +265,7 @@ class StaticInst : public StaticInstBase
     * obtain the dependence info (numSrcRegs and srcRegIdx[]) for
     * just the memory access (not the EA computation).
     */
-    virtual
+    virtual const
    StaticInstPtr<ISA> &memAccInst() const { return nullStaticInstPtr; }

    /// The binary machine instruction.
--- a/kern/kernel_stats.hh
+++ b/kern/kernel_stats.hh
@ -41,6 +41,9 @@
 class BaseCPU;
 class ExecContext;
 class FnEvent;
+// What does kernel stats expect to be already included?
+class StaticInstBase;
+class System;
 enum Fault;

 namespace Kernel {
--- a/python/m5/objects/AlphaFullCPU.mpy
+++ b/python/m5/objects/AlphaFullCPU.mpy
@ -0,0 +1,79 @@
+from BaseCPU import BaseCPU
+# Declares the DerivAlphaFullCPU simobj and the parameters it exposes.
+simobj DerivAlphaFullCPU(BaseCPU):
+    type = 'DerivAlphaFullCPU'
+
+    numThreads = Param.Unsigned("number of HW thread contexts")
+    # 'mem' is only declared when build_env['FULL_SYSTEM'] is false.
+    if not build_env['FULL_SYSTEM']:
+        mem = Param.FunctionalMemory(NULL, "memory")
+    # Delays from decode/rename/IEW/commit to fetch, plus the fetch width.
+    decodeToFetchDelay = Param.Unsigned("Decode to fetch delay")
+    renameToFetchDelay = Param.Unsigned("Rename to fetch delay")
+    iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch "
+               "delay")
+    commitToFetchDelay = Param.Unsigned("Commit to fetch delay")
+    fetchWidth = Param.Unsigned("Fetch width")
+    # Delays from rename/IEW/commit/fetch to decode, plus the decode width.
+    renameToDecodeDelay = Param.Unsigned("Rename to decode delay")
+    iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode "
+               "delay")
+    commitToDecodeDelay = Param.Unsigned("Commit to decode delay")
+    fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay")
+    decodeWidth = Param.Unsigned("Decode width")
+    # Delays from IEW/commit/decode to rename, plus the rename width.
+    iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename "
+               "delay")
+    commitToRenameDelay = Param.Unsigned("Commit to rename delay")
+    decodeToRenameDelay = Param.Unsigned("Decode to rename delay")
+    renameWidth = Param.Unsigned("Rename width")
+    # Delays into IEW, its internal issue-to-execute delay, and its widths.
+    commitToIEWDelay = Param.Unsigned("Commit to "
+               "Issue/Execute/Writeback delay")
+    renameToIEWDelay = Param.Unsigned("Rename to "
+               "Issue/Execute/Writeback delay")
+    issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal "
+              "to the IEW stage)")
+    issueWidth = Param.Unsigned("Issue width")
+    executeWidth = Param.Unsigned("Execute width")
+    executeIntWidth = Param.Unsigned("Integer execute width")
+    executeFloatWidth = Param.Unsigned("Floating point execute width")
+    executeBranchWidth = Param.Unsigned("Branch execute width")
+    executeMemoryWidth = Param.Unsigned("Memory execute width")
+    # Delays into commit and the reorder buffer, plus commit/squash widths.
+    iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit "
+               "delay")
+    renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay")
+    commitWidth = Param.Unsigned("Commit width")
+    squashWidth = Param.Unsigned("Squash width")
+    # Local/global/choice predictor table sizes and counter/history widths.
+    local_predictor_size = Param.Unsigned("Size of local predictor")
+    local_ctr_bits = Param.Unsigned("Bits per counter")
+    local_history_table_size = Param.Unsigned("Size of local history table")
+    local_history_bits = Param.Unsigned("Bits for the local history")
+    global_predictor_size = Param.Unsigned("Size of global predictor")
+    global_ctr_bits = Param.Unsigned("Bits per counter")
+    global_history_bits = Param.Unsigned("Bits of history")
+    choice_predictor_size = Param.Unsigned("Size of choice predictor")
+    choice_ctr_bits = Param.Unsigned("Bits of choice counters")
+    # BTB entry count and tag width (in bits).
+    BTBEntries = Param.Unsigned("Number of BTB entries")
+    BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits")
+
+    RASSize = Param.Unsigned("RAS size")
+    # Load/store queue entry counts and the LFST/SSIT table sizes.
+    LQEntries = Param.Unsigned("Number of load queue entries")
+    SQEntries = Param.Unsigned("Number of store queue entries")
+    LFSTSize = Param.Unsigned("Last fetched store table size")
+    SSITSize = Param.Unsigned("Store set ID table size")
+    # Physical register counts and instruction queue / ROB entry counts.
+    numPhysIntRegs = Param.Unsigned("Number of physical integer registers")
+    numPhysFloatRegs = Param.Unsigned("Number of physical floating point "
+               "registers")
+    numIQEntries = Param.Unsigned("Number of instruction queue entries")
+    numROBEntries = Param.Unsigned("Number of reorder buffer entries")
+
+    instShiftAmt = Param.Unsigned("Number of bits to shift instructions by")
+    # Function-trace controls; the leading argument is presumably the default.
+    function_trace = Param.Bool(False, "Enable function trace")
+    function_trace_start = Param.Tick(0, "Cycle to start function trace")