From 61d95de4c886911fa0b7dc9d587ffe5b292b739e Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 3 May 2005 10:56:47 -0400 Subject: [PATCH] Large update of several parts of my code. The most notable change is the inclusion of a full-fledged load/store queue. At the moment it still has some issues running, but most of the code is hopefully close to the final version. SConscript: arch/isa_parser.py: cpu/base_dyn_inst.cc: Remove OOO CPU stuff. arch/alpha/faults.hh: Add fake memory fault. This will be removed eventually. arch/alpha/isa_desc: Change EA comp and Mem accessor to be const StaticInstPtrs. cpu/base_dyn_inst.hh: Update read/write calls to use load queue and store queue indices. cpu/beta_cpu/alpha_dyn_inst.hh: Change to const StaticInst in the register accessors. cpu/beta_cpu/alpha_dyn_inst_impl.hh: Update syscall code with thread numbers. cpu/beta_cpu/alpha_full_cpu.hh: Alter some of the full system code so it will compile without errors. cpu/beta_cpu/alpha_full_cpu_builder.cc: Created a DerivAlphaFullCPU class so I can instantiate different CPUs that have different template parameters. cpu/beta_cpu/alpha_full_cpu_impl.hh: Update some of the full system code so it compiles. cpu/beta_cpu/alpha_params.hh: cpu/beta_cpu/fetch_impl.hh: Remove asid. cpu/beta_cpu/comm.hh: Remove global history field. cpu/beta_cpu/commit.hh: Comment out rename map. cpu/beta_cpu/commit_impl.hh: Update some of the full system code so it compiles. Also change it so that it handles memory instructions properly. cpu/beta_cpu/cpu_policy.hh: Removed IQ from the IEW template parameter to make it more uniform. cpu/beta_cpu/decode.hh: Add debug function. cpu/beta_cpu/decode_impl.hh: Slight updates for decode in the case where it causes a squash. cpu/beta_cpu/fetch.hh: cpu/beta_cpu/rob.hh: Comment out unnecessary code. cpu/beta_cpu/full_cpu.cc: Changed some of the full system code so it compiles. Updated exec contexts and so forth to hopefully make multithreading easier. 
cpu/beta_cpu/full_cpu.hh: Updated some of the full system code to make it compile. cpu/beta_cpu/iew.cc: Removed IQ from template parameter to IEW. cpu/beta_cpu/iew.hh: Removed IQ from template parameter to IEW. Updated IEW to recognize the Load/Store queue. cpu/beta_cpu/iew_impl.hh: New handling of memory instructions through the Load/Store queue. cpu/beta_cpu/inst_queue.hh: Updated comment. cpu/beta_cpu/inst_queue_impl.hh: Slightly different handling of memory instructions due to Load/Store queue. cpu/beta_cpu/regfile.hh: Updated full system code so it compiles. cpu/beta_cpu/rob_impl.hh: Moved some code around; no major functional changes. cpu/ooo_cpu/ooo_cpu.hh: Slight updates to OOO CPU; still does not work. cpu/static_inst.hh: Remove OOO CPU stuff. Change ea comp and mem acc to return const StaticInst. kern/kernel_stats.hh: Extra forward declares added due to compile error. --HG-- extra : convert_revision : 594a7cdbe57f6c2bda7d08856fcd864604a6238e --- SConscript | 8 +- arch/alpha/faults.hh | 1 + arch/alpha/isa_desc | 12 +- arch/isa_parser.py | 3 - cpu/base_dyn_inst.cc | 7 - cpu/base_dyn_inst.hh | 13 +- cpu/beta_cpu/alpha_dyn_inst.hh | 21 +- cpu/beta_cpu/alpha_dyn_inst_impl.hh | 3 +- cpu/beta_cpu/alpha_full_cpu.hh | 32 +- cpu/beta_cpu/alpha_full_cpu_builder.cc | 61 ++-- cpu/beta_cpu/alpha_full_cpu_impl.hh | 432 +++---------------------- cpu/beta_cpu/alpha_params.hh | 2 +- cpu/beta_cpu/comm.hh | 7 +- cpu/beta_cpu/commit.hh | 3 - cpu/beta_cpu/commit_impl.hh | 36 +-- cpu/beta_cpu/cpu_policy.hh | 2 +- cpu/beta_cpu/decode.hh | 4 + cpu/beta_cpu/decode_impl.hh | 56 ++-- cpu/beta_cpu/fetch.hh | 7 - cpu/beta_cpu/fetch_impl.hh | 20 +- cpu/beta_cpu/full_cpu.cc | 84 +++-- cpu/beta_cpu/full_cpu.hh | 44 ++- cpu/beta_cpu/iew.cc | 2 +- cpu/beta_cpu/iew.hh | 46 ++- cpu/beta_cpu/iew_impl.hh | 233 ++++++++----- cpu/beta_cpu/inst_queue.hh | 2 +- cpu/beta_cpu/inst_queue_impl.hh | 125 ++++--- cpu/beta_cpu/regfile.hh | 353 ++++++++++---------- cpu/beta_cpu/rob.hh | 2 - 
cpu/beta_cpu/rob_impl.hh | 17 +- cpu/ooo_cpu/ooo_cpu.hh | 37 ++- cpu/static_inst.hh | 8 +- kern/kernel_stats.hh | 3 + python/m5/objects/AlphaFullCPU.mpy | 79 +++++ 34 files changed, 838 insertions(+), 927 deletions(-) create mode 100644 python/m5/objects/AlphaFullCPU.mpy diff --git a/SConscript b/SConscript index 525a94818..7b5b2a970 100644 --- a/SConscript +++ b/SConscript @@ -52,7 +52,6 @@ base_sources = Split(''' arch/alpha/full_cpu_exec.cc arch/alpha/faults.cc arch/alpha/isa_traits.cc - arch/alpha/ooo_cpu_exec.cc base/circlebuf.cc base/copyright.cc @@ -157,10 +156,6 @@ base_sources = Split(''' cpu/full_cpu/iq/seznec/iq_seznec.cc cpu/full_cpu/iq/standard/iq_standard.cc cpu/inorder_cpu/inorder_cpu.cc - cpu/ooo_cpu/ea_list.cc - cpu/ooo_cpu/ooo_cpu.cc - cpu/ooo_cpu/ooo_dyn_inst.cc - cpu/ooo_cpu/ooo_sim_obj.cc cpu/sampling_cpu/sampling_cpu.cc cpu/simple_cpu/simple_cpu.cc cpu/trace/reader/mem_trace_reader.cc @@ -402,8 +397,7 @@ env.Command(Split('''arch/alpha/decoder.cc arch/alpha/fast_cpu_exec.cc arch/alpha/simple_cpu_exec.cc arch/alpha/inorder_cpu_exec.cc - arch/alpha/full_cpu_exec.cc - arch/alpha/ooo_cpu_exec.cc'''), + arch/alpha/full_cpu_exec.cc'''), Split('''arch/alpha/isa_desc arch/isa_parser.py'''), '$SRCDIR/arch/isa_parser.py $SOURCE $TARGET.dir arch/alpha') diff --git a/arch/alpha/faults.hh b/arch/alpha/faults.hh index 45ac122dc..a49a1c4f0 100644 --- a/arch/alpha/faults.hh +++ b/arch/alpha/faults.hh @@ -47,6 +47,7 @@ enum Fault { Fen_Fault, // FP not-enabled fault Pal_Fault, // call_pal S/W interrupt Integer_Overflow_Fault, + Fake_Mem_Fault, Num_Faults // number of faults }; diff --git a/arch/alpha/isa_desc b/arch/alpha/isa_desc index 904af3ef0..0e07400d3 100644 --- a/arch/alpha/isa_desc +++ b/arch/alpha/isa_desc @@ -744,9 +744,9 @@ output header {{ /// Memory request flags. See mem_req_base.hh. unsigned memAccessFlags; /// Pointer to EAComp object. - StaticInstPtr eaCompPtr; + const StaticInstPtr eaCompPtr; /// Pointer to MemAcc object. 
- StaticInstPtr memAccPtr; + const StaticInstPtr memAccPtr; /// Constructor Memory(const char *mnem, MachInst _machInst, OpClass __opClass, @@ -762,8 +762,8 @@ output header {{ public: - StaticInstPtr &eaCompInst() { return eaCompPtr; } - StaticInstPtr &memAccInst() { return memAccPtr; } + const StaticInstPtr &eaCompInst() const { return eaCompPtr; } + const StaticInstPtr &memAccInst() const { return memAccPtr; } }; /** @@ -2539,9 +2539,9 @@ decode OPCODE default Unknown::unknown() { xc->syscall(); }}, IsNonSpeculative); // Read uniq reg into ABI return value register (r0) - 0x9e: rduniq({{ R0 = Runiq; }}, IsNonSpeculative); + 0x9e: rduniq({{ R0 = Runiq; }}); // Write uniq reg with value from ABI arg register (r16) - 0x9f: wruniq({{ Runiq = R16; }}, IsNonSpeculative); + 0x9f: wruniq({{ Runiq = R16; }}); } } #endif diff --git a/arch/isa_parser.py b/arch/isa_parser.py index 5e0267c9e..8187cf188 100755 --- a/arch/isa_parser.py +++ b/arch/isa_parser.py @@ -642,9 +642,6 @@ CpuModel('FullCPU', 'full_cpu_exec.cc', CpuModel('AlphaFullCPU', 'alpha_full_cpu_exec.cc', '#include "cpu/beta_cpu/alpha_dyn_inst.hh"', { 'CPU_exec_context': 'AlphaDynInst' }) -CpuModel('OoOCPU', 'ooo_cpu_exec.cc', - '#include "cpu/ooo_cpu/ooo_dyn_inst.hh"', - { 'CPU_exec_context': 'OoODynInst' }) # Expand template with CPU-specific references into a dictionary with # an entry for each CPU model name. The entry key is the model name diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc index b8424f576..ecfe5a4b0 100644 --- a/cpu/base_dyn_inst.cc +++ b/cpu/base_dyn_inst.cc @@ -43,8 +43,6 @@ #include "cpu/base_dyn_inst.hh" #include "cpu/beta_cpu/alpha_impl.hh" #include "cpu/beta_cpu/alpha_full_cpu.hh" -#include "cpu/ooo_cpu/ooo_impl.hh" -#include "cpu/ooo_cpu/ooo_cpu.hh" using namespace std; @@ -384,14 +382,9 @@ BaseDynInst::eaSrcsReady() // Forward declaration... 
template class BaseDynInst; -template class BaseDynInst; template <> int BaseDynInst::instcount = 0; -template <> -int -BaseDynInst::instcount = 0; - #endif // __CPU_BASE_DYN_INST_CC__ diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh index 943293b25..509874fad 100644 --- a/cpu/base_dyn_inst.hh +++ b/cpu/base_dyn_inst.hh @@ -404,6 +404,10 @@ class BaseDynInst : public FastAlloc, public RefCounted const Addr &getEA() const { return instEffAddr; } bool doneEACalc() { return eaCalcDone; } bool eaSrcsReady(); + + public: + int16_t lqIdx; + int16_t sqIdx; }; template @@ -419,6 +423,7 @@ BaseDynInst::read(Addr addr, T &data, unsigned flags) // Record key MemReq parameters so we can generate another one // just like it for the timing access without calling translate() // again (which might mess up the TLB). + // Do I ever really need this? -KTL 3/05 effAddr = req->vaddr; physEffAddr = req->paddr; memReqFlags = req->flags; @@ -433,7 +438,7 @@ BaseDynInst::read(Addr addr, T &data, unsigned flags) #endif if (fault == No_Fault) { - fault = cpu->read(req, data); + fault = cpu->read(req, data, lqIdx); } else { // Return a fixed value to keep simulation deterministic even @@ -459,8 +464,8 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) traceData->setData(data); } - storeSize = sizeof(T); - storeData = data; +// storeSize = sizeof(T); +// storeData = data; MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags); @@ -485,7 +490,7 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) #endif if (fault == No_Fault) { - fault = cpu->write(req, data); + fault = cpu->write(req, data, sqIdx); } if (res) { diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh index b2f0d703e..d34fa071c 100644 --- a/cpu/beta_cpu/alpha_dyn_inst.hh +++ b/cpu/beta_cpu/alpha_dyn_inst.hh @@ -47,11 +47,10 @@ class AlphaDynInst : public BaseDynInst /** BaseDynInst constructor given a static inst pointer. 
*/ AlphaDynInst(StaticInstPtr &_staticInst); - /** Executes the instruction. Why the hell did I put this here? */ + /** Executes the instruction.*/ Fault execute() { - this->fault = this->staticInst->execute(this, this->traceData); - return this->fault; + return this->fault = this->staticInst->execute(this, this->traceData); } public: @@ -105,47 +104,47 @@ class AlphaDynInst : public BaseDynInst // storage (which is pretty hard to imagine they would have reason // to do). - uint64_t readIntReg(StaticInst *si, int idx) + uint64_t readIntReg(const StaticInst *si, int idx) { return this->cpu->readIntReg(_srcRegIdx[idx]); } - float readFloatRegSingle(StaticInst *si, int idx) + float readFloatRegSingle(const StaticInst *si, int idx) { return this->cpu->readFloatRegSingle(_srcRegIdx[idx]); } - double readFloatRegDouble(StaticInst *si, int idx) + double readFloatRegDouble(const StaticInst *si, int idx) { return this->cpu->readFloatRegDouble(_srcRegIdx[idx]); } - uint64_t readFloatRegInt(StaticInst *si, int idx) + uint64_t readFloatRegInt(const StaticInst *si, int idx) { return this->cpu->readFloatRegInt(_srcRegIdx[idx]); } /** @todo: Make results into arrays so they can handle multiple dest * registers. 
*/ - void setIntReg(StaticInst *si, int idx, uint64_t val) + void setIntReg(const StaticInst *si, int idx, uint64_t val) { this->cpu->setIntReg(_destRegIdx[idx], val); this->instResult.integer = val; } - void setFloatRegSingle(StaticInst *si, int idx, float val) + void setFloatRegSingle(const StaticInst *si, int idx, float val) { this->cpu->setFloatRegSingle(_destRegIdx[idx], val); this->instResult.fp = val; } - void setFloatRegDouble(StaticInst *si, int idx, double val) + void setFloatRegDouble(const StaticInst *si, int idx, double val) { this->cpu->setFloatRegDouble(_destRegIdx[idx], val); this->instResult.dbl = val; } - void setFloatRegInt(StaticInst *si, int idx, uint64_t val) + void setFloatRegInt(const StaticInst *si, int idx, uint64_t val) { this->cpu->setFloatRegInt(_destRegIdx[idx], val); this->instResult.integer = val; diff --git a/cpu/beta_cpu/alpha_dyn_inst_impl.hh b/cpu/beta_cpu/alpha_dyn_inst_impl.hh index 4a3ae99d4..3f530e182 100644 --- a/cpu/beta_cpu/alpha_dyn_inst_impl.hh +++ b/cpu/beta_cpu/alpha_dyn_inst_impl.hh @@ -129,7 +129,8 @@ template void AlphaDynInst::syscall() { - this->cpu->syscall(); + this->cpu->syscall(this->threadNumber); +// this->cpu->syscall(); } #endif diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh index 3c29dd277..065b2fc4e 100644 --- a/cpu/beta_cpu/alpha_full_cpu.hh +++ b/cpu/beta_cpu/alpha_full_cpu.hh @@ -28,8 +28,6 @@ class AlphaFullCPU : public FullBetaCPU void regStats(); #ifdef FULL_SYSTEM - bool inPalMode(); - //Note that the interrupt stuff from the base CPU might be somewhat //ISA specific (ie NumInterruptLevels). These functions might not //be needed in FullCPU though. 
@@ -106,13 +104,16 @@ class AlphaFullCPU : public FullBetaCPU } #ifdef FULL_SYSTEM - uint64_t *getIPR(); + uint64_t *getIpr(); uint64_t readIpr(int idx, Fault &fault); Fault setIpr(int idx, uint64_t val); int readIntrFlag(); void setIntrFlag(int val); Fault hwrei(); - bool inPalMode(); + bool inPalMode() { return AlphaISA::PcPAL(this->regFile.readPC()); } + bool inPalMode(uint64_t PC) + { return AlphaISA::PcPAL(PC); } + void trap(Fault fault); bool simPalCheck(int palFunc); @@ -153,7 +154,7 @@ class AlphaFullCPU : public FullBetaCPU } } - void syscall(); + void syscall(short thread_num); void squashStages(); #endif @@ -168,11 +169,13 @@ class AlphaFullCPU : public FullBetaCPU // Not sure this is used anywhere. void intr_post(RegFile *regs, Fault fault, Addr pc); // Actually used within exec files. Implement properly. - void swap_palshadow(RegFile *regs, bool use_shadow); + void swapPALShadow(bool use_shadow); // Called by CPU constructor. Can implement as I please. void initCPU(RegFile *regs); // Called by initCPU. Implement as I please. void initIPRs(RegFile *regs); + + void halt() { panic("Halt not implemented!\n"); } #endif @@ -193,6 +196,11 @@ class AlphaFullCPU : public FullBetaCPU return error; } + template + Fault read(MemReqPtr &req, T &data, int load_idx) + { + return this->iew.ldstQueue.read(req, data, load_idx); + } template Fault write(MemReqPtr &req, T &data) @@ -218,7 +226,7 @@ class AlphaFullCPU : public FullBetaCPU std::cerr << "Warning: " << req->xc->storeCondFailures << " consecutive store conditional failures " - << "on cpu " << cpu_id + << "on cpu " << this->cpu_id << std::endl; } return No_Fault; @@ -232,8 +240,8 @@ class AlphaFullCPU : public FullBetaCPU // and all other stores (WH64?). Unsuccessful Store // Conditionals would have returned above, and wouldn't fall // through. 
- for (int i = 0; i < system->execContexts.size(); i++){ - cregs = &system->execContexts[i]->regs.miscRegs; + for (int i = 0; i < this->system->execContexts.size(); i++){ + cregs = &this->system->execContexts[i]->regs.miscRegs; if ((cregs->lock_addr & ~0xf) == (req->paddr & ~0xf)) { cregs->lock_flag = false; } @@ -244,6 +252,12 @@ class AlphaFullCPU : public FullBetaCPU return this->mem->write(req, (T)htoa(data)); } + template + Fault write(MemReqPtr &req, T &data, int store_idx) + { + return this->iew.ldstQueue.write(req, data, store_idx); + } + }; #endif // __CPU_BETA_CPU_ALPHA_FULL_CPU_HH__ diff --git a/cpu/beta_cpu/alpha_full_cpu_builder.cc b/cpu/beta_cpu/alpha_full_cpu_builder.cc index cf9536cb8..dc5b1aad1 100644 --- a/cpu/beta_cpu/alpha_full_cpu_builder.cc +++ b/cpu/beta_cpu/alpha_full_cpu_builder.cc @@ -33,8 +33,17 @@ #include "mem/functional_mem/functional_memory.hh" #endif // FULL_SYSTEM -BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU) +class DerivAlphaFullCPU : public AlphaFullCPU +{ + public: + DerivAlphaFullCPU(AlphaSimpleParams p) + : AlphaFullCPU(p) + { } +}; +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + + Param cycle_time; Param numThreads; #ifdef FULL_SYSTEM @@ -44,8 +53,6 @@ SimObjectParam dtb; Param mult; #else SimObjectVectorParam workload; -SimObjectParam process; -Param asid; #endif // FULL_SYSTEM SimObjectParam mem; @@ -120,23 +127,25 @@ Param numROBEntries; Param instShiftAmt; -Param defReg; +Param defer_registration; -END_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU) +Param function_trace; +Param function_trace_start; -BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU) +END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + + INIT_PARAM(cycle_time, "cpu cycle time"), INIT_PARAM(numThreads, "number of HW thread contexts"), #ifdef FULL_SYSTEM INIT_PARAM(system, "System object"), INIT_PARAM(itb, "Instruction translation buffer"), INIT_PARAM(dtb, "Data translation buffer"), - INIT_PARAM_DFLT(mult, "System clock 
multiplier", 1), + INIT_PARAM(mult, "System clock multiplier"), #else INIT_PARAM(workload, "Processes to run"), - INIT_PARAM_DFLT(process, "Process to run", NULL), - INIT_PARAM(asid, "Address space ID"), #endif // FULL_SYSTEM INIT_PARAM_DFLT(mem, "Memory", NULL), @@ -230,14 +239,16 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU) INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), - INIT_PARAM(defReg, "Defer registration") + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") -END_INIT_SIM_OBJECT_PARAMS(BaseFullCPU) +END_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) -CREATE_SIM_OBJECT(BaseFullCPU) +CREATE_SIM_OBJECT(DerivAlphaFullCPU) { - AlphaFullCPU *cpu; + DerivAlphaFullCPU *cpu; #ifdef FULL_SYSTEM if (mult != 1) @@ -255,30 +266,21 @@ CREATE_SIM_OBJECT(BaseFullCPU) fatal("Must specify at least one workload!"); } - Process *actual_process; - - if (process == NULL) { - actual_process = workload[0]; - } else { - actual_process = process; - } - #endif AlphaSimpleParams params; + params.cycleTime = cycle_time; + params.name = getInstanceName(); params.numberOfThreads = actual_num_threads; #ifdef FULL_SYSTEM - params._system = system; + params.system = system; params.itb = itb; params.dtb = dtb; - params.freq = ticksPerSecond * mult; #else params.workload = workload; - params.process = actual_process; - params.asid = asid; #endif // FULL_SYSTEM params.mem = mem; @@ -356,12 +358,15 @@ CREATE_SIM_OBJECT(BaseFullCPU) params.instShiftAmt = 2; - params.defReg = defReg; + params.defReg = defer_registration; - cpu = new AlphaFullCPU(params); + params.functionTrace = function_trace; + params.functionTraceStart = function_trace_start; + + cpu = new DerivAlphaFullCPU(params); return cpu; } -REGISTER_SIM_OBJECT("AlphaFullCPU", BaseFullCPU) 
+REGISTER_SIM_OBJECT("DerivAlphaFullCPU", DerivAlphaFullCPU) diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh index fccded193..c42e9e362 100644 --- a/cpu/beta_cpu/alpha_full_cpu_impl.hh +++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh @@ -12,6 +12,14 @@ #include "cpu/beta_cpu/alpha_params.hh" #include "cpu/beta_cpu/comm.hh" +#ifdef FULL_SYSTEM +#include "arch/alpha/osfpal.hh" +#include "arch/alpha/isa_traits.hh" +//#include "arch/alpha/ev5.hh" + +//using namespace EV5; +#endif + template AlphaFullCPU::AlphaFullCPU(Params ¶ms) : FullBetaCPU(params) @@ -42,9 +50,12 @@ AlphaFullCPU::regStats() #ifndef FULL_SYSTEM +// Will probably need to know which thread is calling syscall +// Will need to pass that information in to the DynInst when it is constructed, +// so that this call can be made with the proper thread number. template void -AlphaFullCPU::syscall() +AlphaFullCPU::syscall(short thread_num) { DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n"); @@ -60,7 +71,8 @@ AlphaFullCPU::syscall() // Copy over all important state to xc once all the unrolling is done. copyToXC(); - this->process->syscall(this->xc); + this->thread[0]->syscall(); +// this->thread[thread_num]->syscall(); // Copy over all important state back to CPU. 
copyFromXC(); @@ -102,6 +114,8 @@ AlphaFullCPU::squashStages() this->iew.squash(); this->iewQueue.advance(); this->iewQueue.advance(); + // Needs to tell the LSQ to write back all of its data + this->iew.lsqWriteback(); this->rob.squash(rob_head); this->commit.setSquashing(); @@ -203,390 +217,35 @@ template uint64_t * AlphaFullCPU::getIpr() { - return regFile.getIpr(); + return this->regFile.getIpr(); } template uint64_t AlphaFullCPU::readIpr(int idx, Fault &fault) { - uint64_t *ipr = getIpr(); - uint64_t retval = 0; // return value, default 0 - - switch (idx) { - case AlphaISA::IPR_PALtemp0: - case AlphaISA::IPR_PALtemp1: - case AlphaISA::IPR_PALtemp2: - case AlphaISA::IPR_PALtemp3: - case AlphaISA::IPR_PALtemp4: - case AlphaISA::IPR_PALtemp5: - case AlphaISA::IPR_PALtemp6: - case AlphaISA::IPR_PALtemp7: - case AlphaISA::IPR_PALtemp8: - case AlphaISA::IPR_PALtemp9: - case AlphaISA::IPR_PALtemp10: - case AlphaISA::IPR_PALtemp11: - case AlphaISA::IPR_PALtemp12: - case AlphaISA::IPR_PALtemp13: - case AlphaISA::IPR_PALtemp14: - case AlphaISA::IPR_PALtemp15: - case AlphaISA::IPR_PALtemp16: - case AlphaISA::IPR_PALtemp17: - case AlphaISA::IPR_PALtemp18: - case AlphaISA::IPR_PALtemp19: - case AlphaISA::IPR_PALtemp20: - case AlphaISA::IPR_PALtemp21: - case AlphaISA::IPR_PALtemp22: - case AlphaISA::IPR_PALtemp23: - case AlphaISA::IPR_PAL_BASE: - - case AlphaISA::IPR_IVPTBR: - case AlphaISA::IPR_DC_MODE: - case AlphaISA::IPR_MAF_MODE: - case AlphaISA::IPR_ISR: - case AlphaISA::IPR_EXC_ADDR: - case AlphaISA::IPR_IC_PERR_STAT: - case AlphaISA::IPR_DC_PERR_STAT: - case AlphaISA::IPR_MCSR: - case AlphaISA::IPR_ASTRR: - case AlphaISA::IPR_ASTER: - case AlphaISA::IPR_SIRR: - case AlphaISA::IPR_ICSR: - case AlphaISA::IPR_ICM: - case AlphaISA::IPR_DTB_CM: - case AlphaISA::IPR_IPLR: - case AlphaISA::IPR_INTID: - case AlphaISA::IPR_PMCTR: - // no side-effect - retval = ipr[idx]; - break; - - case AlphaISA::IPR_CC: - retval |= ipr[idx] & ULL(0xffffffff00000000); - retval |= curTick & 
ULL(0x00000000ffffffff); - break; - - case AlphaISA::IPR_VA: - retval = ipr[idx]; - break; - - case AlphaISA::IPR_VA_FORM: - case AlphaISA::IPR_MM_STAT: - case AlphaISA::IPR_IFAULT_VA_FORM: - case AlphaISA::IPR_EXC_MASK: - case AlphaISA::IPR_EXC_SUM: - retval = ipr[idx]; - break; - - case AlphaISA::IPR_DTB_PTE: - { - AlphaISA::PTE &pte = dtb->index(!misspeculating()); - - retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32; - retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; - retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; - retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; - retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; - retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; - retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; - } - break; - - // write only registers - case AlphaISA::IPR_HWINT_CLR: - case AlphaISA::IPR_SL_XMIT: - case AlphaISA::IPR_DC_FLUSH: - case AlphaISA::IPR_IC_FLUSH: - case AlphaISA::IPR_ALT_MODE: - case AlphaISA::IPR_DTB_IA: - case AlphaISA::IPR_DTB_IAP: - case AlphaISA::IPR_ITB_IA: - case AlphaISA::IPR_ITB_IAP: - fault = Unimplemented_Opcode_Fault; - break; - - default: - // invalid IPR - fault = Unimplemented_Opcode_Fault; - break; - } - - return retval; + return this->regFile.readIpr(idx, fault); } template Fault AlphaFullCPU::setIpr(int idx, uint64_t val) { - uint64_t *ipr = getIpr(); - uint64_t old; - - if (misspeculating()) - return No_Fault; - - switch (idx) { - case AlphaISA::IPR_PALtemp0: - case AlphaISA::IPR_PALtemp1: - case AlphaISA::IPR_PALtemp2: - case AlphaISA::IPR_PALtemp3: - case AlphaISA::IPR_PALtemp4: - case AlphaISA::IPR_PALtemp5: - case AlphaISA::IPR_PALtemp6: - case AlphaISA::IPR_PALtemp7: - case AlphaISA::IPR_PALtemp8: - case AlphaISA::IPR_PALtemp9: - case AlphaISA::IPR_PALtemp10: - case AlphaISA::IPR_PALtemp11: - case AlphaISA::IPR_PALtemp12: - case AlphaISA::IPR_PALtemp13: - case AlphaISA::IPR_PALtemp14: - case AlphaISA::IPR_PALtemp15: - case AlphaISA::IPR_PALtemp16: - case AlphaISA::IPR_PALtemp17: - case 
AlphaISA::IPR_PALtemp18: - case AlphaISA::IPR_PALtemp19: - case AlphaISA::IPR_PALtemp20: - case AlphaISA::IPR_PALtemp21: - case AlphaISA::IPR_PALtemp22: - case AlphaISA::IPR_PAL_BASE: - case AlphaISA::IPR_IC_PERR_STAT: - case AlphaISA::IPR_DC_PERR_STAT: - case AlphaISA::IPR_PMCTR: - // write entire quad w/ no side-effect - ipr[idx] = val; - break; - - case AlphaISA::IPR_CC_CTL: - // This IPR resets the cycle counter. We assume this only - // happens once... let's verify that. - assert(ipr[idx] == 0); - ipr[idx] = 1; - break; - - case AlphaISA::IPR_CC: - // This IPR only writes the upper 64 bits. It's ok to write - // all 64 here since we mask out the lower 32 in rpcc (see - // isa_desc). - ipr[idx] = val; - break; - - case AlphaISA::IPR_PALtemp23: - // write entire quad w/ no side-effect - old = ipr[idx]; - ipr[idx] = val; - kernelStats.context(old, val); - break; - - case AlphaISA::IPR_DTB_PTE: - // write entire quad w/ no side-effect, tag is forthcoming - ipr[idx] = val; - break; - - case AlphaISA::IPR_EXC_ADDR: - // second least significant bit in PC is always zero - ipr[idx] = val & ~2; - break; - - case AlphaISA::IPR_ASTRR: - case AlphaISA::IPR_ASTER: - // only write least significant four bits - privilege mask - ipr[idx] = val & 0xf; - break; - - case AlphaISA::IPR_IPLR: -#ifdef DEBUG - if (break_ipl != -1 && break_ipl == (val & 0x1f)) - debug_break(); -#endif - - // only write least significant five bits - interrupt level - ipr[idx] = val & 0x1f; - kernelStats.swpipl(ipr[idx]); - break; - - case AlphaISA::IPR_DTB_CM: - kernelStats.mode((val & 0x18) != 0); - - case AlphaISA::IPR_ICM: - // only write two mode bits - processor mode - ipr[idx] = val & 0x18; - break; - - case AlphaISA::IPR_ALT_MODE: - // only write two mode bits - processor mode - ipr[idx] = val & 0x18; - break; - - case AlphaISA::IPR_MCSR: - // more here after optimization... 
- ipr[idx] = val; - break; - - case AlphaISA::IPR_SIRR: - // only write software interrupt mask - ipr[idx] = val & 0x7fff0; - break; - - case AlphaISA::IPR_ICSR: - ipr[idx] = val & ULL(0xffffff0300); - break; - - case AlphaISA::IPR_IVPTBR: - case AlphaISA::IPR_MVPTBR: - ipr[idx] = val & ULL(0xffffffffc0000000); - break; - - case AlphaISA::IPR_DC_TEST_CTL: - ipr[idx] = val & 0x1ffb; - break; - - case AlphaISA::IPR_DC_MODE: - case AlphaISA::IPR_MAF_MODE: - ipr[idx] = val & 0x3f; - break; - - case AlphaISA::IPR_ITB_ASN: - ipr[idx] = val & 0x7f0; - break; - - case AlphaISA::IPR_DTB_ASN: - ipr[idx] = val & ULL(0xfe00000000000000); - break; - - case AlphaISA::IPR_EXC_SUM: - case AlphaISA::IPR_EXC_MASK: - // any write to this register clears it - ipr[idx] = 0; - break; - - case AlphaISA::IPR_INTID: - case AlphaISA::IPR_SL_RCV: - case AlphaISA::IPR_MM_STAT: - case AlphaISA::IPR_ITB_PTE_TEMP: - case AlphaISA::IPR_DTB_PTE_TEMP: - // read-only registers - return Unimplemented_Opcode_Fault; - - case AlphaISA::IPR_HWINT_CLR: - case AlphaISA::IPR_SL_XMIT: - case AlphaISA::IPR_DC_FLUSH: - case AlphaISA::IPR_IC_FLUSH: - // the following are write only - ipr[idx] = val; - break; - - case AlphaISA::IPR_DTB_IA: - // really a control write - ipr[idx] = 0; - - dtb->flushAll(); - break; - - case AlphaISA::IPR_DTB_IAP: - // really a control write - ipr[idx] = 0; - - dtb->flushProcesses(); - break; - - case AlphaISA::IPR_DTB_IS: - // really a control write - ipr[idx] = val; - - dtb->flushAddr(val, DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN])); - break; - - case AlphaISA::IPR_DTB_TAG: { - struct AlphaISA::PTE pte; - - // FIXME: granularity hints NYI... 
- if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0) - panic("PTE GH field != 0"); - - // write entire quad - ipr[idx] = val; - - // construct PTE for new entry - pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]); - pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]); - pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]); - pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]); - pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]); - pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]); - pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]); - - // insert new TAG/PTE value into data TLB - dtb->insert(val, pte); - } - break; - - case AlphaISA::IPR_ITB_PTE: { - struct AlphaISA::PTE pte; - - // FIXME: granularity hints NYI... - if (ITB_PTE_GH(val) != 0) - panic("PTE GH field != 0"); - - // write entire quad - ipr[idx] = val; - - // construct PTE for new entry - pte.ppn = ITB_PTE_PPN(val); - pte.xre = ITB_PTE_XRE(val); - pte.xwe = 0; - pte.fonr = ITB_PTE_FONR(val); - pte.fonw = ITB_PTE_FONW(val); - pte.asma = ITB_PTE_ASMA(val); - pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]); - - // insert new TAG/PTE value into data TLB - itb->insert(ipr[AlphaISA::IPR_ITB_TAG], pte); - } - break; - - case AlphaISA::IPR_ITB_IA: - // really a control write - ipr[idx] = 0; - - itb->flushAll(); - break; - - case AlphaISA::IPR_ITB_IAP: - // really a control write - ipr[idx] = 0; - - itb->flushProcesses(); - break; - - case AlphaISA::IPR_ITB_IS: - // really a control write - ipr[idx] = val; - - itb->flushAddr(val, ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN])); - break; - - default: - // invalid IPR - return Unimplemented_Opcode_Fault; - } - - // no error... - return No_Fault; - + return this->regFile.setIpr(idx, val); } template int AlphaFullCPU::readIntrFlag() { - return regs.intrflag; + return this->regFile.readIntrFlag(); } template void AlphaFullCPU::setIntrFlag(int val) { - regs.intrflag = val; + this->regFile.setIntrFlag(val); } // Can force commit stage to squash and stuff. 
@@ -596,36 +255,27 @@ AlphaFullCPU::hwrei() { uint64_t *ipr = getIpr(); - if (!PC_PAL(regs.pc)) + if (!inPalMode()) return Unimplemented_Opcode_Fault; setNextPC(ipr[AlphaISA::IPR_EXC_ADDR]); - if (!misspeculating()) { - kernelStats.hwrei(); +// kernelStats.hwrei(); - if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0) - AlphaISA::swap_palshadow(®s, false); + if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0) +// AlphaISA::swap_palshadow(®s, false); - AlphaISA::check_interrupts = true; - } + this->checkInterrupts = true; // FIXME: XXX check for interrupts? XXX return No_Fault; } -template -bool -AlphaFullCPU::inPalMode() -{ - return PC_PAL(readPC()); -} - template bool AlphaFullCPU::simPalCheck(int palFunc) { - kernelStats.callpal(palFunc); +// kernelStats.callpal(palFunc); switch (palFunc) { case PAL::halt: @@ -636,7 +286,7 @@ AlphaFullCPU::simPalCheck(int palFunc) case PAL::bpt: case PAL::bugchk: - if (system->breakpoint()) + if (this->system->breakpoint()) return false; break; } @@ -651,21 +301,22 @@ template void AlphaFullCPU::trap(Fault fault) { - uint64_t PC = commit.readPC(); + // Keep in mind that a trap may be initiated by fetch if there's a TLB + // miss + uint64_t PC = this->commit.readCommitPC(); DPRINTF(Fault, "Fault %s\n", FaultName(fault)); - Stats::recordEvent(csprintf("Fault %s", FaultName(fault))); + this->recordEvent(csprintf("Fault %s", FaultName(fault))); - assert(!misspeculating()); - kernelStats.fault(fault); +// kernelStats.fault(fault); if (fault == Arithmetic_Fault) panic("Arithmetic traps are unimplemented!"); - AlphaISA::InternalProcReg *ipr = getIpr(); + typename AlphaISA::InternalProcReg *ipr = getIpr(); // exception restart address - Get the commit PC - if (fault != Interrupt_Fault || !PC_PAL(PC)) + if (fault != Interrupt_Fault || !inPalMode(PC)) ipr[AlphaISA::IPR_EXC_ADDR] = PC; if (fault == Pal_Fault || fault == Arithmetic_Fault /* || @@ -674,11 +325,12 @@ AlphaFullCPU::trap(Fault fault) ipr[AlphaISA::IPR_EXC_ADDR] += 4; } - if (!PC_PAL(PC)) - 
AlphaISA::swap_palshadow(®s, true); + if (!inPalMode(PC)) + swapPALShadow(true); - setPC( ipr[AlphaISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault] ); - setNextPC(PC + sizeof(MachInst)); + this->regFile.setPC( ipr[AlphaISA::IPR_PAL_BASE] + + AlphaISA::fault_addr[fault] ); + this->regFile.setNextPC(PC + sizeof(MachInst)); } template @@ -694,7 +346,7 @@ AlphaFullCPU::processInterrupts() // same logical index. template void -AlphaFullCPU::swap_palshadow(RegFile *regs, bool use_shadow) +AlphaFullCPU::swapPALShadow(bool use_shadow) { if (palShadowEnabled == use_shadow) panic("swap_palshadow: wrong PAL shadow state"); @@ -703,6 +355,7 @@ AlphaFullCPU::swap_palshadow(RegFile *regs, bool use_shadow) // Will have to lookup in rename map to get physical registers, then // swap. +/* for (int i = 0; i < AlphaISA::NumIntRegs; i++) { if (reg_redir[i]) { AlphaISA::IntReg temp = regs->intRegFile[i]; @@ -710,6 +363,7 @@ AlphaFullCPU::swap_palshadow(RegFile *regs, bool use_shadow) regs->palregs[i] = temp; } } +*/ } #endif // FULL_SYSTEM diff --git a/cpu/beta_cpu/alpha_params.hh b/cpu/beta_cpu/alpha_params.hh index ecde4b016..fb3468098 100644 --- a/cpu/beta_cpu/alpha_params.hh +++ b/cpu/beta_cpu/alpha_params.hh @@ -20,12 +20,12 @@ class MemInterface; class AlphaSimpleParams : public BaseFullCPU::Params { public: + #ifdef FULL_SYSTEM AlphaITB *itb; AlphaDTB *dtb; #else std::vector workload; Process *process; - short asid; #endif // FULL_SYSTEM FunctionalMemory *mem; diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh index c0afe3d1b..18f76d921 100644 --- a/cpu/beta_cpu/comm.hh +++ b/cpu/beta_cpu/comm.hh @@ -50,7 +50,6 @@ struct SimpleIEWSimpleCommit { bool branchTaken; uint64_t mispredPC; uint64_t nextPC; - unsigned globalHist; InstSeqNum squashedSeqNum; }; @@ -78,7 +77,6 @@ struct TimeBufStruct { bool branchTaken; uint64_t mispredPC; uint64_t nextPC; - unsigned globalHist; }; decodeComm decodeInfo; @@ -113,12 +111,11 @@ struct TimeBufStruct { bool branchTaken; uint64_t 
mispredPC; uint64_t nextPC; - unsigned globalHist; // Think of better names here. // Will need to be a variety of sizes... // Maybe make it a vector, that way only need one object. - std::vector freeRegs; +// std::vector freeRegs; bool robSquashing; @@ -129,7 +126,7 @@ struct TimeBufStruct { // Extra bits of information so that the LDSTQ only updates when it // needs to. - bool commitIsStore; +// bool commitIsStore; bool commitIsLoad; // Communication specifically to the IQ to tell the IQ that it can diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh index 731307bf7..c04dc8085 100644 --- a/cpu/beta_cpu/commit.hh +++ b/cpu/beta_cpu/commit.hh @@ -113,9 +113,6 @@ class SimpleCommit /** Pointer to FullCPU. */ FullCPU *cpu; - /** Pointer to the rename map. DO NOT USE if possible. */ -// typename Impl::CPUPol::RenameMap *renameMap; - //Store buffer interface? Will need to move committed stores to the //store buffer diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh index 3e97b980c..17ede9694 100644 --- a/cpu/beta_cpu/commit_impl.hh +++ b/cpu/beta_cpu/commit_impl.hh @@ -166,9 +166,9 @@ SimpleCommit::commit() // hwrei() is what resets the PC to the place where instruction execution // beings again. #ifdef FULL_SYSTEM - if (ISA::check_interrupts && + if (//checkInterrupts && cpu->check_interrupts() && - !xc->inPalMode()) { + !cpu->inPalMode(readCommitPC())) { // Will need to squash all instructions currently in flight and have // the interrupt handler restart at the last non-committed inst. // Most of that can be handled through the trap() function. The @@ -215,8 +215,6 @@ SimpleCommit::commit() toIEW->commitInfo.mispredPC = fromIEW->mispredPC; - toIEW->commitInfo.globalHist = fromIEW->globalHist; - if (toIEW->commitInfo.branchMispredict) { ++branchMispredicts; } @@ -257,6 +255,9 @@ SimpleCommit::commitInsts() // Can't commit and squash things at the same time... 
//////////////////////////////////// + if (rob->isEmpty()) + return; + DynInstPtr head_inst = rob->readHeadInst(); unsigned num_committed = 0; @@ -275,9 +276,11 @@ SimpleCommit::commitInsts() if (head_inst->isSquashed()) { // Hack to avoid the instruction being retired (and deleted) if // it hasn't been through the IEW stage yet. +/* if (!head_inst->isExecuted()) { break; } +*/ DPRINTF(Commit, "Commit: Retiring squashed instruction from " "ROB.\n"); @@ -341,7 +344,7 @@ SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) // and committed this instruction. cpu->funcExeInst--; - if (head_inst->isStore() || head_inst->isNonSpeculative()) { + if (head_inst->isNonSpeculative()) { DPRINTF(Commit, "Commit: Encountered a store or non-speculative " "instruction at the head of the ROB, PC %#x.\n", head_inst->readPC()); @@ -376,12 +379,14 @@ SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) } // Check if the instruction caused a fault. If so, trap. - if (head_inst->getFault() != No_Fault) { + Fault inst_fault = head_inst->getFault(); + + if (inst_fault != No_Fault && inst_fault != Fake_Mem_Fault) { if (!head_inst->isNop()) { #ifdef FULL_SYSTEM - cpu->trap(fault); + cpu->trap(inst_fault); #else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", head_inst->getFault(), + panic("fault (%d) detected @ PC %08p", inst_fault, head_inst->PC); #endif // FULL_SYSTEM } @@ -390,7 +395,7 @@ SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) // Check if we're really ready to commit. If not then return false. // I'm pretty sure all instructions should be able to commit if they've // reached this far. For now leave this in as a check. - if(!rob->isHeadReady()) { + if (!rob->isHeadReady()) { panic("Commit: Unable to commit head instruction!\n"); return false; } @@ -413,17 +418,7 @@ SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) ++commitCommittedBranches; } - #if 0 - // Check if the instruction has a destination register. 
- // If so add the previous physical register of its logical register's - // destination to the free list through the time buffer. - for (int i = 0; i < head_inst->numDestRegs(); i++) - { - toIEW->commitInfo.freeRegs.push_back(head_inst->prevDestRegIdx(i)); - } -#endif - // Explicit communication back to the LDSTQ that a load has been committed // and can be removed from the LDSTQ. Stores don't need this because // the LDSTQ will already have been told that a store has reached the head @@ -436,6 +431,7 @@ SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) ++commitCommittedLoads; } } +#endif // Now that the instruction is going to be committed, finalize its // trace data. @@ -487,7 +483,7 @@ SimpleCommit::markCompletedInsts() // Grab completed insts out of the IEW instruction queue, and mark // instructions completed within the ROB. for (int inst_num = 0; - inst_num < iewWidth && fromIEW->insts[inst_num]; + inst_num < fromIEW->size && fromIEW->insts[inst_num]; ++inst_num) { DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n", diff --git a/cpu/beta_cpu/cpu_policy.hh b/cpu/beta_cpu/cpu_policy.hh index 6606aba29..50d231609 100644 --- a/cpu/beta_cpu/cpu_policy.hh +++ b/cpu/beta_cpu/cpu_policy.hh @@ -34,7 +34,7 @@ struct SimpleCPUPolicy typedef SimpleFetch Fetch; typedef SimpleDecode Decode; typedef SimpleRename Rename; - typedef SimpleIEW IEW; + typedef SimpleIEW IEW; typedef SimpleCommit Commit; /** The struct for communication between fetch and decode. */ diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh index dd18cf176..af2a5ee54 100644 --- a/cpu/beta_cpu/decode.hh +++ b/cpu/beta_cpu/decode.hh @@ -68,12 +68,16 @@ class SimpleDecode void squash(); private: + inline bool fetchInstsValid(); + void block(); inline void unblock(); void squash(DynInstPtr &inst); + void dumpFetchQueue(); + // Interfaces to objects outside of decode. /** CPU interface. 
*/ FullCPU *cpu; diff --git a/cpu/beta_cpu/decode_impl.hh b/cpu/beta_cpu/decode_impl.hh index 9d88f94ac..43a4e8e95 100644 --- a/cpu/beta_cpu/decode_impl.hh +++ b/cpu/beta_cpu/decode_impl.hh @@ -98,6 +98,13 @@ SimpleDecode::setFetchQueue(TimeBuffer *fq_ptr) fromFetch = fetchQueue->getWire(-fetchToDecodeDelay); } +template +inline bool +SimpleDecode::fetchInstsValid() +{ + return fromFetch->size > 0; +} + template void SimpleDecode::block() @@ -156,14 +163,14 @@ SimpleDecode::squash(DynInstPtr &inst) // Set status to squashing. _status = Squashing; - // Maybe advance the time buffer? Not sure what to do in the normal - // case. - // Clear the skid buffer in case it has any data in it. - while (!skidBuffer.empty()) - { + while (!skidBuffer.empty()) { skidBuffer.pop(); } + + // Squash instructions up until this one + // Slightly unrealistic! + cpu->removeInstsUntil(inst->seqNum); } template @@ -205,7 +212,7 @@ SimpleDecode::tick() if (_status == Unblocking) { ++decodeUnblockCycles; - if (fromFetch->size > 0) { + if (fetchInstsValid()) { // Add the current inputs to the skid buffer so they can be // reprocessed when this stage unblocks. skidBuffer.push(*fromFetch); @@ -216,7 +223,7 @@ SimpleDecode::tick() } else if (_status == Blocked) { ++decodeBlockedCycles; - if (fromFetch->size > 0) { + if (fetchInstsValid()) { block(); } @@ -240,12 +247,12 @@ SimpleDecode::tick() squash(); } } else if (_status == Squashing) { - ++decodeSquashCycles; - if (!fromCommit->commitInfo.squash && !fromCommit->commitInfo.robSquashing) { _status = Running; } else if (fromCommit->commitInfo.squash) { + ++decodeSquashCycles; + squash(); } } @@ -264,8 +271,7 @@ SimpleDecode::decode() // Check time buffer if being told to stall. if (fromRename->renameInfo.stall || fromIEW->iewInfo.stall || - fromCommit->commitInfo.stall) - { + fromCommit->commitInfo.stall) { block(); return; } @@ -273,7 +279,7 @@ SimpleDecode::decode() // Check fetch queue to see if instructions are available. 
// If no available instructions, do nothing, unless this stage is // currently unblocking. - if (fromFetch->size == 0 && _status != Unblocking) { + if (!fetchInstsValid() && _status != Unblocking) { DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n"); // Should I change the status to idle? ++decodeIdleCycles; @@ -286,7 +292,7 @@ SimpleDecode::decode() unsigned to_rename_index = 0; int insts_available = _status == Unblocking ? - skidBuffer.front().size : + skidBuffer.front().size - numInst : fromFetch->size; // Debug block... @@ -308,8 +314,8 @@ SimpleDecode::decode() } #endif - while (insts_available > 0) - { + while (insts_available > 0) + { DPRINTF(Decode, "Decode: Sending instruction to rename.\n"); inst = _status == Unblocking ? skidBuffer.front().insts[numInst] : @@ -331,6 +337,16 @@ SimpleDecode::decode() continue; } + + // Also check if instructions have no source registers. Mark + // them as ready to issue at any time. Not sure if this check + // should exist here or at a later stage; however it doesn't matter + // too much for function correctness. + // Isn't this handled by the inst queue? + if (inst->numSrcRegs() == 0) { + inst->setCanIssue(); + } + // This current instruction is valid, so add it into the decode // queue. The next instruction may not be valid, so check to // see if branches were predicted correctly. @@ -369,16 +385,6 @@ SimpleDecode::decode() // addr (either the immediate, or the branch PC + 4) and redirect // fetch if it's incorrect. - - // Also check if instructions have no source registers. Mark - // them as ready to issue at any time. Not sure if this check - // should exist here or at a later stage; however it doesn't matter - // too much for function correctness. - // Isn't this handled by the inst queue? - if (inst->numSrcRegs() == 0) { - inst->setCanIssue(); - } - // Increment which instruction we're looking at. 
++numInst; ++to_rename_index; diff --git a/cpu/beta_cpu/fetch.hh b/cpu/beta_cpu/fetch.hh index 7a3893708..da22baa9b 100644 --- a/cpu/beta_cpu/fetch.hh +++ b/cpu/beta_cpu/fetch.hh @@ -74,7 +74,6 @@ class SimpleFetch void processCacheCompletion(); -// private: // Figure out PC vs next PC and how it should be updated void squash(const Addr &new_PC); @@ -93,9 +92,6 @@ class SimpleFetch */ bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC); - // Might not want this function... -// inline void recordGlobalHist(DynInstPtr &inst); - /** * Fetches the cache line that contains fetch_PC. Returns any * fault that happened. Puts the data into the class variable @@ -184,9 +180,6 @@ class SimpleFetch /** Mask to get a cache block's address. */ Addr cacheBlkMask; - /** The instruction being fetched. */ -// MachInst inst; - /** The cache line being fetched. */ uint8_t *cacheData; diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh index 90caf9ffe..0ec4c63a3 100644 --- a/cpu/beta_cpu/fetch_impl.hh +++ b/cpu/beta_cpu/fetch_impl.hh @@ -44,6 +44,8 @@ SimpleFetch::SimpleFetch(Params &params) commitToFetchDelay(params.commitToFetchDelay), fetchWidth(params.fetchWidth) { + DPRINTF(Fetch, "Fetch: Fetch constructor called\n"); + // Set status to idle. _status = Idle; @@ -52,7 +54,7 @@ SimpleFetch::SimpleFetch(Params &params) // Not sure of this parameter. I think it should be based on the // thread number. #ifndef FULL_SYSTEM - memReq->asid = params.asid; + memReq->asid = 0; #else memReq->asid = 0; #endif // FULL_SYSTEM @@ -163,21 +165,10 @@ SimpleFetch::processCacheCompletion() // to return. // Can keep track of how many cache accesses go unused due to // misspeculation here. - // How to handle an outstanding miss which gets cancelled due to squash, - // then a new icache miss gets scheduled?
if (_status == IcacheMissStall) _status = IcacheMissComplete; } -#if 0 -template -inline void -SimpleFetch::recordGlobalHist(DynInstPtr &inst) -{ - inst->setGlobalHist(branchPred.BPReadGlobalHist()); -} -#endif - template bool SimpleFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) @@ -311,7 +302,6 @@ SimpleFetch::squashFromDecode(const Addr &new_PC, // Tell the CPU to remove any instructions that are in flight between // fetch and decode. cpu->removeInstsUntil(seq_num); - } template @@ -428,7 +418,9 @@ SimpleFetch::tick() // Switch status to running _status = Running; - ++fetchSquashCycles; + ++fetchCycles; + + fetch(); } else if (_status != IcacheMissStall) { DPRINTF(Fetch, "Fetch: Running stage.\n"); diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc index 04c74393b..3cf5d4aaa 100644 --- a/cpu/beta_cpu/full_cpu.cc +++ b/cpu/beta_cpu/full_cpu.cc @@ -16,7 +16,7 @@ using namespace std; BaseFullCPU::BaseFullCPU(Params &params) - : BaseCPU(&params) + : BaseCPU(&params), cpu_id(0) { } @@ -82,15 +82,14 @@ FullBetaCPU::FullBetaCPU(Params &params) #ifdef FULL_SYSTEM system(params.system), - memCtrl(system->memCtrl), + memCtrl(system->memctrl), physmem(system->physmem), itb(params.itb), dtb(params.dtb), mem(params.mem), #else - process(params.process), - asid(params.asid), - mem(process->getMemory()), + // Hardcoded for a single thread!!
+ mem(params.workload[0]->getMemory()), #endif // FULL_SYSTEM icacheInterface(params.icacheInterface), @@ -100,20 +99,40 @@ FullBetaCPU::FullBetaCPU(Params &params) funcExeInst(0) { _status = Idle; + +#ifndef FULL_SYSTEM + thread.resize(this->number_of_threads); +#endif + + for (int i = 0; i < this->number_of_threads; ++i) { #ifdef FULL_SYSTEM - xc = new ExecContext(this, 0, system, itb, dtb, mem); + assert(i == 0); + system->execContexts[i] = + new ExecContext(this, i, system, itb, dtb, mem); - // initialize CPU, including PC - TheISA::initCPU(&xc->regs); + // initialize CPU, including PC + TheISA::initCPU(&system->execContexts[i]->regs); + execContexts.push_back(system->execContexts[i]); #else - DPRINTF(FullCPU, "FullCPU: Process's starting PC is %#x, process is %#x", - process->prog_entry, process); - xc = new ExecContext(this, /* thread_num */ 0, process, /* asid */ 0); - - assert(process->getMemory() != NULL); - assert(mem != NULL); + if (i < params.workload.size()) { + DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, " + "process is %#x", + i, params.workload[i]->prog_entry, thread[i]); + thread[i] = new ExecContext(this, i, params.workload[i], i); + } + assert(params.workload[i]->getMemory() != NULL); + assert(mem != NULL); + execContexts.push_back(thread[i]); #endif // !FULL_SYSTEM - execContexts.push_back(xc); + } + + // Note that this is a hack so that my code which still uses xc-> will + // still work. I should remove this eventually +#ifdef FULL_SYSTEM + xc = system->execContexts[0]; +#else + xc = thread[0]; +#endif // The stages also need their CPU pointer setup. However this must be // done at the upper level CPU because they have pointers to the upper @@ -202,29 +221,33 @@ FullBetaCPU::init() // Need to do a copy of the xc->regs into the CPU's regfile so // that it can start properly.
- +#ifdef FULL_SYSTEM + ExecContext *src_xc = system->execContexts[0]; +#else + ExecContext *src_xc = thread[0]; +#endif // First loop through the integer registers. for (int i = 0; i < Impl::ISA::NumIntRegs; ++i) { - regFile.intRegFile[i] = xc->regs.intRegFile[i]; + regFile.intRegFile[i] = src_xc->regs.intRegFile[i]; } // Then loop through the floating point registers. for (int i = 0; i < Impl::ISA::NumFloatRegs; ++i) { - regFile.floatRegFile[i].d = xc->regs.floatRegFile.d[i]; - regFile.floatRegFile[i].q = xc->regs.floatRegFile.q[i]; + regFile.floatRegFile[i].d = src_xc->regs.floatRegFile.d[i]; + regFile.floatRegFile[i].q = src_xc->regs.floatRegFile.q[i]; } // Then loop through the misc registers. - regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr; - regFile.miscRegs.uniq = xc->regs.miscRegs.uniq; - regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag; - regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr; + regFile.miscRegs.fpcr = src_xc->regs.miscRegs.fpcr; + regFile.miscRegs.uniq = src_xc->regs.miscRegs.uniq; + regFile.miscRegs.lock_flag = src_xc->regs.miscRegs.lock_flag; + regFile.miscRegs.lock_addr = src_xc->regs.miscRegs.lock_addr; // Then finally set the PC and the next PC. - regFile.pc = xc->regs.pc; - regFile.npc = xc->regs.npc; + regFile.pc = src_xc->regs.pc; + regFile.npc = src_xc->regs.npc; } } @@ -277,13 +300,13 @@ FullBetaCPU::takeOverFrom(BaseCPU *oldCPU) // Set all status's to active, schedule the // CPU's tick event. - tickEvent.schedule(curTick); for (int i = 0; i < execContexts.size(); ++i) { - execContexts[i]->activate(); + ExecContext *xc = execContexts[i]; + if (xc->status() == ExecContext::Active && _status != Running) { + _status = Running; + tickEvent.schedule(curTick); + } } - - // Switch out the other CPU. - oldCPU->switchOut(); } template @@ -463,6 +486,7 @@ FullBetaCPU::removeInstsUntil(const InstSeqNum &seq_num) inst_to_delete->seqNum, inst_to_delete->readPC()); // Remove the instruction from the list. 
+ instList.back() = NULL; instList.pop_back(); // Mark it as squashed. diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh index 8ce32b7c7..85fc49371 100644 --- a/cpu/beta_cpu/full_cpu.hh +++ b/cpu/beta_cpu/full_cpu.hh @@ -5,11 +5,12 @@ //itself properly. Constructor. Derived alpha class. Threads! // Avoid running stages and advancing queues if idle/stalled. -#ifndef __SIMPLE_FULL_CPU_HH__ -#define __SIMPLE_FULL_CPU_HH__ +#ifndef __CPU_BETA_CPU_FULL_CPU_HH__ +#define __CPU_BETA_CPU_FULL_CPU_HH__ #include #include +#include #include "cpu/beta_cpu/comm.hh" @@ -20,6 +21,11 @@ #include "cpu/beta_cpu/cpu_policy.hh" #include "sim/process.hh" +#ifdef FULL_SYSTEM +#include "arch/alpha/ev5.hh" +using namespace EV5; +#endif + class FunctionalMemory; class Process; @@ -34,6 +40,9 @@ class BaseFullCPU : public BaseCPU #else BaseFullCPU(Params &params); #endif // FULL_SYSTEM + + private: + int cpu_id; }; template @@ -41,6 +50,7 @@ class FullBetaCPU : public BaseFullCPU { public: //Put typedefs from the Impl here. + typedef typename Impl::ISA ISA; typedef typename Impl::CPUPol CPUPolicy; typedef typename Impl::Params Params; typedef typename Impl::DynInstPtr DynInstPtr; @@ -114,19 +124,21 @@ class FullBetaCPU : public BaseFullCPU bool validDataAddr(Addr addr) { return true; } /** Get instruction asid. */ - int getInstAsid() { return ITB_ASN_ASN(regs.ipr[ISA::IPR_ITB_ASN]); } + int getInstAsid() + { return ITB_ASN_ASN(regFile.getIpr()[ISA::IPR_ITB_ASN]); } /** Get data asid.
*/ - int getDataAsid() { return DTB_ASN_ASN(regs.ipr[ISA::IPR_DTB_ASN]); } + int getDataAsid() + { return DTB_ASN_ASN(regFile.getIpr()[ISA::IPR_DTB_ASN]); } #else bool validInstAddr(Addr addr) - { return process->validInstAddr(addr); } + { return thread[0]->validInstAddr(addr); } bool validDataAddr(Addr addr) - { return process->validDataAddr(addr); } + { return thread[0]->validDataAddr(addr); } - int getInstAsid() { return asid; } - int getDataAsid() { return asid; } + int getInstAsid() { return thread[0]->asid; } + int getDataAsid() { return thread[0]->asid; } #endif @@ -284,7 +296,14 @@ class FullBetaCPU : public BaseFullCPU ExecContext *xc; /** Temporary function to get pointer to exec context. */ - ExecContext *xcBase() { return xc; } + ExecContext *xcBase() + { +#ifdef FULL_SYSTEM + return system->execContexts[0]; +#else + return thread[0]; +#endif + } InstSeqNum globalSeqNum; @@ -299,12 +318,7 @@ class FullBetaCPU : public BaseFullCPU // SWContext *swCtx; #else - Process *process; - - // Address space ID. Note that this is used for TIMING cache - // simulation only; all functional memory accesses should use - // one of the FunctionalMemory pointers above. - short asid; + std::vector thread; #endif FunctionalMemory *mem; diff --git a/cpu/beta_cpu/iew.cc b/cpu/beta_cpu/iew.cc index a90d64434..626c4a90f 100644 --- a/cpu/beta_cpu/iew.cc +++ b/cpu/beta_cpu/iew.cc @@ -4,4 +4,4 @@ #include "cpu/beta_cpu/iew_impl.hh" #include "cpu/beta_cpu/inst_queue.hh" -template class SimpleIEW; +template class SimpleIEW; diff --git a/cpu/beta_cpu/iew.hh b/cpu/beta_cpu/iew.hh index e3e7c6db5..1e5eb2244 100644 --- a/cpu/beta_cpu/iew.hh +++ b/cpu/beta_cpu/iew.hh @@ -14,7 +14,7 @@ //Can IEW even stall? Space should be available/allocated already...maybe //if there's not enough write ports on the ROB or waiting for CDB //arbitration. 
-template +template class SimpleIEW { private: @@ -25,6 +25,7 @@ class SimpleIEW typedef typename Impl::FullCPU FullCPU; typedef typename Impl::Params Params; + typedef typename CPUPol::IQ IQ; typedef typename CPUPol::RenameMap RenameMap; typedef typename CPUPol::LDSTQ LDSTQ; @@ -33,6 +34,7 @@ class SimpleIEW typedef typename CPUPol::RenameStruct RenameStruct; typedef typename CPUPol::IssueStruct IssueStruct; + friend class Impl::FullCPU; public: enum Status { Running, @@ -49,15 +51,17 @@ class SimpleIEW Status _wbStatus; public: - void squash(); + class WritebackEvent : public Event { + private: + DynInstPtr inst; + SimpleIEW *iewStage; - void squashDueToBranch(DynInstPtr &inst); + public: + WritebackEvent(DynInstPtr &_inst, SimpleIEW *_iew); - void squashDueToMem(DynInstPtr &inst); - - void block(); - - inline void unblock(); + virtual void process(); + virtual const char *description(); + }; public: SimpleIEW(Params &params); @@ -74,17 +78,30 @@ class SimpleIEW void setRenameMap(RenameMap *rm_ptr); + void squash(); + + void squashDueToBranch(DynInstPtr &inst); + + void squashDueToMem(DynInstPtr &inst); + + void block(); + + inline void unblock(); + void wakeDependents(DynInstPtr &inst); - void tick(); - - void iew(); + void instToCommit(DynInstPtr &inst); private: void dispatchInsts(); void executeInsts(); + public: + void tick(); + + void iew(); + //Interfaces to objects inside and outside of IEW. /** Time buffer interface. */ TimeBuffer *timeBuffer; @@ -121,11 +138,18 @@ class SimpleIEW /** Skid buffer between rename and IEW. */ std::queue skidBuffer; + protected: /** Instruction queue. */ IQ instQueue; LDSTQ ldstQueue; +#ifndef FULL_SYSTEM + public: + void lsqWriteback(); +#endif + + private: /** Pointer to rename map. Might not want this stage to directly * access this though...
*/ diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh index 1d072ab33..086d39320 100644 --- a/cpu/beta_cpu/iew_impl.hh +++ b/cpu/beta_cpu/iew_impl.hh @@ -12,8 +12,36 @@ #include "base/timebuf.hh" #include "cpu/beta_cpu/iew.hh" -template -SimpleIEW::SimpleIEW(Params &params) +template +SimpleIEW::WritebackEvent::WritebackEvent(DynInstPtr &_inst, + SimpleIEW *_iew) + : Event(&mainEventQueue, CPU_Tick_Pri), inst(_inst), iewStage(_iew) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +SimpleIEW::WritebackEvent::process() +{ + DPRINTF(IEW, "IEW: WRITEBACK EVENT!!!!\n"); + + // Need to insert instruction into queue to commit + iewStage->instToCommit(inst); + // Need to execute second half of the instruction, do actual writing to + // registers and such + inst->execute(); +} + +template +const char * +SimpleIEW::WritebackEvent::description() +{ + return "LSQ writeback event"; +} + +template +SimpleIEW::SimpleIEW(Params &params) : // Just make this time buffer really big for now issueToExecQueue(5, 5), instQueue(params), @@ -36,11 +64,13 @@ SimpleIEW::SimpleIEW(Params &params) // Instruction queue needs the queue between issue and execute.
instQueue.setIssueToExecuteQueue(&issueToExecQueue); + + ldstQueue.setIEW(this); } -template +template void -SimpleIEW::regStats() +SimpleIEW::regStats() { instQueue.regStats(); @@ -111,9 +141,9 @@ SimpleIEW::regStats() .desc("Number of branches that were predicted taken incorrectly"); } -template +template void -SimpleIEW::setCPU(FullCPU *cpu_ptr) +SimpleIEW::setCPU(FullCPU *cpu_ptr) { DPRINTF(IEW, "IEW: Setting CPU pointer.\n"); cpu = cpu_ptr; @@ -122,9 +152,9 @@ SimpleIEW::setCPU(FullCPU *cpu_ptr) ldstQueue.setCPU(cpu_ptr); } -template +template void -SimpleIEW::setTimeBuffer(TimeBuffer *tb_ptr) +SimpleIEW::setTimeBuffer(TimeBuffer *tb_ptr) { DPRINTF(IEW, "IEW: Setting time buffer pointer.\n"); timeBuffer = tb_ptr; @@ -139,9 +169,9 @@ SimpleIEW::setTimeBuffer(TimeBuffer *tb_ptr) instQueue.setTimeBuffer(tb_ptr); } -template +template void -SimpleIEW::setRenameQueue(TimeBuffer *rq_ptr) +SimpleIEW::setRenameQueue(TimeBuffer *rq_ptr) { DPRINTF(IEW, "IEW: Setting rename queue pointer.\n"); renameQueue = rq_ptr; @@ -150,9 +180,9 @@ SimpleIEW::setRenameQueue(TimeBuffer *rq_ptr) fromRename = renameQueue->getWire(-renameToIEWDelay); } -template +template void -SimpleIEW::setIEWQueue(TimeBuffer *iq_ptr) +SimpleIEW::setIEWQueue(TimeBuffer *iq_ptr) { DPRINTF(IEW, "IEW: Setting IEW queue pointer.\n"); iewQueue = iq_ptr; @@ -161,63 +191,17 @@ SimpleIEW::setIEWQueue(TimeBuffer *iq_ptr) toCommit = iewQueue->getWire(0); } -template +template void -SimpleIEW::setRenameMap(RenameMap *rm_ptr) +SimpleIEW::setRenameMap(RenameMap *rm_ptr) { DPRINTF(IEW, "IEW: Setting rename map pointer.\n"); renameMap = rm_ptr; } -template +template void -SimpleIEW::wakeDependents(DynInstPtr &inst) -{ - instQueue.wakeDependents(inst); -} - -template -void -SimpleIEW::block() -{ - DPRINTF(IEW, "IEW: Blocking.\n"); - // Set the status to Blocked. - _status = Blocked; - - // Add the current inputs to the skid buffer so they can be - // reprocessed when this stage unblocks. 
- skidBuffer.push(*fromRename); - - // Note that this stage only signals previous stages to stall when - // it is the cause of the stall originates at this stage. Otherwise - // the previous stages are expected to check all possible stall signals. -} - -template -inline void -SimpleIEW::unblock() -{ - // Check if there's information in the skid buffer. If there is, then - // set status to unblocking, otherwise set it directly to running. - DPRINTF(IEW, "IEW: Reading instructions out of the skid " - "buffer.\n"); - // Remove the now processed instructions from the skid buffer. - skidBuffer.pop(); - - // If there's still information in the skid buffer, then - // continue to tell previous stages to stall. They will be - // able to restart once the skid buffer is empty. - if (!skidBuffer.empty()) { - toRename->iewInfo.stall = true; - } else { - DPRINTF(IEW, "IEW: Stage is done unblocking.\n"); - _status = Running; - } -} - -template -void -SimpleIEW::squash() +SimpleIEW::squash() { DPRINTF(IEW, "IEW: Squashing all instructions.\n"); _status = Squashing; @@ -229,9 +213,9 @@ SimpleIEW::squash() ldstQueue.squash(fromCommit->commitInfo.doneSeqNum); } -template +template void -SimpleIEW::squashDueToBranch(DynInstPtr &inst) +SimpleIEW::squashDueToBranch(DynInstPtr &inst) { DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", inst->PC); @@ -251,9 +235,9 @@ SimpleIEW::squashDueToBranch(DynInstPtr &inst) (inst->readPC() + sizeof(MachInst)); } -template +template void -SimpleIEW::squashDueToMem(DynInstPtr &inst) +SimpleIEW::squashDueToMem(DynInstPtr &inst) { DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", inst->PC); @@ -268,9 +252,63 @@ SimpleIEW::squashDueToMem(DynInstPtr &inst) toCommit->nextPC = inst->readNextPC(); } -template +template void -SimpleIEW::dispatchInsts() +SimpleIEW::block() +{ + DPRINTF(IEW, "IEW: Blocking.\n"); + // Set the status to Blocked. 
+ _status = Blocked; + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidBuffer.push(*fromRename); + + // Note that this stage only signals previous stages to stall when + // it is the cause of the stall originates at this stage. Otherwise + // the previous stages are expected to check all possible stall signals. +} + +template +inline void +SimpleIEW::unblock() +{ + // Check if there's information in the skid buffer. If there is, then + // set status to unblocking, otherwise set it directly to running. + DPRINTF(IEW, "IEW: Reading instructions out of the skid " + "buffer.\n"); + // Remove the now processed instructions from the skid buffer. + skidBuffer.pop(); + + // If there's still information in the skid buffer, then + // continue to tell previous stages to stall. They will be + // able to restart once the skid buffer is empty. + if (!skidBuffer.empty()) { + toRename->iewInfo.stall = true; + } else { + DPRINTF(IEW, "IEW: Stage is done unblocking.\n"); + _status = Running; + } +} + +template +void +SimpleIEW::wakeDependents(DynInstPtr &inst) +{ + instQueue.wakeDependents(inst); +} + + +template +void +SimpleIEW::instToCommit(DynInstPtr &inst) +{ + +} + +template +void +SimpleIEW::dispatchInsts() { //////////////////////////////////////// // DISPATCH/ISSUE stage @@ -329,14 +367,14 @@ SimpleIEW::dispatchInsts() // a signal to this stage to issue and execute that // store. Change to be a bit that says the instruction // has extra work to do at commit. 
- inst->setCanCommit(); +// inst->setCanCommit(); - instQueue.insertNonSpec(inst); +// instQueue.insertNonSpec(inst); ++iewDispStoreInsts; - ++iewDispNonSpecInsts; +// ++iewDispNonSpecInsts; - continue; +// continue; } else if (inst->isNonSpeculative()) { DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction " "encountered, skipping.\n"); @@ -385,9 +423,9 @@ SimpleIEW::dispatchInsts() } } -template +template void -SimpleIEW::executeInsts() +SimpleIEW::executeInsts() { //////////////////////////////////////// //EXECUTE/WRITEBACK stage @@ -403,6 +441,8 @@ SimpleIEW::executeInsts() int fu_usage = 0; bool fetch_redirect = false; + int inst_slot = 0; + int time_slot = 0; // Execute/writeback any instructions that are available. for (int inst_num = 0; @@ -452,7 +492,7 @@ SimpleIEW::executeInsts() ++iewExecLoadInsts; } else if (inst->isStore()) { - ldstQueue.executeStore(); + ldstQueue.executeStore(inst); ++iewExecStoreInsts; } else { @@ -473,9 +513,23 @@ SimpleIEW::executeInsts() // For now naively assume that all instructions take one cycle. // Otherwise would have to look into the time buffer based on the // latency of the instruction. + (*iewQueue)[time_slot].insts[inst_slot]; + while ((*iewQueue)[time_slot].insts[inst_slot]) { + if (inst_slot < issueWidth) { + ++inst_slot; + } else { + ++time_slot; + inst_slot = 0; + } + + assert(time_slot < 5); + } + + // May actually have to work this out, especially with loads and stores // Add finished instruction to queue to commit. - toCommit->insts[inst_num] = inst; + (*iewQueue)[time_slot].insts[inst_slot] = inst; + (*iewQueue)[time_slot].size++; // Check if branch was correct. This check happens after the // instruction is added to the queue because even if the branch @@ -518,9 +572,9 @@ SimpleIEW::executeInsts() } } -template +template void -SimpleIEW::tick() +SimpleIEW::tick() { // Considering putting all the state-determining stuff in this section. 
@@ -594,14 +648,20 @@ SimpleIEW::tick() // Write back number of free IQ entries here. toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries(); + ldstQueue.writebackStores(); + // Check the committed load/store signals to see if there's a load // or store to commit. Also check if it's being told to execute a // nonspeculative instruction. - if (fromCommit->commitInfo.commitIsStore) { + // This is pretty inefficient... +// if (0/*fromCommit->commitInfo.commitIsStore*/) { + if (!fromCommit->commitInfo.squash && + !fromCommit->commitInfo.robSquashing) { ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum); - } else if (fromCommit->commitInfo.commitIsLoad) { +// } else if (fromCommit->commitInfo.commitIsLoad) { ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum); } +// } if (fromCommit->commitInfo.nonSpecSeqNum != 0) { instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum); @@ -611,9 +671,9 @@ SimpleIEW::tick() instQueue.numFreeEntries()); } -template +template void -SimpleIEW::iew() +SimpleIEW::iew() { // Might want to put all state checks in the tick() function. // Check if being told to stall from commit. @@ -663,3 +723,12 @@ SimpleIEW::iew() // Not the best place for it, but this works (hopefully). issueToExecQueue.advance(); } + +#ifndef FULL_SYSTEM +template +void +SimpleIEW::lsqWriteback() +{ + ldstQueue.writebackAllInsts(); +} +#endif diff --git a/cpu/beta_cpu/inst_queue.hh b/cpu/beta_cpu/inst_queue.hh index 6fcce70a4..120e6b940 100644 --- a/cpu/beta_cpu/inst_queue.hh +++ b/cpu/beta_cpu/inst_queue.hh @@ -174,7 +174,7 @@ class InstructionQueue * once the IQ gets a signal from commit. While it's redundant to * have the key be a part of the value (the sequence number is stored * inside of DynInst), when these instructions are woken up only - * the sequence number will be available. Thus it is necessary to be + * the sequence number will be available. Thus it is most efficient to be * able to search by the sequence number alone. 
*/ std::map nonSpecInsts; diff --git a/cpu/beta_cpu/inst_queue_impl.hh b/cpu/beta_cpu/inst_queue_impl.hh index c688181ed..d4e3939cf 100644 --- a/cpu/beta_cpu/inst_queue_impl.hh +++ b/cpu/beta_cpu/inst_queue_impl.hh @@ -31,8 +31,6 @@ InstructionQueue::InstructionQueue(Params &params) numPhysFloatRegs(params.numPhysFloatRegs), commitToIEWDelay(params.commitToIEWDelay) { - DPRINTF(IQ, "IQ: Int width is %i.\n", params.executeIntWidth); - // Initialize the number of free IQ entries. freeEntries = numEntries; @@ -291,10 +289,6 @@ InstructionQueue::insertNonSpec(DynInstPtr &inst) // Decrease the number of free entries. --freeEntries; - // Look through its source registers (physical regs), and mark any - // dependencies. -// addToDependents(inst); - // Have this instruction set itself as the producer of its destination // register(s). createDependency(inst); @@ -568,15 +562,20 @@ InstructionQueue::scheduleReadyInsts() break; case Squashed: - issuing_inst = squashed_head_inst; +// issuing_inst = squashed_head_inst; + assert(0 && "Squashed insts should not issue any more!"); squashedInsts.pop(); + // Set the squashed instruction as able to commit so that commit + // can just drop it from the ROB. This is a bit faked. ++squashed_issued; + ++freeEntries; + DPRINTF(IQ, "IQ: Issuing squashed instruction PC %#x.\n", - issuing_inst->readPC()); + squashed_head_inst->readPC()); break; } - if (list_with_oldest != None) { + if (list_with_oldest != None && list_with_oldest != Squashed) { i2e_info->insts[total_issued] = issuing_inst; i2e_info->size++; @@ -641,8 +640,10 @@ InstructionQueue::squash() // Setup the squash iterator to point to the tail. squashIt = tail; - // Call doSquash. - doSquash(); + // Call doSquash if there are insts in the IQ + if (freeEntries != numEntries) { + doSquash(); + } // Also tell the memory dependence unit to squash. memDepUnit.squash(squashedSeqNum); @@ -672,12 +673,12 @@ InstructionQueue::doSquash() // Remove the instruction from the dependency list.
// Hack for now: These below don't add themselves to the // dependency list, so don't try to remove them. - if (!squashed_inst->isNonSpeculative() && - !squashed_inst->isStore()) { - int8_t total_src_regs = squashed_inst->numSrcRegs(); + if (!squashed_inst->isNonSpeculative()/* && + !squashed_inst->isStore()*/ + ) { for (int src_reg_idx = 0; - src_reg_idx < total_src_regs; + src_reg_idx < squashed_inst->numSrcRegs(); src_reg_idx++) { PhysRegIndex src_reg = @@ -699,6 +700,8 @@ InstructionQueue::doSquash() // Might want to remove producers as well. } else { + nonSpecInsts[squashed_inst->seqNum] = NULL; + nonSpecInsts.erase(squashed_inst->seqNum); ++iqSquashedNonSpecRemoved; @@ -709,7 +712,11 @@ InstructionQueue::doSquash() // Mark it as squashed within the IQ. squashed_inst->setSquashedInIQ(); - squashedInsts.push(squashed_inst); +// squashedInsts.push(squashed_inst); + squashed_inst->setIssued(); + squashed_inst->setCanCommit(); + + ++freeEntries; DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n", squashed_inst->readPC()); @@ -718,6 +725,13 @@ InstructionQueue::doSquash() --squashIt; ++iqSquashedInstsExamined; } + + assert(freeEntries <= numEntries); + + if (freeEntries == numEntries) { + tail = cpu->instList.end(); + } + } template @@ -739,8 +753,6 @@ InstructionQueue::wakeDependents(DynInstPtr &completed_inst) //Look at the physical destination register of the DynInst //and look it up on the dependency graph. Then mark as ready //any instructions within the instruction queue. 
- int8_t total_dest_regs = completed_inst->numDestRegs(); - DependencyEntry *curr; // Tell the memory dependence unit to wake any dependents on this @@ -751,7 +763,7 @@ InstructionQueue::wakeDependents(DynInstPtr &completed_inst) } for (int dest_reg_idx = 0; - dest_reg_idx < total_dest_regs; + dest_reg_idx < completed_inst->numDestRegs(); dest_reg_idx++) { PhysRegIndex dest_reg = @@ -759,7 +771,7 @@ InstructionQueue::wakeDependents(DynInstPtr &completed_inst) // Special case of uniq or control registers. They are not // handled by the IQ and thus have no dependency graph entry. - // @todo Figure out a cleaner way to handle thie. + // @todo Figure out a cleaner way to handle this. if (dest_reg >= numPhysRegs) { continue; } @@ -789,6 +801,8 @@ InstructionQueue::wakeDependents(DynInstPtr &completed_inst) DependencyEntry::mem_alloc_counter--; + curr->inst = NULL; + delete curr; } @@ -874,7 +888,10 @@ InstructionQueue::createDependency(DynInstPtr &new_inst) dependGraph[dest_reg].inst = new_inst; - assert(!dependGraph[dest_reg].next); + if (dependGraph[dest_reg].next) { + dumpDependGraph(); + panic("IQ: Dependency graph not empty!"); + } // Mark the scoreboard to say it's not yet ready. regScoreboard[dest_reg] = false; @@ -929,36 +946,12 @@ InstructionQueue::DependencyEntry::remove(DynInstPtr &inst_to_remove) --mem_alloc_counter; + // Could push this off to the destructor of DependencyEntry + curr->inst = NULL; + delete curr; } -template -void -InstructionQueue::dumpDependGraph() -{ - DependencyEntry *curr; - - for (int i = 0; i < numPhysRegs; ++i) - { - curr = &dependGraph[i]; - - if (curr->inst) { - cprintf("dependGraph[%i]: producer: %#x consumer: ", i, - curr->inst->readPC()); - } else { - cprintf("dependGraph[%i]: No producer. 
consumer: ", i); - } - - while (curr->next != NULL) { - curr = curr->next; - - cprintf("%#x ", curr->inst->readPC()); - } - - cprintf("\n"); - } -} - template void InstructionQueue::addIfReady(DynInstPtr &inst) @@ -1024,6 +1017,12 @@ InstructionQueue::addIfReady(DynInstPtr &inst) } } +/* + * Caution, this function must not be called prior to tail being updated at + * least once, otherwise it will fail the assertion. This is because + * instList.begin() actually changes upon the insertion of an element into the + * list when the list is empty. + */ template int InstructionQueue::countInsts() @@ -1031,6 +1030,9 @@ InstructionQueue::countInsts() ListIt count_it = cpu->instList.begin(); int total_insts = 0; + if (tail == cpu->instList.end()) + return 0; + while (count_it != tail) { if (!(*count_it)->isIssued()) { ++total_insts; @@ -1051,6 +1053,33 @@ InstructionQueue::countInsts() return total_insts; } +template +void +InstructionQueue::dumpDependGraph() +{ + DependencyEntry *curr; + + for (int i = 0; i < numPhysRegs; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x consumer: ", i, + curr->inst->readPC()); + } else { + cprintf("dependGraph[%i]: No producer. 
consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x ", curr->inst->readPC()); + } + + cprintf("\n"); + } +} + template void InstructionQueue::dumpLists() diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh index a81ed63bc..c9d1b092f 100644 --- a/cpu/beta_cpu/regfile.hh +++ b/cpu/beta_cpu/regfile.hh @@ -1,18 +1,26 @@ -#ifndef __REGFILE_HH__ -#define __REGFILE_HH__ +#ifndef __CPU_BETA_CPU_REGFILE_HH__ +#define __CPU_BETA_CPU_REGFILE_HH__ // @todo: Destructor #include "arch/alpha/isa_traits.hh" +#include "base/trace.hh" #include "cpu/beta_cpu/comm.hh" -#include "base/trace.hh" +#ifdef FULL_SYSTEM +#include "kern/kernel_stats.hh" +#include "arch/alpha/ev5.hh" + +using namespace EV5; +#endif // This really only depends on the ISA, and not the Impl. It might be nicer // to see if I can make it depend on nothing... // Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA, // and should go in the AlphaFullCPU. +extern void debug_break(); + template class PhysRegFile { @@ -27,6 +35,7 @@ class PhysRegFile //be private eventually with some accessor functions. 
public: typedef typename Impl::ISA ISA; + typedef typename Impl::FullCPU FullCPU; PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs); @@ -177,6 +186,7 @@ class PhysRegFile #ifdef FULL_SYSTEM uint64_t readIpr(int idx, Fault &fault); Fault setIpr(int idx, uint64_t val); + InternalProcReg *getIpr() { return ipr; } int readIntrFlag() { return intrflag; } void setIntrFlag(int val) { intrflag = val; } #endif @@ -196,7 +206,21 @@ class PhysRegFile Addr pc; // program counter Addr npc; // next-cycle program counter +#ifdef FULL_SYSTEM private: + // This is ISA specifc stuff; remove it eventually once ISAImpl is used + IntReg palregs[NumIntRegs]; // PAL shadow registers + InternalProcReg ipr[NumInternalProcRegs]; // internal processor regs + int intrflag; // interrupt flag + bool pal_shadow; // using pal_shadow registers +#endif + + private: + FullCPU *cpu; + + public: + void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; } + unsigned numPhysicalIntRegs; unsigned numPhysicalFloatRegs; }; @@ -269,46 +293,42 @@ PhysRegFile::readIpr(int idx, Fault &fault) case ISA::IPR_IPLR: case ISA::IPR_INTID: case ISA::IPR_PMCTR: - // no side-effect - retval = ipr[idx]; - break; + // no side-effect + retval = ipr[idx]; + break; case ISA::IPR_CC: - retval |= ipr[idx] & ULL(0xffffffff00000000); - retval |= curTick & ULL(0x00000000ffffffff); - break; + retval |= ipr[idx] & ULL(0xffffffff00000000); + retval |= curTick & ULL(0x00000000ffffffff); + break; case ISA::IPR_VA: - // SFX: unlocks interrupt status registers - retval = ipr[idx]; - - if (!misspeculating()) - regs.intrlock = false; - break; + retval = ipr[idx]; + break; case ISA::IPR_VA_FORM: case ISA::IPR_MM_STAT: case ISA::IPR_IFAULT_VA_FORM: case ISA::IPR_EXC_MASK: case ISA::IPR_EXC_SUM: - retval = ipr[idx]; - break; + retval = ipr[idx]; + break; case ISA::IPR_DTB_PTE: - { - ISA::PTE &pte = dtb->index(!misspeculating()); + { + typename ISA::PTE &pte = cpu->dtb->index(1); - retval |= ((u_int64_t)pte.ppn & 
ULL(0x7ffffff)) << 32; - retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; - retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; - retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; - retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; - retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; - retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; - } - break; + retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32; + retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; + retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; + retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; + retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; + retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; + retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; + } + break; - // write only registers + // write only registers case ISA::IPR_HWINT_CLR: case ISA::IPR_SL_XMIT: case ISA::IPR_DC_FLUSH: @@ -318,22 +338,19 @@ PhysRegFile::readIpr(int idx, Fault &fault) case ISA::IPR_DTB_IAP: case ISA::IPR_ITB_IA: case ISA::IPR_ITB_IAP: - fault = Unimplemented_Opcode_Fault; - break; + fault = Unimplemented_Opcode_Fault; + break; default: - // invalid IPR - fault = Unimplemented_Opcode_Fault; - break; + // invalid IPR + fault = Unimplemented_Opcode_Fault; + break; } return retval; } -#ifdef DEBUG -// Cause the simulator to break when changing to the following IPL -int break_ipl = -1; -#endif +extern int break_ipl; template Fault @@ -341,9 +358,6 @@ PhysRegFile::setIpr(int idx, uint64_t val) { uint64_t old; - if (misspeculating()) - return No_Fault; - switch (idx) { case ISA::IPR_PALtemp0: case ISA::IPR_PALtemp1: @@ -372,222 +386,225 @@ PhysRegFile::setIpr(int idx, uint64_t val) case ISA::IPR_IC_PERR_STAT: case ISA::IPR_DC_PERR_STAT: case ISA::IPR_PMCTR: - // write entire quad w/ no side-effect - ipr[idx] = val; - break; + // write entire quad w/ no side-effect + ipr[idx] = val; + break; case ISA::IPR_CC_CTL: - // This IPR resets the cycle counter. We assume this only - // happens once... let's verify that. 
- assert(ipr[idx] == 0); - ipr[idx] = 1; - break; + // This IPR resets the cycle counter. We assume this only + // happens once... let's verify that. + assert(ipr[idx] == 0); + ipr[idx] = 1; + break; case ISA::IPR_CC: - // This IPR only writes the upper 64 bits. It's ok to write - // all 64 here since we mask out the lower 32 in rpcc (see - // isa_desc). - ipr[idx] = val; - break; + // This IPR only writes the upper 64 bits. It's ok to write + // all 64 here since we mask out the lower 32 in rpcc (see + // isa_desc). + ipr[idx] = val; + break; case ISA::IPR_PALtemp23: - // write entire quad w/ no side-effect - old = ipr[idx]; - ipr[idx] = val; - kernelStats.context(old, val); - break; + // write entire quad w/ no side-effect + old = ipr[idx]; + ipr[idx] = val; +// kernelStats.context(old, val); + break; case ISA::IPR_DTB_PTE: - // write entire quad w/ no side-effect, tag is forthcoming - ipr[idx] = val; - break; + // write entire quad w/ no side-effect, tag is forthcoming + ipr[idx] = val; + break; case ISA::IPR_EXC_ADDR: - // second least significant bit in PC is always zero - ipr[idx] = val & ~2; - break; + // second least significant bit in PC is always zero + ipr[idx] = val & ~2; + break; case ISA::IPR_ASTRR: case ISA::IPR_ASTER: - // only write least significant four bits - privilege mask - ipr[idx] = val & 0xf; - break; + // only write least significant four bits - privilege mask + ipr[idx] = val & 0xf; + break; case ISA::IPR_IPLR: #ifdef DEBUG - if (break_ipl != -1 && break_ipl == (val & 0x1f)) - debug_break(); + if (break_ipl != -1 && break_ipl == (val & 0x1f)) + debug_break(); #endif - // only write least significant five bits - interrupt level - ipr[idx] = val & 0x1f; - kernelStats.swpipl(ipr[idx]); - break; + // only write least significant five bits - interrupt level + ipr[idx] = val & 0x1f; +// kernelStats.swpipl(ipr[idx]); + break; case ISA::IPR_DTB_CM: - kernelStats.mode((val & 0x18) != 0); +// if (val & 0x18) +// kernelStats->mode(Kernel::user); +// 
else +// kernelStats->mode(Kernel::kernel); case ISA::IPR_ICM: - // only write two mode bits - processor mode - ipr[idx] = val & 0x18; - break; + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; case ISA::IPR_ALT_MODE: - // only write two mode bits - processor mode - ipr[idx] = val & 0x18; - break; + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; case ISA::IPR_MCSR: - // more here after optimization... - ipr[idx] = val; - break; + // more here after optimization... + ipr[idx] = val; + break; case ISA::IPR_SIRR: - // only write software interrupt mask - ipr[idx] = val & 0x7fff0; - break; + // only write software interrupt mask + ipr[idx] = val & 0x7fff0; + break; case ISA::IPR_ICSR: - ipr[idx] = val & ULL(0xffffff0300); - break; + ipr[idx] = val & ULL(0xffffff0300); + break; case ISA::IPR_IVPTBR: case ISA::IPR_MVPTBR: - ipr[idx] = val & ULL(0xffffffffc0000000); - break; + ipr[idx] = val & ULL(0xffffffffc0000000); + break; case ISA::IPR_DC_TEST_CTL: - ipr[idx] = val & 0x1ffb; - break; + ipr[idx] = val & 0x1ffb; + break; case ISA::IPR_DC_MODE: case ISA::IPR_MAF_MODE: - ipr[idx] = val & 0x3f; - break; + ipr[idx] = val & 0x3f; + break; case ISA::IPR_ITB_ASN: - ipr[idx] = val & 0x7f0; - break; + ipr[idx] = val & 0x7f0; + break; case ISA::IPR_DTB_ASN: - ipr[idx] = val & ULL(0xfe00000000000000); - break; + ipr[idx] = val & ULL(0xfe00000000000000); + break; case ISA::IPR_EXC_SUM: case ISA::IPR_EXC_MASK: - // any write to this register clears it - ipr[idx] = 0; - break; + // any write to this register clears it + ipr[idx] = 0; + break; case ISA::IPR_INTID: case ISA::IPR_SL_RCV: case ISA::IPR_MM_STAT: case ISA::IPR_ITB_PTE_TEMP: case ISA::IPR_DTB_PTE_TEMP: - // read-only registers - return Unimplemented_Opcode_Fault; + // read-only registers + return Unimplemented_Opcode_Fault; case ISA::IPR_HWINT_CLR: case ISA::IPR_SL_XMIT: case ISA::IPR_DC_FLUSH: case ISA::IPR_IC_FLUSH: - // the following are write only - 
ipr[idx] = val; - break; + // the following are write only + ipr[idx] = val; + break; case ISA::IPR_DTB_IA: - // really a control write - ipr[idx] = 0; + // really a control write + ipr[idx] = 0; - dtb->flushAll(); - break; + cpu->dtb->flushAll(); + break; case ISA::IPR_DTB_IAP: - // really a control write - ipr[idx] = 0; + // really a control write + ipr[idx] = 0; - dtb->flushProcesses(); - break; + cpu->dtb->flushProcesses(); + break; case ISA::IPR_DTB_IS: - // really a control write - ipr[idx] = val; + // really a control write + ipr[idx] = val; - dtb->flushAddr(val, DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN])); - break; + cpu->dtb->flushAddr(val, DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN])); + break; case ISA::IPR_DTB_TAG: { - struct ISA::PTE pte; + struct ISA::PTE pte; - // FIXME: granularity hints NYI... - if (DTB_PTE_GH(ipr[ISA::IPR_DTB_PTE]) != 0) - panic("PTE GH field != 0"); + // FIXME: granularity hints NYI... + if (DTB_PTE_GH(ipr[ISA::IPR_DTB_PTE]) != 0) + panic("PTE GH field != 0"); - // write entire quad - ipr[idx] = val; + // write entire quad + ipr[idx] = val; - // construct PTE for new entry - pte.ppn = DTB_PTE_PPN(ipr[ISA::IPR_DTB_PTE]); - pte.xre = DTB_PTE_XRE(ipr[ISA::IPR_DTB_PTE]); - pte.xwe = DTB_PTE_XWE(ipr[ISA::IPR_DTB_PTE]); - pte.fonr = DTB_PTE_FONR(ipr[ISA::IPR_DTB_PTE]); - pte.fonw = DTB_PTE_FONW(ipr[ISA::IPR_DTB_PTE]); - pte.asma = DTB_PTE_ASMA(ipr[ISA::IPR_DTB_PTE]); - pte.asn = DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]); + // construct PTE for new entry + pte.ppn = DTB_PTE_PPN(ipr[ISA::IPR_DTB_PTE]); + pte.xre = DTB_PTE_XRE(ipr[ISA::IPR_DTB_PTE]); + pte.xwe = DTB_PTE_XWE(ipr[ISA::IPR_DTB_PTE]); + pte.fonr = DTB_PTE_FONR(ipr[ISA::IPR_DTB_PTE]); + pte.fonw = DTB_PTE_FONW(ipr[ISA::IPR_DTB_PTE]); + pte.asma = DTB_PTE_ASMA(ipr[ISA::IPR_DTB_PTE]); + pte.asn = DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]); - // insert new TAG/PTE value into data TLB - dtb->insert(val, pte); + // insert new TAG/PTE value into data TLB + cpu->dtb->insert(val, pte); } - break; + break; case 
ISA::IPR_ITB_PTE: { - struct ISA::PTE pte; + struct ISA::PTE pte; - // FIXME: granularity hints NYI... - if (ITB_PTE_GH(val) != 0) - panic("PTE GH field != 0"); + // FIXME: granularity hints NYI... + if (ITB_PTE_GH(val) != 0) + panic("PTE GH field != 0"); - // write entire quad - ipr[idx] = val; + // write entire quad + ipr[idx] = val; - // construct PTE for new entry - pte.ppn = ITB_PTE_PPN(val); - pte.xre = ITB_PTE_XRE(val); - pte.xwe = 0; - pte.fonr = ITB_PTE_FONR(val); - pte.fonw = ITB_PTE_FONW(val); - pte.asma = ITB_PTE_ASMA(val); - pte.asn = ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]); + // construct PTE for new entry + pte.ppn = ITB_PTE_PPN(val); + pte.xre = ITB_PTE_XRE(val); + pte.xwe = 0; + pte.fonr = ITB_PTE_FONR(val); + pte.fonw = ITB_PTE_FONW(val); + pte.asma = ITB_PTE_ASMA(val); + pte.asn = ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]); - // insert new TAG/PTE value into data TLB - itb->insert(ipr[ISA::IPR_ITB_TAG], pte); + // insert new TAG/PTE value into data TLB + cpu->itb->insert(ipr[ISA::IPR_ITB_TAG], pte); } - break; + break; case ISA::IPR_ITB_IA: - // really a control write - ipr[idx] = 0; + // really a control write + ipr[idx] = 0; - itb->flushAll(); - break; + cpu->itb->flushAll(); + break; case ISA::IPR_ITB_IAP: - // really a control write - ipr[idx] = 0; + // really a control write + ipr[idx] = 0; - itb->flushProcesses(); - break; + cpu->itb->flushProcesses(); + break; case ISA::IPR_ITB_IS: - // really a control write - ipr[idx] = val; + // really a control write + ipr[idx] = val; - itb->flushAddr(val, ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN])); - break; + cpu->itb->flushAddr(val, ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN])); + break; default: - // invalid IPR - return Unimplemented_Opcode_Fault; + // invalid IPR + return Unimplemented_Opcode_Fault; } // no error... 
@@ -596,4 +613,4 @@ PhysRegFile::setIpr(int idx, uint64_t val) #endif // #ifdef FULL_SYSTEM -#endif // __REGFILE_HH__ +#endif // __CPU_BETA_CPU_REGFILE_HH__ diff --git a/cpu/beta_cpu/rob.hh b/cpu/beta_cpu/rob.hh index da6b5232a..3e08def74 100644 --- a/cpu/beta_cpu/rob.hh +++ b/cpu/beta_cpu/rob.hh @@ -10,8 +10,6 @@ #include #include -//#include "arch/alpha/isa_traits.hh" - /** * ROB class. Uses the instruction list that exists within the CPU to * represent the ROB. This class doesn't contain that list, but instead diff --git a/cpu/beta_cpu/rob_impl.hh b/cpu/beta_cpu/rob_impl.hh index 86c4e2db1..52d51028e 100644 --- a/cpu/beta_cpu/rob_impl.hh +++ b/cpu/beta_cpu/rob_impl.hh @@ -1,5 +1,5 @@ -#ifndef __ROB_IMPL_HH__ -#define __ROB_IMPL_HH__ +#ifndef __CPU_BETA_CPU_ROB_IMPL_HH__ +#define __CPU_BETA_CPU_ROB_IMPL_HH__ #include "cpu/beta_cpu/rob.hh" @@ -107,10 +107,8 @@ ROB::retireHead() assert(numInstsInROB == countInsts()); assert(numInstsInROB > 0); - DynInstPtr head_inst; - // Get the head ROB instruction. - head_inst = cpu->instList.front(); + DynInstPtr head_inst = cpu->instList.front(); // Make certain this can retire. assert(head_inst->readyToCommit()); @@ -126,11 +124,10 @@ ROB::retireHead() // A special case is needed if the instruction being retired is the // only instruction in the ROB; otherwise the tail iterator will become // invalidated. 
- if (tail == cpu->instList.begin()) { - cpu->removeFrontInst(head_inst); + cpu->removeFrontInst(head_inst); + + if (numInstsInROB == 0) { tail = cpu->instList.end(); - } else { - cpu->removeFrontInst(head_inst); } } @@ -283,4 +280,4 @@ ROB::readTailSeqNum() return (*tail)->seqNum; } -#endif // __ROB_IMPL_HH__ +#endif // __CPU_BETA_CPU_ROB_IMPL_HH__ diff --git a/cpu/ooo_cpu/ooo_cpu.hh b/cpu/ooo_cpu/ooo_cpu.hh index 25fdb39b6..ddbc3b061 100644 --- a/cpu/ooo_cpu/ooo_cpu.hh +++ b/cpu/ooo_cpu/ooo_cpu.hh @@ -122,7 +122,7 @@ class OoOCPU : public BaseCPU enum Status { Running, Idle, - IcacheMissStall, + IcacheMiss, IcacheMissComplete, DcacheMissStall, SwitchedOut @@ -161,6 +161,8 @@ class OoOCPU : public BaseCPU virtual ~OoOCPU(); + void init(); + private: void copyFromXC(); @@ -203,14 +205,21 @@ class OoOCPU : public BaseCPU // Will need to create a cache completion event upon any memory miss. ICacheCompletionEvent iCacheCompletionEvent; + class DCacheCompletionEvent; + + typedef typename + std::list::iterator DCacheCompEventIt; + class DCacheCompletionEvent : public Event { private: OoOCPU *cpu; DynInstPtr inst; + DCacheCompEventIt dcceIt; public: - DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst); + DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst, + DCacheCompEventIt &_dcceIt); virtual void process(); virtual const char *description(); @@ -218,6 +227,11 @@ class OoOCPU : public BaseCPU friend class DCacheCompletionEvent; + protected: + std::list dCacheCompList; + DCacheCompEventIt dcceIt; + + private: Status status() const { return _status; } virtual void activateContext(int thread_num, int delay); @@ -260,6 +274,8 @@ class OoOCPU : public BaseCPU void processICacheCompletion(); + public: + virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); @@ -350,7 +366,7 @@ class OoOCPU : public BaseCPU void commitHeadInst(); - bool grabInst(); + bool getOneInst(); Fault fetchCacheLine(); @@ -471,6 +487,7 @@ class 
OoOCPU : public BaseCPU // ROB tracking stuff. DynInstPtr robHeadPtr; DynInstPtr robTailPtr; + unsigned robSize; unsigned robInsts; // List of outstanding EA instructions. @@ -545,10 +562,8 @@ OoOCPU::read(Addr addr, T &data, unsigned flags, DynInstPtr inst) /*MemAccessResult result = */dcacheInterface->access(readReq); if (dcacheInterface->doEvents()) { - readReq->completionEvent = new DCacheCompletionEvent(this, inst); - lastDcacheStall = curTick; - unscheduleTickEvent(); - _status = DcacheMissStall; + readReq->completionEvent = new DCacheCompletionEvent(this, inst, + dcceIt); } } @@ -579,7 +594,7 @@ OoOCPU::write(T data, Addr addr, unsigned flags, writeReq->reset(addr, sizeof(T), flags); // translate to physical address - Fault fault = xc->translateDataWriteReq(writeReq); + Fault fault = translateDataWriteReq(writeReq); // do functional access if (fault == No_Fault) @@ -593,10 +608,8 @@ OoOCPU::write(T data, Addr addr, unsigned flags, /*MemAccessResult result = */dcacheInterface->access(writeReq); if (dcacheInterface->doEvents()) { - writeReq->completionEvent = new DCacheCompletionEvent(this, inst); - lastDcacheStall = curTick; - unscheduleTickEvent(); - _status = DcacheMissStall; + writeReq->completionEvent = new DCacheCompletionEvent(this, inst, + dcceIt); } } diff --git a/cpu/static_inst.hh b/cpu/static_inst.hh index 3ac88fd3d..4bbe8b636 100644 --- a/cpu/static_inst.hh +++ b/cpu/static_inst.hh @@ -41,16 +41,12 @@ // forward declarations struct AlphaSimpleImpl; -struct OoOImpl; class ExecContext; class DynInst; template class AlphaDynInst; -template -class OoODynInst; - class FastCPU; class SimpleCPU; class InorderCPU; @@ -260,7 +256,7 @@ class StaticInst : public StaticInstBase * obtain the dependence info (numSrcRegs and srcRegIdx[]) for * just the EA computation. 
*/ - virtual + virtual const StaticInstPtr &eaCompInst() const { return nullStaticInstPtr; } /** @@ -269,7 +265,7 @@ class StaticInst : public StaticInstBase * obtain the dependence info (numSrcRegs and srcRegIdx[]) for * just the memory access (not the EA computation). */ - virtual + virtual const StaticInstPtr &memAccInst() const { return nullStaticInstPtr; } /// The binary machine instruction. diff --git a/kern/kernel_stats.hh b/kern/kernel_stats.hh index af93eb95c..66e9911b5 100644 --- a/kern/kernel_stats.hh +++ b/kern/kernel_stats.hh @@ -41,6 +41,9 @@ class BaseCPU; class ExecContext; class FnEvent; +// What does kernel stats expect is included? +class StaticInstBase; +class System; enum Fault; namespace Kernel { diff --git a/python/m5/objects/AlphaFullCPU.mpy b/python/m5/objects/AlphaFullCPU.mpy new file mode 100644 index 000000000..bf3f2d718 --- /dev/null +++ b/python/m5/objects/AlphaFullCPU.mpy @@ -0,0 +1,79 @@ +from BaseCPU import BaseCPU + +simobj DerivAlphaFullCPU(BaseCPU): + type = 'DerivAlphaFullCPU' + + numThreads = Param.Unsigned("number of HW thread contexts") + + if not build_env['FULL_SYSTEM']: + mem = Param.FunctionalMemory(NULL, "memory") + + decodeToFetchDelay = Param.Unsigned("Decode to fetch delay") + renameToFetchDelay = Param.Unsigned("Rename to fetch delay") + iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch " + "delay") + commitToFetchDelay = Param.Unsigned("Commit to fetch delay") + fetchWidth = Param.Unsigned("Fetch width") + + renameToDecodeDelay = Param.Unsigned("Rename to decode delay") + iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode " + "delay") + commitToDecodeDelay = Param.Unsigned("Commit to decode delay") + fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay") + decodeWidth = Param.Unsigned("Decode width") + + iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename " + "delay") + commitToRenameDelay = Param.Unsigned("Commit to rename delay") + decodeToRenameDelay = 
Param.Unsigned("Decode to rename delay") + renameWidth = Param.Unsigned("Rename width") + + commitToIEWDelay = Param.Unsigned("Commit to " + "Issue/Execute/Writeback delay") + renameToIEWDelay = Param.Unsigned("Rename to " + "Issue/Execute/Writeback delay") + issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal " + "to the IEW stage)") + issueWidth = Param.Unsigned("Issue width") + executeWidth = Param.Unsigned("Execute width") + executeIntWidth = Param.Unsigned("Integer execute width") + executeFloatWidth = Param.Unsigned("Floating point execute width") + executeBranchWidth = Param.Unsigned("Branch execute width") + executeMemoryWidth = Param.Unsigned("Memory execute width") + + iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit " + "delay") + renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay") + commitWidth = Param.Unsigned("Commit width") + squashWidth = Param.Unsigned("Squash width") + + local_predictor_size = Param.Unsigned("Size of local predictor") + local_ctr_bits = Param.Unsigned("Bits per counter") + local_history_table_size = Param.Unsigned("Size of local history table") + local_history_bits = Param.Unsigned("Bits for the local history") + global_predictor_size = Param.Unsigned("Size of global predictor") + global_ctr_bits = Param.Unsigned("Bits per counter") + global_history_bits = Param.Unsigned("Bits of history") + choice_predictor_size = Param.Unsigned("Size of choice predictor") + choice_ctr_bits = Param.Unsigned("Bits of choice counters") + + BTBEntries = Param.Unsigned("Number of BTB entries") + BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits") + + RASSize = Param.Unsigned("RAS size") + + LQEntries = Param.Unsigned("Number of load queue entries") + SQEntries = Param.Unsigned("Number of store queue entries") + LFSTSize = Param.Unsigned("Last fetched store table size") + SSITSize = Param.Unsigned("Store set ID table size") + + numPhysIntRegs = Param.Unsigned("Number of physical integer 
registers") + numPhysFloatRegs = Param.Unsigned("Number of physical floating point " + "registers") + numIQEntries = Param.Unsigned("Number of instruction queue entries") + numROBEntries = Param.Unsigned("Number of reorder buffer entries") + + instShiftAmt = Param.Unsigned("Number of bits to shift instructions by") + + function_trace = Param.Bool(False, "Enable function trace") + function_trace_start = Param.Tick(0, "Cycle to start function trace")