diff --git a/src/arch/alpha/isa/mem.isa b/src/arch/alpha/isa/mem.isa index 08a0a2343..a5dda7fc6 100644 --- a/src/arch/alpha/isa/mem.isa +++ b/src/arch/alpha/isa/mem.isa @@ -668,7 +668,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + completeAccTemplate.subst(completeacc_iop)) }}; - def format LoadOrNop(memacc_code, ea_code = {{ EA = Rb + disp; }}, mem_flags = [], inst_flags = []) {{ (header_output, decoder_output, decode_block, exec_output) = \ diff --git a/src/arch/mips/faults.cc b/src/arch/mips/faults.cc index cfeb045eb..2a8ab1df5 100644 --- a/src/arch/mips/faults.cc +++ b/src/arch/mips/faults.cc @@ -25,13 +25,15 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Korey Sewell + * Authors: Gabe Black + * Korey Sewell */ #include "arch/mips/faults.hh" #include "cpu/thread_context.hh" #include "cpu/base.hh" #include "base/trace.hh" + #if !FULL_SYSTEM #include "sim/process.hh" #include "mem/page_table.hh" @@ -110,35 +112,6 @@ FaultName IntegerOverflowFault::_name = "intover"; FaultVect IntegerOverflowFault::_vect = 0x0501; FaultStat IntegerOverflowFault::_count; -#if FULL_SYSTEM - -void MipsFault::invoke(ThreadContext * tc) -{ - FaultBase::invoke(tc); - countStat()++; - - // exception restart address - if (setRestartAddress() || !tc->inPalMode()) - tc->setMiscReg(MipsISA::IPR_EXC_ADDR, tc->readPC()); - - if (skipFaultingInstruction()) { - // traps... skip faulting instruction. - tc->setMiscReg(MipsISA::IPR_EXC_ADDR, - tc->readMiscReg(MipsISA::IPR_EXC_ADDR) + 4); - } - - tc->setPC(tc->readMiscReg(MipsISA::IPR_PAL_BASE) + vect()); - tc->setNextPC(tc->readPC() + sizeof(MachInst)); -} - -void ArithmeticFault::invoke(ThreadContext * tc) -{ - FaultBase::invoke(tc); - panic("Arithmetic traps are unimplemented!"); -} - -#else //!FULL_SYSTEM - void PageTableFault::invoke(ThreadContext *tc) { Process *p = tc->getProcessPtr(); @@ -159,6 +132,5 @@ void PageTableFault::invoke(ThreadContext *tc) } } -#endif } // namespace MipsISA diff --git a/src/arch/mips/faults.hh b/src/arch/mips/faults.hh index 95c61cfbc..9d2c5df32 100644 --- a/src/arch/mips/faults.hh +++ b/src/arch/mips/faults.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Korey Sewell + * Authors: Gabe Black + * Korey Sewell */ #ifndef __MIPS_FAULTS_HH__ diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa index 9ac982e34..13f6f9712 100644 --- a/src/arch/mips/isa/decoder.isa +++ b/src/arch/mips/isa/decoder.isa @@ -1089,7 +1089,7 @@ decode OPCODE_HI default Unknown::unknown() { 0x0: StoreCond::sc({{ Mem.uw = Rt.uw;}}, {{ uint64_t tmp = write_result; Rt.uw = (tmp == 0 || tmp == 1) ? tmp : Rt.uw; - }}, mem_flags=LOCKED); + }}, mem_flags=LOCKED, inst_flags = IsStoreConditional); format StoreMemory { 0x1: swc1({{ Mem.uw = Ft.uw; }}); diff --git a/src/arch/mips/isa/formats/basic.isa b/src/arch/mips/isa/formats/basic.isa index 29dafd541..29a445b2c 100644 --- a/src/arch/mips/isa/formats/basic.isa +++ b/src/arch/mips/isa/formats/basic.isa @@ -26,7 +26,8 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Authors: Korey Sewell +// Authors: Steve Reinhardt +// Korey Sewell // Declarations for execute() methods. def template BasicExecDeclare {{ @@ -85,7 +86,7 @@ def template BasicDecodeWithMnemonic {{ return new %(class_name)s("%(mnemonic)s", machInst); }}; -// The most basic instruction format... used only for a few misc. insts +// The most basic instruction format... def format BasicOp(code, *flags) {{ iop = InstObjParams(name, Name, 'MipsStaticInst', CodeBlock(code), flags) header_output = BasicDeclare.subst(iop) diff --git a/src/arch/mips/isa/formats/branch.isa b/src/arch/mips/isa/formats/branch.isa index 5230ce9cc..9db3108b4 100644 --- a/src/arch/mips/isa/formats/branch.isa +++ b/src/arch/mips/isa/formats/branch.isa @@ -235,10 +235,11 @@ def format Branch(code,*opt_flags) {{ else: inst_flags += (x, ) + #Take into account uncond. branch instruction if 'cond == 1' in code: - inst_flags += ('IsCondControl', ) + inst_flags += ('IsUnCondControl', ) else: - inst_flags += ('IsUncondControl', ) + inst_flags += ('IsCondControl', ) #Condition code code = 'bool cond;\n' + code diff --git a/src/arch/mips/isa/formats/mem.isa b/src/arch/mips/isa/formats/mem.isa index f03f7becd..d6b0c2938 100644 --- a/src/arch/mips/isa/formats/mem.isa +++ b/src/arch/mips/isa/formats/mem.isa @@ -26,7 +26,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Authors: Gabe Black +// Authors: Steve Reinhardt // Korey Sewell //////////////////////////////////////////////////////////////////// @@ -162,7 +162,7 @@ def template InitiateAccDeclare {{ def template CompleteAccDeclare {{ - Fault completeAcc(uint8_t *, %(CPU_exec_context)s *, Trace::InstRecord *) const; + Fault completeAcc(Packet *, %(CPU_exec_context)s *, Trace::InstRecord *) const; }}; @@ -288,7 +288,7 @@ def template LoadInitiateAcc {{ def template LoadCompleteAcc {{ - Fault %(class_name)s::completeAcc(uint8_t *data, + Fault %(class_name)s::completeAcc(Packet *pkt, %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { @@ -297,7 +297,7 @@ def template LoadCompleteAcc {{ %(fp_enable_check)s; %(op_decl)s; - memcpy(&Mem, data, sizeof(Mem)); + Mem = pkt->get(); if (fault == NoFault) { %(memacc_code)s; @@ -390,7 +390,6 @@ def template StoreInitiateAcc {{ { Addr EA; Fault fault = NoFault; - uint64_t write_result = 0; %(fp_enable_check)s; %(op_decl)s; @@ -403,7 +402,7 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA, - memAccessFlags, &write_result); + memAccessFlags, NULL); if (traceData) { traceData->setData(Mem); } } @@ -413,17 +412,38 @@ def template StoreInitiateAcc {{ def template StoreCompleteAcc {{ - Fault %(class_name)s::completeAcc(uint8_t *data, + Fault %(class_name)s::completeAcc(Packet *pkt, %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { Fault fault = NoFault; - uint64_t write_result = 0; %(fp_enable_check)s; %(op_dest_decl)s; - memcpy(&write_result, data, sizeof(write_result)); + if (fault == NoFault) { + %(postacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template StoreCondCompleteAcc {{ + Fault %(class_name)s::completeAcc(Packet *pkt, + %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(fp_enable_check)s; + %(op_dest_decl)s; + + uint64_t write_result = pkt->req->getScResult(); if (fault == NoFault) { %(postacc_code)s; @@ -489,7 +509,7 @@ def template MiscInitiateAcc {{ def template MiscCompleteAcc {{ - Fault %(class_name)s::completeAcc(uint8_t *data, + Fault %(class_name)s::completeAcc(Packet *pkt, %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { @@ -580,5 +600,5 @@ def format StoreCond(memacc_code, postacc_code, mem_flags = [], inst_flags = []) {{ (header_output, decoder_output, decode_block, exec_output) = \ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, - postacc_code, exec_template_base = 'Store') + postacc_code, exec_template_base = 'StoreCond') }}; diff --git a/src/arch/mips/isa/formats/util.isa b/src/arch/mips/isa/formats/util.isa index 0cc375af3..9f25c7725 100644 --- a/src/arch/mips/isa/formats/util.isa +++ b/src/arch/mips/isa/formats/util.isa @@ -65,7 +65,7 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, if (exec_template_base == 'Load'): initiateacc_cblk = CodeBlock(ea_code + memacc_code) completeacc_cblk = CodeBlock(memacc_code + postacc_code) - elif (exec_template_base == 'Store'): + elif (exec_template_base.startswith('Store')): initiateacc_cblk = CodeBlock(ea_code + memacc_code) completeacc_cblk = CodeBlock(postacc_code) else: @@ -83,7 +83,7 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, initiateacc_iop.memacc_code = memacc_cblk.code completeacc_iop.memacc_code = memacc_cblk.code completeacc_iop.postacc_code = postacc_cblk.code - elif (exec_template_base == 'Store'): + elif (exec_template_base.startswith('Store')): initiateacc_iop.ea_code = ea_cblk.code initiateacc_iop.memacc_code = memacc_cblk.code completeacc_iop.postacc_code = postacc_cblk.code @@ -104,6 +104,13 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, memacc_iop.constructor += s # select templates + + # define aliases... most StoreCond templates are the same as the + # corresponding Store templates (only CompleteAcc is different). + StoreCondMemAccExecute = StoreMemAccExecute + StoreCondExecute = StoreExecute + StoreCondInitiateAcc = StoreInitiateAcc + memAccExecTemplate = eval(exec_template_base + 'MemAccExecute') fullExecTemplate = eval(exec_template_base + 'Execute') initiateAccTemplate = eval(exec_template_base + 'InitiateAcc') @@ -118,7 +125,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + initiateAccTemplate.subst(initiateacc_iop) + completeAccTemplate.subst(completeacc_iop)) }}; - output header {{ std::string inst2string(MachInst machInst); }}; diff --git a/src/arch/mips/isa_traits.cc b/src/arch/mips/isa_traits.cc index a8b41270e..1ec4bde3e 100644 --- a/src/arch/mips/isa_traits.cc +++ b/src/arch/mips/isa_traits.cc @@ -73,9 +73,9 @@ void RegFile::serialize(std::ostream &os) { intRegFile.serialize(os); - //SERIALIZE_ARRAY(floatRegFile.q, NumFloatRegs); + //SERIALIZE_ARRAY(floatRegFile, NumFloatRegs); + //SERIALZE_ARRAY(miscRegFile); //SERIALIZE_SCALAR(miscRegs.fpcr); - //SERIALIZE_SCALAR(miscRegs.uniq); //SERIALIZE_SCALAR(miscRegs.lock_flag); //SERIALIZE_SCALAR(miscRegs.lock_addr); SERIALIZE_SCALAR(pc); @@ -88,9 +88,9 @@ void RegFile::unserialize(Checkpoint *cp, const std::string §ion) { intRegFile.unserialize(cp, section); - //UNSERIALIZE_ARRAY(floatRegFile.q, NumFloatRegs); + //UNSERIALIZE_ARRAY(floatRegFile); + //UNSERIALZE_ARRAY(miscRegFile); //UNSERIALIZE_SCALAR(miscRegs.fpcr); - //UNSERIALIZE_SCALAR(miscRegs.uniq); //UNSERIALIZE_SCALAR(miscRegs.lock_flag); //UNSERIALIZE_SCALAR(miscRegs.lock_addr); UNSERIALIZE_SCALAR(pc); diff --git a/src/arch/mips/process.cc b/src/arch/mips/process.cc index cb847fe04..031c2030e 100644 --- a/src/arch/mips/process.cc +++ b/src/arch/mips/process.cc @@ -41,8 +41,6 @@ using namespace std; using namespace MipsISA; -Addr MipsLiveProcess::stack_start = 0x7FFFFFFF; - MipsLiveProcess::MipsLiveProcess(const std::string &nm, ObjectFile *objFile, System *_system, int stdin_fd, int stdout_fd, int stderr_fd, std::vector &argv, std::vector &envp) @@ -51,11 +49,10 @@ MipsLiveProcess::MipsLiveProcess(const std::string &nm, ObjectFile *objFile, { // Set up stack. On MIPS, stack starts at the top of kuseg // user address space. MIPS stack grows down from here - stack_base = stack_start; + stack_base = 0x7FFFFFFF; // Set pointer for next thread stack. Reserve 8M for main stack. next_thread_stack_base = stack_base - (8 * 1024 * 1024); - stack_start = next_thread_stack_base; // Set up break point (Top of Heap) brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize(); diff --git a/src/arch/mips/process.hh b/src/arch/mips/process.hh index 4baee134b..400591599 100644 --- a/src/arch/mips/process.hh +++ b/src/arch/mips/process.hh @@ -51,8 +51,6 @@ class MipsLiveProcess : public LiveProcess void startup(); - - static Addr stack_start; }; diff --git a/src/arch/mips/regfile/misc_regfile.hh b/src/arch/mips/regfile/misc_regfile.hh index 87961f97e..67aef9c63 100644 --- a/src/arch/mips/regfile/misc_regfile.hh +++ b/src/arch/mips/regfile/misc_regfile.hh @@ -45,12 +45,25 @@ namespace MipsISA protected: uint64_t fpcr; // floating point condition codes + // FPCR is not used in MIPS. Condition + // codes are kept as part of the FloatRegFile + bool lock_flag; // lock flag for LL/SC + // use LL reg. in the future + Addr lock_addr; // lock address for LL/SC + // use LLAddr reg. in the future MiscReg miscRegFile[NumMiscRegs]; public: + void clear() + { + fpcr = 0; + lock_flag = 0; + lock_addr = 0; + } + void copyMiscRegs(ThreadContext *tc); MiscReg readReg(int misc_reg) diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 9cc61f74c..40611abe6 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -215,6 +215,9 @@ class BaseDynInst : public FastAlloc, public RefCounted */ Addr nextPC; + /** Next non-speculative NPC. Target PC for Mips or Sparc. */ + Addr nextNPC; + /** Predicted next PC. */ Addr predPC; @@ -275,6 +278,11 @@ class BaseDynInst : public FastAlloc, public RefCounted */ Addr readNextPC() { return nextPC; } + /** Returns the next NPC. This could be the speculative next NPC if it is + * called prior to the actual branch target being calculated. + */ + Addr readNextNPC() { return nextNPC; } + /** Set the predicted target of this current instruction. */ void setPredTarg(Addr predicted_PC) { predPC = predicted_PC; } @@ -282,11 +290,20 @@ class BaseDynInst : public FastAlloc, public RefCounted Addr readPredTarg() { return predPC; } /** Returns whether the instruction was predicted taken or not. */ - bool predTaken() { return predPC != (PC + sizeof(MachInst)); } + bool predTaken() +#if THE_ISA == ALPHA_ISA + { return predPC != (PC + sizeof(MachInst)); } +#else + { return predPC != (nextPC + sizeof(MachInst)); } +#endif /** Returns whether the instruction mispredicted. */ - bool mispredicted() { return predPC != nextPC; } - + bool mispredicted() +#if THE_ISA == ALPHA_ISA + { return predPC != nextPC; } +#else + { return predPC != nextNPC; } +#endif // // Instruction types. Forward checks to StaticInst object. // @@ -308,6 +325,7 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isIndirectCtrl() const { return staticInst->isIndirectCtrl(); } bool isCondCtrl() const { return staticInst->isCondCtrl(); } bool isUncondCtrl() const { return staticInst->isUncondCtrl(); } + bool isCondDelaySlot() const { return staticInst->isCondDelaySlot(); } bool isThreadSync() const { return staticInst->isThreadSync(); } bool isSerializing() const { return staticInst->isSerializing(); } bool isSerializeBefore() const @@ -545,6 +563,12 @@ class BaseDynInst : public FastAlloc, public RefCounted nextPC = val; } + /** Set the next NPC of this instruction (the target in Mips or Sparc).*/ + void setNextNPC(uint64_t val) + { + nextNPC = val; + } + /** Sets the ASID. */ void setASID(short addr_space_id) { asid = addr_space_id; } diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index 91424faad..a344990b4 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -74,6 +74,7 @@ BaseDynInst::BaseDynInst(ExtMachInst machInst, Addr inst_PC, PC = inst_PC; nextPC = PC + sizeof(MachInst); + nextNPC = nextPC + sizeof(MachInst); predPC = pred_PC; initVars(); diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript index e65d41411..44882e5ec 100755 --- a/src/cpu/o3/SConscript +++ b/src/cpu/o3/SConscript @@ -52,15 +52,14 @@ if env['TARGET_ISA'] == 'alpha': alpha/cpu_builder.cc ''') elif env['TARGET_ISA'] == 'mips': - sys.exit('O3 CPU does not support MIPS') - #sources += Split(''' - # mips/dyn_inst.cc - # mips/cpu.cc - # mips/thread_context.cc - # mips/cpu_builder.cc - # ''') + sources += Split(''' + mips/dyn_inst.cc + mips/cpu.cc + mips/thread_context.cc + mips/cpu_builder.cc + ''') elif env['TARGET_ISA'] == 'sparc': - sys.exit('O3 CPU does not support MIPS') + sys.exit('O3 CPU does not support Sparc') #sources += Split(''' # sparc/dyn_inst.cc # sparc/cpu.cc diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh index ad52b0d2e..773f08ecc 100644 --- a/src/cpu/o3/alpha/thread_context.hh +++ b/src/cpu/o3/alpha/thread_context.hh @@ -26,7 +26,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim - * Korey Sewell */ #include "cpu/o3/thread_context.hh" diff --git a/src/cpu/o3/bpred_unit_impl.hh b/src/cpu/o3/bpred_unit_impl.hh index 0da02145b..8dad42011 100644 --- a/src/cpu/o3/bpred_unit_impl.hh +++ b/src/cpu/o3/bpred_unit_impl.hh @@ -159,7 +159,7 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) void *bp_history = NULL; if (inst->isUncondCtrl()) { - DPRINTF(Fetch, "BranchPred: [tid:%i] Unconditional control.\n", tid); + DPRINTF(Fetch, "BranchPred: [tid:%i]: Unconditional control.\n", tid); pred_taken = true; // Tell the BP there was an unconditional branch. BPUncond(bp_history); @@ -201,15 +201,20 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) ++BTBLookups; if (inst->isCall()) { - RAS[tid].push(PC + sizeof(MachInst)); +#if THE_ISA == ALPHA_ISA + Addr ras_pc = PC + sizeof(MachInst); // Next PC +#else + Addr ras_pc = PC + (2 * sizeof(MachInst)); // Next Next PC +#endif + RAS[tid].push(ras_pc); // Record that it was a call so that the top RAS entry can // be popped off if the speculation is incorrect. predict_record.wasCall = true; - DPRINTF(Fetch, "BranchPred: [tid:%i] Instruction %#x was a call" + DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x was a call" ", adding %#x to the RAS.\n", - tid, inst->readPC(), PC + sizeof(MachInst)); + tid, inst->readPC(), ras_pc); } if (BTB.valid(PC, tid)) { @@ -242,7 +247,7 @@ BPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) predHist[tid].push_front(predict_record); - DPRINTF(Fetch, "[tid:%i] predHist.size(): %i\n", tid, predHist[tid].size()); + DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid, predHist[tid].size()); return pred_taken; } @@ -251,8 +256,8 @@ template void BPredUnit::update(const InstSeqNum &done_sn, unsigned tid) { - DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until sequence" - "number %lli.\n", tid, done_sn); + DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until " + "[sn:%lli].\n", tid, done_sn); while (!predHist[tid].empty() && predHist[tid].back().seqNum <= done_sn) { diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index bf1bd08e8..e311910cf 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -88,6 +88,7 @@ struct DefaultIEWDefaultCommit { bool squash[Impl::MaxThreads]; bool branchMispredict[Impl::MaxThreads]; bool branchTaken[Impl::MaxThreads]; + bool condDelaySlotBranch[Impl::MaxThreads]; uint64_t mispredPC[Impl::MaxThreads]; uint64_t nextPC[Impl::MaxThreads]; InstSeqNum squashedSeqNum[Impl::MaxThreads]; @@ -113,6 +114,7 @@ struct TimeBufStruct { uint64_t branchAddr; InstSeqNum doneSeqNum; + InstSeqNum bdelayDoneSeqNum; // @todo: Might want to package this kind of branch stuff into a single // struct as it is used pretty frequently. @@ -165,6 +167,9 @@ struct TimeBufStruct { // retired or squashed sequence number. InstSeqNum doneSeqNum; + InstSeqNum bdelayDoneSeqNum; + bool squashDelaySlot; + //Just in case we want to do a commit/squash on a cycle //(necessary for multiple ROBs?) bool commitInsts; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 956b6ec3e..cb83012f7 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -286,13 +286,11 @@ class DefaultCommit /** Sets the next PC of a specific thread. */ void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } -#if THE_ISA != ALPHA_ISA /** Reads the next NPC of a specific thread. */ - uint64_t readNextPC(unsigned tid) { return nextNPC[tid]; } + uint64_t readNextNPC(unsigned tid) { return nextNPC[tid]; } /** Sets the next NPC of a specific thread. */ - void setNextPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } -#endif + void setNextNPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } private: /** Time buffer interface. */ @@ -397,10 +395,8 @@ class DefaultCommit /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; -#if THE_ISA != ALPHA_ISA /** The next NPC of each thread. */ Addr nextNPC[Impl::MaxThreads]; -#endif /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index c667d633a..8a8035e73 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -120,7 +120,7 @@ DefaultCommit::DefaultCommit(Params *params) changedROBNumEntries[i] = false; trapSquash[i] = false; tcSquash[i] = false; - PC[i] = nextPC[i] = 0; + PC[i] = nextPC[i] = nextNPC[i] = 0; } } @@ -723,14 +723,48 @@ DefaultCommit::commit() // then use one older sequence number. InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; - if (fromIEW->includeSquashInst[tid] == true) - squashed_inst--; +#if THE_ISA != ALPHA_ISA + InstSeqNum bdelay_done_seq_num; + bool squash_bdelay_slot; + if (fromIEW->branchMispredict[tid]) { + if (fromIEW->branchTaken[tid] && + fromIEW->condDelaySlotBranch[tid]) { + DPRINTF(Commit, "[tid:%i]: Cond. delay slot branch" + "mispredicted as taken. Squashing after previous " + "inst, [sn:%i]\n", + tid, squashed_inst); + bdelay_done_seq_num = squashed_inst; + squash_bdelay_slot = true; + } else { + DPRINTF(Commit, "[tid:%i]: Branch Mispredict. Squashing " + "after delay slot [sn:%i]\n", tid, squashed_inst+1); + bdelay_done_seq_num = squashed_inst + 1; + squash_bdelay_slot = false; + } + } else { + bdelay_done_seq_num = squashed_inst; + } +#endif + + if (fromIEW->includeSquashInst[tid] == true) { + squashed_inst--; +#if THE_ISA != ALPHA_ISA + bdelay_done_seq_num--; +#endif + } // All younger instructions will be squashed. Set the sequence // number as the youngest instruction in the ROB. youngestSeqNum[tid] = squashed_inst; +#if THE_ISA == ALPHA_ISA rob->squash(squashed_inst, tid); + toIEW->commitInfo[tid].squashDelaySlot = true; +#else + rob->squash(bdelay_done_seq_num, tid); + toIEW->commitInfo[tid].squashDelaySlot = squash_bdelay_slot; + toIEW->commitInfo[tid].bdelayDoneSeqNum = bdelay_done_seq_num; +#endif changedROBNumEntries[tid] = true; toIEW->commitInfo[tid].doneSeqNum = squashed_inst; @@ -840,6 +874,7 @@ DefaultCommit::commitInsts() } else { PC[tid] = head_inst->readPC(); nextPC[tid] = head_inst->readNextPC(); + nextNPC[tid] = head_inst->readNextNPC(); // Increment the total number of non-speculative instructions // executed. @@ -868,7 +903,13 @@ DefaultCommit::commitInsts() } PC[tid] = nextPC[tid]; +#if THE_ISA == ALPHA_ISA nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst); +#else + nextPC[tid] = nextNPC[tid]; + nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst); +#endif + #if FULL_SYSTEM int count = 0; Addr oldpc; @@ -1069,6 +1110,8 @@ template void DefaultCommit::getInsts() { + DPRINTF(Commit, "Getting instructions from Rename stage.\n"); + // Read any renamed instructions and place them into the ROB. int insts_to_process = min((int)renameWidth, fromRename->size); diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index c43cc2cf8..af032132e 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -667,11 +667,12 @@ FullO3CPU::removeThread(unsigned tid) } // Squash Throughout Pipeline - fetch.squash(0,tid); + InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum; + fetch.squash(0, squash_seq_num, true, tid); decode.squash(tid); - rename.squash(tid); + rename.squash(squash_seq_num, tid); iew.squash(tid); - commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid); + commit.rob->squash(squash_seq_num, tid); assert(iew.ldstQueue.getCount(tid) == 0); @@ -1101,7 +1102,6 @@ FullO3CPU::setNextPC(uint64_t val,unsigned tid) commit.setNextPC(val, tid); } -#if THE_ISA != ALPHA_ISA template uint64_t FullO3CPU::readNextNPC(unsigned tid) @@ -1111,11 +1111,10 @@ FullO3CPU::readNextNPC(unsigned tid) template void -FullO3CPU::setNextNNPC(uint64_t val,unsigned tid) +FullO3CPU::setNextNPC(uint64_t val,unsigned tid) { commit.setNextNPC(val, tid); } -#endif template typename FullO3CPU::ListIt @@ -1165,7 +1164,9 @@ FullO3CPU::removeFrontInst(DynInstPtr &inst) template void -FullO3CPU::removeInstsNotInROB(unsigned tid) +FullO3CPU::removeInstsNotInROB(unsigned tid, + bool squash_delay_slot, + const InstSeqNum &delay_slot_seq_num) { DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" " list.\n", tid); @@ -1196,6 +1197,12 @@ FullO3CPU::removeInstsNotInROB(unsigned tid) while (inst_it != end_it) { assert(!instList.empty()); +#if THE_ISA != ALPHA_ISA + if(!squash_delay_slot && + delay_slot_seq_num >= (*inst_it)->seqNum) { + break; + } +#endif squashInstIt(inst_it, tid); inst_it--; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 83cb966e3..8c4f663ef 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -449,8 +449,10 @@ class FullO3CPU : public BaseO3CPU */ void removeFrontInst(DynInstPtr &inst); - /** Remove all instructions that are not currently in the ROB. */ - void removeInstsNotInROB(unsigned tid); + /** Remove all instructions that are not currently in the ROB. + * There's also an option to not squash delay slot instructions.*/ + void removeInstsNotInROB(unsigned tid, bool squash_delay_slot, + const InstSeqNum &delay_slot_seq_num); /** Remove all instructions younger than the given sequence number. */ void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid); diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index 7f5ecbc26..4a845e670 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -276,6 +276,19 @@ class DefaultDecode /** Maximum size of the skid buffer. */ unsigned skidBufferMax; + /** SeqNum of Squashing Branch Delay Instruction (used for MIPS)*/ + Addr bdelayDoneSeqNum[Impl::MaxThreads]; + + /** Instruction used for squashing branch (used for MIPS)*/ + DynInstPtr squashInst[Impl::MaxThreads]; + + /** Tells when their is a pending delay slot inst. to send + * to rename. If there is, then wait squash after the next + * instruction (used for MIPS). + */ + bool squashAfterDelaySlot[Impl::MaxThreads]; + + /** Stat for total number of idle cycles. */ Stats::Scalar<> decodeIdleCycles; /** Stat for total number of blocked cycles. */ diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 8b851c032..0bc6a109f 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -50,6 +50,8 @@ DefaultDecode::DefaultDecode(Params *params) stalls[i].rename = false; stalls[i].iew = false; stalls[i].commit = false; + + squashAfterDelaySlot[i] = false; } // @todo: Make into a parameter @@ -278,13 +280,25 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) // Send back mispredict information. toFetch->decodeInfo[tid].branchMispredict = true; - toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].predIncorrect = true; + toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; toFetch->decodeInfo[tid].squash = true; toFetch->decodeInfo[tid].nextPC = inst->branchTarget(); +#if THE_ISA == ALPHA_ISA toFetch->decodeInfo[tid].branchTaken = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); + InstSeqNum squash_seq_num = inst->seqNum; +#else + toFetch->decodeInfo[tid].branchTaken = inst->readNextNPC() != + (inst->readNextPC() + sizeof(TheISA::MachInst)); + + toFetch->decodeInfo[tid].bdelayDoneSeqNum = bdelayDoneSeqNum[tid]; + squashAfterDelaySlot[tid] = false; + + InstSeqNum squash_seq_num = bdelayDoneSeqNum[tid]; +#endif + // Might have to tell fetch to unblock. if (decodeStatus[tid] == Blocked || decodeStatus[tid] == Unblocking) { @@ -296,7 +310,7 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) for (int i=0; isize; i++) { if (fromFetch->insts[i]->threadNumber == tid && - fromFetch->insts[i]->seqNum > inst->seqNum) { + fromFetch->insts[i]->seqNum > squash_seq_num) { fromFetch->insts[i]->setSquashed(); } } @@ -304,15 +318,35 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) // Clear the instruction list and skid buffer in case they have any // insts in them. while (!insts[tid].empty()) { + +#if THE_ISA != ALPHA_ISA + if (insts[tid].front()->seqNum <= squash_seq_num) { + DPRINTF(Decode, "[tid:%i]: Cannot remove incoming decode " + "instructions before delay slot [sn:%i]. %i insts" + "left in decode.\n", tid, squash_seq_num, + insts[tid].size()); + break; + } +#endif insts[tid].pop(); } while (!skidBuffer[tid].empty()) { + +#if THE_ISA != ALPHA_ISA + if (skidBuffer[tid].front()->seqNum <= squash_seq_num) { + DPRINTF(Decode, "[tid:%i]: Cannot remove skidBuffer " + "instructions before delay slot [sn:%i]. %i insts" + "left in decode.\n", tid, squash_seq_num, + insts[tid].size()); + break; + } +#endif skidBuffer[tid].pop(); } // Squash instructions up until this one - cpu->removeInstsUntil(inst->seqNum, tid); + cpu->removeInstsUntil(squash_seq_num, tid); } template @@ -611,7 +645,7 @@ DefaultDecode::decode(bool &status_change, unsigned tid) // will allow, as long as it is not currently blocked. if (decodeStatus[tid] == Running || decodeStatus[tid] == Idle) { - DPRINTF(Decode, "[tid:%u] Not blocked, so attempting to run " + DPRINTF(Decode, "[tid:%u]: Not blocked, so attempting to run " "stage.\n",tid); decodeInsts(tid); @@ -710,6 +744,9 @@ DefaultDecode::decodeInsts(unsigned tid) // Ensure that if it was predicted as a branch, it really is a // branch. if (inst->predTaken() && !inst->isControl()) { + DPRINTF(Decode, "PredPC : %#x != NextPC: %#x\n",inst->predPC, + inst->nextPC + 4); + panic("Instruction predicted as a branch!"); ++decodeControlMispred; @@ -730,12 +767,43 @@ DefaultDecode::decodeInsts(unsigned tid) // Might want to set some sort of boolean and just do // a check at the end +#if THE_ISA == ALPHA_ISA squash(inst, inst->threadNumber); inst->setPredTarg(inst->branchTarget()); - break; +#else + // If mispredicted as taken, then ignore delay slot + // instruction... else keep delay slot and squash + // after it is sent to rename + if (inst->predTaken() && inst->isCondDelaySlot()) { + DPRINTF(Decode, "[tid:%i]: Conditional delay slot inst." + "[sn:%i] PC %#x mispredicted as taken.\n", tid, + inst->seqNum, inst->PC); + bdelayDoneSeqNum[tid] = inst->seqNum; + squash(inst, inst->threadNumber); + inst->setPredTarg(inst->branchTarget()); + break; + } else { + DPRINTF(Decode, "[tid:%i]: Misprediction detected at " + "[sn:%i] PC %#x, will squash after delay slot " + "inst. is sent to Rename\n", + tid, inst->seqNum, inst->PC); + bdelayDoneSeqNum[tid] = inst->seqNum + 1; + squashAfterDelaySlot[tid] = true; + squashInst[tid] = inst; + continue; + } +#endif } } + + if (squashAfterDelaySlot[tid]) { + assert(!inst->isSquashed()); + squash(squashInst[tid], squashInst[tid]->threadNumber); + squashInst[tid]->setPredTarg(squashInst[tid]->branchTarget()); + assert(!inst->isSquashed()); + break; + } } // If we didn't process all instructions, then we will need to block diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index a2cdf2dba..5f7caf79f 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -34,12 +34,15 @@ #include "arch/isa_specific.hh" #if THE_ISA == ALPHA_ISA -template -class AlphaDynInst; - -struct AlphaSimpleImpl; - -typedef AlphaDynInst O3DynInst; + template class AlphaDynInst; + struct AlphaSimpleImpl; + typedef AlphaDynInst O3DynInst; +#elif THE_ISA == MIPS_ISA + template class MipsDynInst; + struct MipsSimpleImpl; + typedef MipsDynInst O3DynInst; +#else + #error "O3DynInst not defined for this ISA" #endif #endif // __CPU_O3_DYN_INST_HH__ diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 931919af8..1a2ca32a4 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -106,6 +106,7 @@ class DefaultFetch virtual void recvRetry(); }; + public: /** Overall fetch status. Used to determine if the CPU can * deschedule itsef due to a lack of activity. @@ -218,9 +219,10 @@ class DefaultFetch * @param next_PC Next PC variable passed in by reference. It is * expected to be set to the current PC; it will be updated with what * the next PC will be. + * @param next_NPC Used for ISAs which use delay slots. * @return Whether or not a branch was predicted as taken. */ - bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC); + bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC); /** * Fetches the cache line that contains fetch_PC. Returns any @@ -255,7 +257,8 @@ class DefaultFetch * remove any instructions that are not in the ROB. The source of this * squash should be the commit stage. */ - void squash(const Addr &new_PC, unsigned tid); + void squash(const Addr &new_PC, const InstSeqNum &seq_num, + bool squash_delay_slot, unsigned tid); /** Ticks the fetch stage, processing all inputs signals and fetching * as many instructions as possible. @@ -340,14 +343,12 @@ class DefaultFetch /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; -#if THE_ISA != ALPHA_ISA /** Per-thread next Next PC. * This is not a real register but is used for * architectures that use a branch-delay slot. * (such as MIPS or Sparc) */ Addr nextNPC[Impl::MaxThreads]; -#endif /** Memory request used to access cache. */ RequestPtr memReq[Impl::MaxThreads]; @@ -360,6 +361,19 @@ class DefaultFetch /** Tracks how many instructions has been fetched this cycle. */ int numInst; + /** Tracks delay slot information for threads in ISAs which use + * delay slots; + */ + struct DelaySlotInfo { + InstSeqNum delaySlotSeqNum; + InstSeqNum branchSeqNum; + int numInsts; + Addr targetAddr; + bool targetReady; + }; + + DelaySlotInfo delaySlotInfo[Impl::MaxThreads]; + /** Source of possible stalls. */ struct Stalls { bool decode; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 4184e1867..54e35433b 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -165,10 +165,15 @@ DefaultFetch::DefaultFetch(Params *params) cacheDataPC[tid] = 0; cacheDataValid[tid] = false; - stalls[tid].decode = 0; - stalls[tid].rename = 0; - stalls[tid].iew = 0; - stalls[tid].commit = 0; + delaySlotInfo[tid].branchSeqNum = -1; + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; + + stalls[tid].decode = false; + stalls[tid].rename = false; + stalls[tid].iew = false; + stalls[tid].commit = false; } // Get the size of an instruction. @@ -288,6 +293,9 @@ DefaultFetch::setCPU(O3CPU *cpu_ptr) } #endif + // Schedule fetch to get the correct PC from the CPU + // scheduleFetchStartupEvent(1); + // Fetch needs to start fetching instructions at the very beginning, // so it must start up in active state. switchToActive(); @@ -426,6 +434,10 @@ DefaultFetch::takeOverFrom() nextPC[i] = cpu->readNextPC(i); #if THE_ISA != ALPHA_ISA nextNPC[i] = cpu->readNextNPC(i); + delaySlotInfo[i].branchSeqNum = -1; + delaySlotInfo[i].numInsts = 0; + delaySlotInfo[i].targetAddr = 0; + delaySlotInfo[i].targetReady = false; #endif fetchStatus[i] = Running; } @@ -474,7 +486,8 @@ DefaultFetch::switchToInactive() template bool -DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) +DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, + Addr &next_NPC) { // Do branch prediction check here. // A bit of a misnomer...next_PC is actually the current PC until @@ -482,12 +495,54 @@ DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) bool predict_taken; if (!inst->isControl()) { +#if THE_ISA == ALPHA_ISA next_PC = next_PC + instSize; inst->setPredTarg(next_PC); +#else + Addr cur_PC = next_PC; + next_PC = cur_PC + instSize; //next_NPC; + next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize; + inst->setPredTarg(next_NPC); +#endif return false; } - predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber); + int tid = inst->threadNumber; +#if THE_ISA == ALPHA_ISA + predict_taken = branchPred.predict(inst, next_PC, tid); +#else + Addr pred_PC = next_PC; + predict_taken = branchPred.predict(inst, pred_PC, tid); + + if (predict_taken) { + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid); + } else { + DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid); + } + + if (predict_taken) { + next_PC = next_NPC; + next_NPC = pred_PC; + + // Update delay slot info + ++delaySlotInfo[tid].numInsts; + delaySlotInfo[tid].targetAddr = pred_PC; + DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid, + delaySlotInfo[tid].numInsts); + } else { // !predict_taken + if (inst->isCondDelaySlot()) { + next_PC = pred_PC; + // The delay slot is skipped here if there is on + // prediction + } else { + next_PC = next_NPC; + // No need to declare a delay slot here since + // there is no for the pred. target to jump + } + + next_NPC = next_NPC + instSize; + } +#endif ++fetchedBranches; @@ -606,6 +661,7 @@ DefaultFetch::doSquash(const Addr &new_PC, unsigned tid) PC[tid] = new_PC; nextPC[tid] = new_PC + instSize; + nextNPC[tid] = new_PC + (2 * instSize); // Clear the icache miss if it's outstanding. if (fetchStatus[tid] == IcacheWaitResponse) { @@ -639,6 +695,14 @@ DefaultFetch::squashFromDecode(const Addr &new_PC, doSquash(new_PC, tid); +#if THE_ISA != ALPHA_ISA + if (seq_num <= delaySlotInfo[tid].branchSeqNum) { + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; + } +#endif + // Tell the CPU to remove any instructions that are in flight between // fetch and decode. cpu->removeInstsUntil(seq_num, tid); @@ -712,14 +776,26 @@ DefaultFetch::updateFetchStatus() template void -DefaultFetch::squash(const Addr &new_PC, unsigned tid) +DefaultFetch::squash(const Addr &new_PC, const InstSeqNum &seq_num, + bool squash_delay_slot, unsigned tid) { DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); doSquash(new_PC, tid); +#if THE_ISA == ALPHA_ISA // Tell the CPU to remove any instructions that are not in the ROB. - cpu->removeInstsNotInROB(tid); + cpu->removeInstsNotInROB(tid, true, 0); +#else + if (seq_num <= delaySlotInfo[tid].branchSeqNum) { + delaySlotInfo[tid].numInsts = 0; + delaySlotInfo[tid].targetAddr = 0; + delaySlotInfo[tid].targetReady = false; + } + + // Tell the CPU to remove any instructions that are not in the ROB. + cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num); +#endif } template @@ -828,8 +904,16 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " "from commit.\n",tid); +#if THE_ISA == ALPHA_ISA + InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum; +#else + InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum; +#endif // In any case, squash. - squash(fromCommit->commitInfo[tid].nextPC,tid); + squash(fromCommit->commitInfo[tid].nextPC, + doneSeqNum, + fromCommit->commitInfo[tid].squashDelaySlot, + tid); // Also check if there's a mispredict that happened. if (fromCommit->commitInfo[tid].branchMispredict) { @@ -876,9 +960,15 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) } if (fetchStatus[tid] != Squashing) { + +#if THE_ISA == ALPHA_ISA + InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum; +#else + InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum; +#endif // Squash unless we're already squashing squashFromDecode(fromDecode->decodeInfo[tid].nextPC, - fromDecode->decodeInfo[tid].doneSeqNum, + doneSeqNum, tid); return true; @@ -984,6 +1074,7 @@ DefaultFetch::fetch(bool &status_change) } Addr next_PC = fetch_PC; + Addr next_NPC = next_PC + instSize; InstSeqNum inst_seq; MachInst inst; ExtMachInst ext_inst; @@ -1002,10 +1093,13 @@ DefaultFetch::fetch(bool &status_change) // ended this fetch block. bool predicted_branch = false; + // Need to keep track of whether or not a delay slot + // instruction has been fetched + for (; offset < cacheBlkSize && numInst < fetchWidth && - !predicted_branch; + (!predicted_branch || delaySlotInfo[tid].numInsts > 0); ++numInst) { // Get a sequence number. @@ -1042,7 +1136,8 @@ DefaultFetch::fetch(bool &status_change) instruction->staticInst, instruction->readPC(),tid); - predicted_branch = lookupAndUpdateNextPC(instruction, next_PC); + predicted_branch = lookupAndUpdateNextPC(instruction, next_PC, + next_NPC); // Add instruction to the CPU's list of instructions. instruction->setInstListIt(cpu->addInst(instruction)); @@ -1068,7 +1163,41 @@ DefaultFetch::fetch(bool &status_change) break; } - offset+= instSize; + offset += instSize; + +#if THE_ISA != ALPHA_ISA + if (predicted_branch) { + delaySlotInfo[tid].branchSeqNum = inst_seq; + + DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n", + tid, inst_seq); + continue; + } else if (delaySlotInfo[tid].numInsts > 0) { + --delaySlotInfo[tid].numInsts; + + // It's OK to set PC to target of branch + if (delaySlotInfo[tid].numInsts == 0) { + delaySlotInfo[tid].targetReady = true; + + // Break the looping condition + predicted_branch = true; + } + + DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to" + " process.\n", tid, delaySlotInfo[tid].numInsts); + } +#endif + } + + if (offset >= cacheBlkSize) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " + "block.\n", tid); + } else if (numInst >= fetchWidth) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " + "for this cycle.\n", tid); + } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " + "instruction encountered.\n", tid); } } @@ -1085,12 +1214,19 @@ DefaultFetch::fetch(bool &status_change) PC[tid] = next_PC; nextPC[tid] = next_PC + instSize; #else - PC[tid] = next_PC; - nextPC[tid] = next_PC + instSize; - nextPC[tid] = next_PC + instSize; + if (delaySlotInfo[tid].targetReady && + delaySlotInfo[tid].numInsts == 0) { + // Set PC to target + PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC + nextPC[tid] = next_PC + instSize; //next_NPC + nextNPC[tid] = next_PC + (2 * instSize); - thread->setNextPC(thread->readNextNPC()); - thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); + delaySlotInfo[tid].targetReady = false; + } else { + PC[tid] = next_PC; + nextPC[tid] = next_NPC; + nextNPC[tid] = next_NPC + instSize; + } #endif } else { // We shouldn't be in an icache miss and also have a fault (an ITB diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 6c6be4787..76fa008ee 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -405,6 +405,9 @@ class DefaultIEW /** Records if there is a fetch redirect on this cycle for each thread. */ bool fetchRedirect[Impl::MaxThreads]; + /** Keeps track of the last valid branch delay slot instss for threads */ + InstSeqNum bdelayDoneSeqNum[Impl::MaxThreads]; + /** Used to track if all instructions have been dispatched this cycle. * If they have not, then blocking must have occurred, and the instructions * would already be added to the skid buffer. diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 684ae2295..c4ec93b3e 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -73,6 +73,7 @@ DefaultIEW::DefaultIEW(Params *params) dispatchStatus[i] = Running; stalls[i].commit = false; fetchRedirect[i] = false; + bdelayDoneSeqNum[i] = 0; } wbMax = wbWidth * params->wbDepth; @@ -428,13 +429,31 @@ DefaultIEW::squash(unsigned tid) instQueue.squash(tid); // Tell the LDSTQ to start squashing. +#if THE_ISA == ALPHA_ISA ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid); - +#else + ldstQueue.squash(fromCommit->commitInfo[tid].bdelayDoneSeqNum, tid); +#endif updatedQueues = true; // Clear the skid buffer in case it has any data in it. - while (!skidBuffer[tid].empty()) { + DPRINTF(IEW, "[tid:%i]: Removing skidbuffer instructions until [sn:%i].\n", + tid, fromCommit->commitInfo[tid].bdelayDoneSeqNum); + while (!skidBuffer[tid].empty()) { +#if THE_ISA != ALPHA_ISA + if (skidBuffer[tid].front()->seqNum <= + fromCommit->commitInfo[tid].bdelayDoneSeqNum) { + DPRINTF(IEW, "[tid:%i]: Cannot remove skidbuffer instructions " + "that occur before delay slot [sn:%i].\n", + fromCommit->commitInfo[tid].bdelayDoneSeqNum, + tid); + break; + } else { + DPRINTF(IEW, "[tid:%i]: Removing instruction [sn:%i] from " + "skidBuffer.\n", tid, skidBuffer[tid].front()->seqNum); + } +#endif if (skidBuffer[tid].front()->isLoad() || skidBuffer[tid].front()->isStore() ) { toRename->iewInfo[tid].dispatchedToLSQ++; @@ -445,6 +464,8 @@ DefaultIEW::squash(unsigned tid) skidBuffer[tid].pop(); } + bdelayDoneSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum; + emptyRenameInsts(tid); } @@ -458,10 +479,26 @@ DefaultIEW::squashDueToBranch(DynInstPtr &inst, unsigned tid) toCommit->squash[tid] = true; toCommit->squashedSeqNum[tid] = inst->seqNum; toCommit->mispredPC[tid] = inst->readPC(); - toCommit->nextPC[tid] = inst->readNextPC(); toCommit->branchMispredict[tid] = true; + +#if THE_ISA == ALPHA_ISA toCommit->branchTaken[tid] = inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); + toCommit->nextPC[tid] = inst->readNextPC(); +#else + bool branch_taken = inst->readNextNPC() != + (inst->readNextPC() + sizeof(TheISA::MachInst)); + + toCommit->branchTaken[tid] = branch_taken; + + toCommit->condDelaySlotBranch[tid] = inst->isCondDelaySlot(); + + if (inst->isCondDelaySlot() && branch_taken) { + toCommit->nextPC[tid] = inst->readNextPC(); + } else { + toCommit->nextPC[tid] = inst->readNextNPC(); + } +#endif toCommit->includeSquashInst[tid] = false; @@ -825,8 +862,10 @@ DefaultIEW::sortInsts() { int insts_from_rename = fromRename->size; #ifdef DEBUG +#if THE_ISA == ALPHA_ISA for (int i = 0; i < numThreads; i++) assert(insts[i].empty()); +#endif #endif for (int i = 0; i < insts_from_rename; ++i) { insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]); @@ -837,7 +876,23 @@ template void DefaultIEW::emptyRenameInsts(unsigned tid) { + DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions until " + "[sn:%i].\n", tid, bdelayDoneSeqNum[tid]); + while (!insts[tid].empty()) { + +#if THE_ISA != ALPHA_ISA + if (insts[tid].front()->seqNum <= bdelayDoneSeqNum[tid]) { + DPRINTF(IEW, "[tid:%i]: Done removing, cannot remove instruction" + " that occurs at or before delay slot [sn:%i].\n", + tid, bdelayDoneSeqNum[tid]); + break; + } else { + DPRINTF(IEW, "[tid:%i]: Removing incoming rename instruction " + "[sn:%i].\n", tid, insts[tid].front()->seqNum); + } +#endif + if (insts[tid].front()->isLoad() || insts[tid].front()->isStore() ) { toRename->iewInfo[tid].dispatchedToLSQ++; @@ -1120,7 +1175,7 @@ DefaultIEW::dispatchInsts(unsigned tid) } if (!insts_to_dispatch.empty()) { - DPRINTF(IEW,"[tid:%i]: Issue: Bandwidth Full. Blocking.\n"); + DPRINTF(IEW,"[tid:%i]: Issue: Bandwidth Full. Blocking.\n", tid); block(tid); toRename->iewUnblock[tid] = false; } @@ -1263,9 +1318,13 @@ DefaultIEW::executeInsts() fetchRedirect[tid] = true; DPRINTF(IEW, "Execute: Branch mispredict detected.\n"); +#if THE_ISA == ALPHA_ISA DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n", inst->nextPC); - +#else + DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n", + inst->nextNPC); +#endif // If incorrect, then signal the ROB that it must be squashed. squashDueToBranch(inst, tid); diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 4c69ca384..d745faf7b 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -26,7 +26,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim - * Korey Sewell */ #ifndef __CPU_O3_INST_QUEUE_HH__ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 36e0842be..8faae3ebf 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -993,7 +993,11 @@ InstructionQueue::squash(unsigned tid) // Read instruction sequence number of last instruction out of the // time buffer. +#if THE_ISA == ALPHA_ISA squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; +#else + squashedSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum; +#endif // Call doSquash if there are insts in the IQ if (count[tid] > 0) { diff --git a/src/cpu/o3/isa_specific.hh b/src/cpu/o3/isa_specific.hh index f8a9dd8cc..4937589e3 100755 --- a/src/cpu/o3/isa_specific.hh +++ b/src/cpu/o3/isa_specific.hh @@ -35,6 +35,11 @@ #include "cpu/o3/alpha/impl.hh" #include "cpu/o3/alpha/params.hh" #include "cpu/o3/alpha/dyn_inst.hh" +#elif THE_ISA == MIPS_ISA + #include "cpu/o3/mips/cpu.hh" + #include "cpu/o3/mips/impl.hh" + #include "cpu/o3/mips/params.hh" + #include "cpu/o3/mips/dyn_inst.hh" #else - #error "O3CPU doesnt support this ISA" + #error "ISA-specific header files O3CPU not defined ISA" #endif diff --git a/src/cpu/o3/mips/cpu.cc b/src/cpu/o3/mips/cpu.cc new file mode 100755 index 000000000..420f460b2 --- /dev/null +++ b/src/cpu/o3/mips/cpu.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/mips/impl.hh" +#include "cpu/o3/mips/cpu_impl.hh" +#include "cpu/o3/mips/dyn_inst.hh" + +// Force instantiation of MipsO3CPU for all the implemntations that are +// needed. Consider merging this and mips_dyn_inst.cc, and maybe all +// classes that depend on a certain impl, into one file (mips_impl.cc?). +template class MipsO3CPU; diff --git a/src/cpu/o3/mips/cpu.hh b/src/cpu/o3/mips/cpu.hh new file mode 100755 index 000000000..1813e8656 --- /dev/null +++ b/src/cpu/o3/mips/cpu.hh @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_CPU_HH__ +#define __CPU_O3_MIPS_CPU_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/thread_context.hh" +#include "cpu/o3/cpu.hh" +#include "sim/byteswap.hh" + +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + +/** + * MipsO3CPU class. Derives from the FullO3CPU class, and + * implements all ISA and implementation specific functions of the + * CPU. This is the CPU class that is used for the SimObjects, and is + * what is given to the DynInsts. Most of its state exists in the + * FullO3CPU; the state is has is mainly for ISA specific + * functionality. + */ +template +class MipsO3CPU : public FullO3CPU +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::RegFile RegFile; + typedef TheISA::MiscRegFile MiscRegFile; + + public: + typedef O3ThreadState ImplState; + typedef O3ThreadState Thread; + typedef typename Impl::Params Params; + + /** Constructs an MipsO3CPU with the given parameters. */ + MipsO3CPU(Params *params); + + /** Registers statistics. */ + void regStats(); + + /** Translates instruction requestion in syscall emulation mode. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data read request in syscall emulation mode. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data write request in syscall emulation mode. */ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg, unsigned tid); + + /** Reads a misc. register, including any side effects the read + * might have as defined by the architecture. + */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); + + /** Sets a miscellaneous register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); + + /** Sets a misc. register, including any side effects the write + * might have as defined by the architecture. + */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); + + /** Initiates a squash of all in-flight instructions for a given + * thread. The source of the squash is an external update of + * state through the TC. + */ + void squashFromTC(unsigned tid); + + /** Traps to handle given fault. */ + void trap(Fault fault, unsigned tid); + + /** Executes a syscall. + * @todo: Determine if this needs to be virtual. + */ + void syscall(int64_t callnum, int tid); + /** Gets a syscall argument. */ + IntReg getSyscallArg(int i, int tid); + + /** Used to shift args for indirect syscall. */ + void setSyscallArg(int i, IntReg val, int tid); + + /** Sets the return value of a syscall. */ + void setSyscallReturn(SyscallReturn return_value, int tid); + + /** CPU read function, forwards read to LSQ. */ + template + Fault read(RequestPtr &req, T &data, int load_idx) + { + return this->iew.ldstQueue.read(req, data, load_idx); + } + + /** CPU write function, forwards write to LSQ. */ + template + Fault write(RequestPtr &req, T &data, int store_idx) + { + return this->iew.ldstQueue.write(req, data, store_idx); + } + + Addr lockAddr; + + /** Temporary fix for the lock flag, works in the UP case. */ + bool lockFlag; +}; + +#endif // __CPU_O3_MIPS_CPU_HH__ diff --git a/src/cpu/o3/mips/cpu_builder.cc b/src/cpu/o3/mips/cpu_builder.cc new file mode 100644 index 000000000..f1c3b33a5 --- /dev/null +++ b/src/cpu/o3/mips/cpu_builder.cc @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include + +#include "cpu/base.hh" +#include "cpu/o3/mips/cpu.hh" +#include "cpu/o3/mips/impl.hh" +#include "cpu/o3/mips/params.hh" +#include "cpu/o3/fu_pool.hh" +#include "sim/builder.hh" + +class DerivO3CPU : public MipsO3CPU +{ + public: + DerivO3CPU(MipsSimpleParams *p) + : MipsO3CPU(p) + { } +}; + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) + +Param clock; +Param numThreads; +Param activity; + +SimObjectVectorParam workload; + +SimObjectParam mem; + +SimObjectParam checker; + +Param max_insts_any_thread; +Param max_insts_all_threads; +Param max_loads_any_thread; +Param max_loads_all_threads; + +Param cachePorts; + +Param decodeToFetchDelay; +Param renameToFetchDelay; +Param iewToFetchDelay; +Param commitToFetchDelay; +Param fetchWidth; + +Param renameToDecodeDelay; +Param iewToDecodeDelay; +Param commitToDecodeDelay; +Param fetchToDecodeDelay; +Param decodeWidth; + +Param iewToRenameDelay; +Param commitToRenameDelay; +Param decodeToRenameDelay; +Param renameWidth; + +Param commitToIEWDelay; +Param renameToIEWDelay; +Param issueToExecuteDelay; +Param dispatchWidth; +Param issueWidth; +Param wbWidth; +Param wbDepth; +SimObjectParam fuPool; + +Param iewToCommitDelay; +Param renameToROBDelay; +Param commitWidth; +Param squashWidth; +Param trapLatency; + +Param backComSize; +Param forwardComSize; + +Param predType; +Param localPredictorSize; +Param localCtrBits; +Param localHistoryTableSize; +Param localHistoryBits; +Param globalPredictorSize; +Param globalCtrBits; +Param globalHistoryBits; +Param choicePredictorSize; +Param choiceCtrBits; + +Param BTBEntries; +Param BTBTagSize; + +Param RASSize; + +Param LQEntries; +Param SQEntries; +Param LFSTSize; +Param SSITSize; + +Param numPhysIntRegs; +Param numPhysFloatRegs; +Param numIQEntries; +Param numROBEntries; + +Param smtNumFetchingThreads; +Param smtFetchPolicy; +Param smtLSQPolicy; +Param smtLSQThreshold; +Param smtIQPolicy; +Param smtIQThreshold; +Param smtROBPolicy; +Param smtROBThreshold; +Param smtCommitPolicy; + +Param instShiftAmt; + +Param defer_registration; + +Param function_trace; +Param function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), + INIT_PARAM_DFLT(activity, "Initial activity count", 0), + + INIT_PARAM(workload, "Processes to run"), + + INIT_PARAM(mem, "Memory"), + + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(dispatchWidth, "Dispatch width"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(wbWidth, "Writeback width"), + INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"), + INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), + + INIT_PARAM(backComSize, "Time buffer size for backwards communication"), + INIT_PARAM(forwardComSize, "Time buffer size for forward communication"), + + INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), + INIT_PARAM(localPredictorSize, "Size of local predictor"), + INIT_PARAM(localCtrBits, "Bits per counter"), + INIT_PARAM(localHistoryTableSize, "Size of local history table"), + INIT_PARAM(localHistoryBits, "Bits for the local history"), + INIT_PARAM(globalPredictorSize, "Size of global predictor"), + INIT_PARAM(globalCtrBits, "Bits per counter"), + INIT_PARAM(globalHistoryBits, "Bits of history"), + INIT_PARAM(choicePredictorSize, "Size of choice predictor"), + INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), + INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), + INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), + INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), + INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), + INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), + INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) + +CREATE_SIM_OBJECT(DerivO3CPU) +{ + DerivO3CPU *cpu; + + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + (numThreads.isValid() && numThreads >= workload.size()) ? + numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + + MipsSimpleParams *params = new MipsSimpleParams; + + params->clock = clock; + + params->name = getInstanceName(); + params->numberOfThreads = actual_num_threads; + params->activity = activity; + + params->workload = workload; + + params->mem = mem; + + params->checker = checker; + + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params->cachePorts = cachePorts; + + params->decodeToFetchDelay = decodeToFetchDelay; + params->renameToFetchDelay = renameToFetchDelay; + params->iewToFetchDelay = iewToFetchDelay; + params->commitToFetchDelay = commitToFetchDelay; + params->fetchWidth = fetchWidth; + + params->renameToDecodeDelay = renameToDecodeDelay; + params->iewToDecodeDelay = iewToDecodeDelay; + params->commitToDecodeDelay = commitToDecodeDelay; + params->fetchToDecodeDelay = fetchToDecodeDelay; + params->decodeWidth = decodeWidth; + + params->iewToRenameDelay = iewToRenameDelay; + params->commitToRenameDelay = commitToRenameDelay; + params->decodeToRenameDelay = decodeToRenameDelay; + params->renameWidth = renameWidth; + + params->commitToIEWDelay = commitToIEWDelay; + params->renameToIEWDelay = renameToIEWDelay; + params->issueToExecuteDelay = issueToExecuteDelay; + params->dispatchWidth = dispatchWidth; + params->issueWidth = issueWidth; + params->wbWidth = wbWidth; + params->wbDepth = wbDepth; + params->fuPool = fuPool; + + params->iewToCommitDelay = iewToCommitDelay; + params->renameToROBDelay = renameToROBDelay; + params->commitWidth = commitWidth; + params->squashWidth = squashWidth; + params->trapLatency = trapLatency; + + params->backComSize = backComSize; + params->forwardComSize = forwardComSize; + + params->predType = predType; + params->localPredictorSize = localPredictorSize; + params->localCtrBits = localCtrBits; + params->localHistoryTableSize = localHistoryTableSize; + params->localHistoryBits = localHistoryBits; + params->globalPredictorSize = globalPredictorSize; + params->globalCtrBits = globalCtrBits; + params->globalHistoryBits = globalHistoryBits; + params->choicePredictorSize = choicePredictorSize; + params->choiceCtrBits = choiceCtrBits; + + params->BTBEntries = BTBEntries; + params->BTBTagSize = BTBTagSize; + + params->RASSize = RASSize; + + params->LQEntries = LQEntries; + params->SQEntries = SQEntries; + + params->SSITSize = SSITSize; + params->LFSTSize = LFSTSize; + + params->numPhysIntRegs = numPhysIntRegs; + params->numPhysFloatRegs = numPhysFloatRegs; + params->numIQEntries = numIQEntries; + params->numROBEntries = numROBEntries; + + params->smtNumFetchingThreads = smtNumFetchingThreads; + + // Default smtFetchPolicy to "RoundRobin", if necessary. + std::string round_robin_policy = "RoundRobin"; + std::string single_thread = "SingleThread"; + + if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0) + params->smtFetchPolicy = round_robin_policy; + else + params->smtFetchPolicy = smtFetchPolicy; + + params->smtIQPolicy = smtIQPolicy; + params->smtLSQPolicy = smtLSQPolicy; + params->smtLSQThreshold = smtLSQThreshold; + params->smtROBPolicy = smtROBPolicy; + params->smtROBThreshold = smtROBThreshold; + params->smtCommitPolicy = smtCommitPolicy; + + params->instShiftAmt = 2; + + params->deferRegistration = defer_registration; + + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + + cpu = new DerivO3CPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("DerivO3CPU", DerivO3CPU) + diff --git a/src/cpu/o3/mips/cpu_impl.hh b/src/cpu/o3/mips/cpu_impl.hh new file mode 100644 index 000000000..6b9f3ae10 --- /dev/null +++ b/src/cpu/o3/mips/cpu_impl.hh @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "config/use_checker.hh" + +#include "arch/mips/faults.hh" +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/checker/thread_context.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/o3/mips/cpu.hh" +#include "cpu/o3/mips/params.hh" +#include "cpu/o3/mips/thread_context.hh" +#include "cpu/o3/comm.hh" +#include "cpu/o3/thread_state.hh" + +using namespace TheISA; + +template +MipsO3CPU::MipsO3CPU(Params *params) + : FullO3CPU(params) +{ + DPRINTF(O3CPU, "Creating MipsO3CPU object.\n"); + + // Setup any thread state. + this->thread.resize(this->numThreads); + + for (int i = 0; i < this->numThreads; ++i) { + if (i < params->workload.size()) { + DPRINTF(O3CPU, "Workload[%i] process is %#x", + i, this->thread[i]); + this->thread[i] = new Thread(this, i, params->workload[i], + i, params->mem); + + this->thread[i]->setStatus(ThreadContext::Suspended); + + + /* Use this port to for syscall emulation writes to memory. */ + Port *mem_port; + TranslatingPort *trans_port; + trans_port = new TranslatingPort(csprintf("%s-%d-funcport", + name(), i), + params->workload[i]->pTable, + false); + mem_port = params->mem->getPort("functional"); + mem_port->setPeer(trans_port); + trans_port->setPeer(mem_port); + this->thread[i]->setMemPort(trans_port); + + //usedTids[i] = true; + //threadMap[i] = i; + } else { + //Allocate Empty thread so M5 can use later + //when scheduling threads to CPU + Process* dummy_proc = NULL; + + this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem); + //usedTids[i] = false; + } + + ThreadContext *tc; + + // Setup the TC that will serve as the interface to the threads/CPU. + MipsTC *mips_tc = + new MipsTC; + + tc = mips_tc; + + // If we're using a checker, then the TC should be the + // CheckerThreadContext. +#if USE_CHECKER + if (params->checker) { + tc = new CheckerThreadContext >( + mips_tc, this->checker); + } +#endif + + mips_tc->cpu = this; + mips_tc->thread = this->thread[i]; + + // Give the thread the TC. + this->thread[i]->tc = tc; + + // Add the TC to the CPU's list of TC's. + this->threadContexts.push_back(tc); + } + + for (int i=0; i < this->numThreads; i++) { + this->thread[i]->setFuncExeInst(0); + } + + // Sets CPU pointers. These must be set at this level because the CPU + // pointers are defined to be the highest level of CPU class. + this->fetch.setCPU(this); + this->decode.setCPU(this); + this->rename.setCPU(this); + this->iew.setCPU(this); + this->commit.setCPU(this); + + this->rob.setCPU(this); + this->regFile.setCPU(this); + + lockAddr = 0; + lockFlag = false; +} + +template +void +MipsO3CPU::regStats() +{ + // Register stats for everything that has stats. + this->fullCPURegStats(); + this->fetch.regStats(); + this->decode.regStats(); + this->rename.regStats(); + this->iew.regStats(); + this->commit.regStats(); +} + + +template +MiscReg +MipsO3CPU::readMiscReg(int misc_reg, unsigned tid) +{ + return this->regFile.readMiscReg(misc_reg, tid); +} + +template +MiscReg +MipsO3CPU::readMiscRegWithEffect(int misc_reg, Fault &fault, + unsigned tid) +{ + return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid); +} + +template +Fault +MipsO3CPU::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) +{ + return this->regFile.setMiscReg(misc_reg, val, tid); +} + +template +Fault +MipsO3CPU::setMiscRegWithEffect(int misc_reg, const MiscReg &val, + unsigned tid) +{ + return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); +} + +template +void +MipsO3CPU::squashFromTC(unsigned tid) +{ + this->thread[tid]->inSyscall = true; + this->commit.generateTCEvent(tid); +} + +template +void +MipsO3CPU::trap(Fault fault, unsigned tid) +{ + // Pass the thread's TC into the invoke method. + fault->invoke(this->threadContexts[tid]); +} + +#if !FULL_SYSTEM + +template +void +MipsO3CPU::syscall(int64_t callnum, int tid) +{ + DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); + + DPRINTF(Activity,"Activity: syscall() called.\n"); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++(this->thread[tid]->funcExeInst); + + // Execute the actual syscall. + this->thread[tid]->syscall(callnum); + + // Decrease funcExeInst by one as the normal commit will handle + // incrementing it. + --(this->thread[tid]->funcExeInst); + + DPRINTF(O3CPU, "[tid:%i] Register 2 is %i ", tid, this->readIntReg(2)); +} + +template +TheISA::IntReg +MipsO3CPU::getSyscallArg(int i, int tid) +{ + return this->readArchIntReg(MipsISA::ArgumentReg0 + i, tid); +} + +template +void +MipsO3CPU::setSyscallArg(int i, IntReg val, int tid) +{ + this->setArchIntReg(MipsISA::ArgumentReg0 + i, val, tid); +} + +template +void +MipsO3CPU::setSyscallReturn(SyscallReturn return_value, int tid) +{ + // check for error condition. Mips syscall convention is to + // indicate success/failure in reg a3 (r19) and put the + // return value itself in the standard return value reg (v0). + if (return_value.successful()) { + // no error + this->setArchIntReg(SyscallSuccessReg, 0, tid); + this->setArchIntReg(ReturnValueReg, return_value.value(), tid); + } else { + // got an error, return details + this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid); + this->setArchIntReg(ReturnValueReg, -return_value.value(), tid); + } +} +#endif diff --git a/src/cpu/o3/mips/dyn_inst.cc b/src/cpu/o3/mips/dyn_inst.cc new file mode 100755 index 000000000..216aa7d2c --- /dev/null +++ b/src/cpu/o3/mips/dyn_inst.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/mips/dyn_inst_impl.hh" +#include "cpu/o3/mips/impl.hh" + +// Force instantiation of MipsDynInst for all the implementations that +// are needed. +template class MipsDynInst; diff --git a/src/cpu/o3/mips/dyn_inst.hh b/src/cpu/o3/mips/dyn_inst.hh new file mode 100755 index 000000000..06bdfcec4 --- /dev/null +++ b/src/cpu/o3/mips/dyn_inst.hh @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_DYN_INST_HH__ +#define __CPU_O3_MIPS_DYN_INST_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/base_dyn_inst.hh" +#include "cpu/inst_seq.hh" +#include "cpu/o3/mips/cpu.hh" +#include "cpu/o3/mips/impl.hh" + +class Packet; + +/** + * Mostly implementation & ISA specific MipsDynInst. As with most + * other classes in the new CPU model, it is templated on the Impl to + * allow for passing in of all types, such as the CPU type and the ISA + * type. The MipsDynInst serves as the primary interface to the CPU + * for instructions that are executing. + */ +template +class MipsDynInst : public BaseDynInst +{ + public: + /** Typedef for the CPU. */ + typedef typename Impl::O3CPU O3CPU; + + /** Binary machine instruction type. */ + typedef TheISA::MachInst MachInst; + /** Extended machine instruction type. */ + typedef TheISA::ExtMachInst ExtMachInst; + /** Logical register index type. */ + typedef TheISA::RegIndex RegIndex; + /** Integer register index type. */ + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + /** Misc register index type. */ + typedef TheISA::MiscReg MiscReg; + + enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs + }; + + public: + /** BaseDynInst constructor given a binary instruction. */ + MipsDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + O3CPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + MipsDynInst(StaticInstPtr &_staticInst); + + /** Executes the instruction.*/ + Fault execute(); + + /** Initiates the access. Only valid for memory operations. */ + Fault initiateAcc(); + + /** Completes the access. Only valid for memory operations. */ + Fault completeAcc(Packet *pkt); + + private: + /** Initializes variables. */ + void initVars(); + + public: + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg) + { + return this->cpu->readMiscReg(misc_reg, this->threadNumber); + } + + /** Reads a misc. register, including any side-effects the read + * might have as defined by the architecture. + */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { + return this->cpu->readMiscRegWithEffect(misc_reg, fault, + this->threadNumber); + } + + /** Sets a misc. register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + this->instResult.integer = val; + return this->cpu->setMiscReg(misc_reg, val, this->threadNumber); + } + + /** Sets a misc. register, including any side-effects the write + * might have as defined by the architecture. + */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + return this->cpu->setMiscRegWithEffect(misc_reg, val, + this->threadNumber); + } + + /** Calls a syscall. */ + void syscall(int64_t callnum); + + private: + /** Physical register index of the destination registers of this + * instruction. + */ + PhysRegIndex _destRegIdx[MaxInstDestRegs]; + + /** Physical register index of the source registers of this + * instruction. + */ + PhysRegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** Physical register index of the previous producers of the + * architected destinations. + */ + PhysRegIndex _prevDestRegIdx[MaxInstDestRegs]; + + public: + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + uint64_t readIntReg(const StaticInst *si, int idx) + { + return this->cpu->readIntReg(_srcRegIdx[idx]); + } + + FloatReg readFloatReg(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatReg(_srcRegIdx[idx], width); + } + + FloatReg readFloatReg(const StaticInst *si, int idx) + { + return this->cpu->readFloatReg(_srcRegIdx[idx]); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx], width); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx]); + } + + /** @todo: Make results into arrays so they can handle multiple dest + * registers. + */ + void setIntReg(const StaticInst *si, int idx, uint64_t val) + { + this->cpu->setIntReg(_destRegIdx[idx], val); + BaseDynInst::setIntReg(si, idx, val); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) + { + this->cpu->setFloatReg(_destRegIdx[idx], val, width); + BaseDynInst::setFloatReg(si, idx, val, width); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val) + { + this->cpu->setFloatReg(_destRegIdx[idx], val); + BaseDynInst::setFloatReg(si, idx, val); + } + + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val, width); + BaseDynInst::setFloatRegBits(si, idx, val); + } + + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val); + BaseDynInst::setFloatRegBits(si, idx, val); + } + + /** Returns the physical register index of the i'th destination + * register. + */ + PhysRegIndex renamedDestRegIdx(int idx) const + { + return _destRegIdx[idx]; + } + + /** Returns the physical register index of the i'th source register. */ + PhysRegIndex renamedSrcRegIdx(int idx) const + { + return _srcRegIdx[idx]; + } + + /** Returns the physical register index of the previous physical register + * that remapped to the same logical register index. + */ + PhysRegIndex prevDestRegIdx(int idx) const + { + return _prevDestRegIdx[idx]; + } + + /** Renames a destination register to a physical register. Also records + * the previous physical register that the logical register mapped to. + */ + void renameDestReg(int idx, + PhysRegIndex renamed_dest, + PhysRegIndex previous_rename) + { + _destRegIdx[idx] = renamed_dest; + _prevDestRegIdx[idx] = previous_rename; + } + + /** Renames a source logical register to the physical register which + * has/will produce that logical register's result. + * @todo: add in whether or not the source register is ready. + */ + void renameSrcReg(int idx, PhysRegIndex renamed_src) + { + _srcRegIdx[idx] = renamed_src; + } + + public: + /** Calculates EA part of a memory instruction. Currently unused, + * though it may be useful in the future if we want to split + * memory operations into EA calculation and memory access parts. + */ + Fault calcEA() + { + return this->staticInst->eaCompInst()->execute(this, this->traceData); + } + + /** Does the memory access part of a memory instruction. Currently unused, + * though it may be useful in the future if we want to split + * memory operations into EA calculation and memory access parts. + */ + Fault memAccess() + { + return this->staticInst->memAccInst()->execute(this, this->traceData); + } +}; + +#endif // __CPU_O3_MIPS_DYN_INST_HH__ + diff --git a/src/cpu/o3/mips/dyn_inst_impl.hh b/src/cpu/o3/mips/dyn_inst_impl.hh new file mode 100755 index 000000000..57dec1ccf --- /dev/null +++ b/src/cpu/o3/mips/dyn_inst_impl.hh @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/o3/mips/dyn_inst.hh" + +template +MipsDynInst::MipsDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, O3CPU *cpu) + : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) +{ + initVars(); +} + +template +MipsDynInst::MipsDynInst(StaticInstPtr &_staticInst) + : BaseDynInst(_staticInst) +{ + initVars(); +} + +template +void +MipsDynInst::initVars() +{ + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < this->staticInst->numDestRegs(); i++) { + _destRegIdx[i] = this->staticInst->destRegIdx(i); + } + + for (int i = 0; i < this->staticInst->numSrcRegs(); i++) { + _srcRegIdx[i] = this->staticInst->srcRegIdx(i); + this->_readySrcRegIdx[i] = 0; + } +} + +template +Fault +MipsDynInst::execute() +{ + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the TC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the TC). Fix this. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + this->fault = this->staticInst->execute(this, this->traceData); + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template +Fault +MipsDynInst::initiateAcc() +{ + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the TC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the TC). Fix this. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + this->fault = this->staticInst->initiateAcc(this, this->traceData); + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template +Fault +MipsDynInst::completeAcc(Packet *pkt) +{ + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); + + return this->fault; +} + +template +void +MipsDynInst::syscall(int64_t callnum) +{ + this->cpu->syscall(callnum, this->threadNumber); +} + diff --git a/src/cpu/o3/mips/impl.hh b/src/cpu/o3/mips/impl.hh new file mode 100644 index 000000000..ac7181a19 --- /dev/null +++ b/src/cpu/o3/mips/impl.hh @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_IMPL_HH__ +#define __CPU_O3_MIPS_IMPL_HH__ + +#include "arch/mips/isa_traits.hh" + +#include "cpu/o3/mips/params.hh" +#include "cpu/o3/cpu_policy.hh" + + +// Forward declarations. +template +class MipsDynInst; + +template +class MipsO3CPU; + +/** Implementation specific struct that defines several key types to the + * CPU, the stages within the CPU, the time buffers, and the DynInst. + * The struct defines the ISA, the CPU policy, the specific DynInst, the + * specific O3CPU, and all of the structs from the time buffers to do + * communication. + * This is one of the key things that must be defined for each hardware + * specific CPU implementation. + */ +struct MipsSimpleImpl +{ + /** The type of MachInst. */ + typedef TheISA::MachInst MachInst; + + /** The CPU policy to be used, which defines all of the CPU stages. */ + typedef SimpleCPUPolicy CPUPol; + + /** The DynInst type to be used. */ + typedef MipsDynInst DynInst; + + /** The refcounted DynInst pointer to be used. In most cases this is + * what should be used, and not DynInst *. + */ + typedef RefCountingPtr DynInstPtr; + + /** The O3CPU type to be used. */ + typedef MipsO3CPU O3CPU; + + /** Same typedef, but for CPUType. BaseDynInst may not always use + * an O3 CPU, so it's clearer to call it CPUType instead in that + * case. + */ + typedef O3CPU CPUType; + + /** The Params to be passed to each stage. */ + typedef MipsSimpleParams Params; + + enum { + MaxWidth = 8, + MaxThreads = 4 + }; +}; + +/** The O3Impl to be used. */ +typedef MipsSimpleImpl O3CPUImpl; + +#endif // __CPU_O3_MIPS_IMPL_HH__ diff --git a/src/cpu/o3/mips/params.hh b/src/cpu/o3/mips/params.hh new file mode 100644 index 000000000..d1ac62e21 --- /dev/null +++ b/src/cpu/o3/mips/params.hh @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_MIPS_PARAMS_HH__ +#define __CPU_O3_MIPS_PARAMS_HH__ + +#include "cpu/o3/cpu.hh" +#include "cpu/o3/params.hh" + +//Forward declarations +//class MipsDTB; +//class MipsITB; +class MemObject; +class Process; +class System; + +/** + * This file defines the parameters that will be used for the MipsO3CPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. + */ + +class MipsSimpleParams : public O3Params +{ + public: + MipsSimpleParams() {} + +#if FULL_SYSTEM + //Full System Paramater Objects place here + MipsITB *itb; + MipsDTB *dtb; +#endif +}; + +#endif // __CPU_O3_MIPS_PARAMS_HH__ diff --git a/src/cpu/o3/mips/thread_context.cc b/src/cpu/o3/mips/thread_context.cc new file mode 100755 index 000000000..0061a2a63 --- /dev/null +++ b/src/cpu/o3/mips/thread_context.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" +#include "cpu/o3/thread_context_impl.hh" + +template class O3ThreadContext; + diff --git a/src/cpu/o3/mips/thread_context.hh b/src/cpu/o3/mips/thread_context.hh new file mode 100644 index 000000000..5b03426f1 --- /dev/null +++ b/src/cpu/o3/mips/thread_context.hh @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" + +template +class MipsTC : public O3ThreadContext +{ + public: + virtual uint64_t readNextNPC() + { + return this->cpu->readNextNPC(this->thread->readTid()); + } + + virtual void setNextNPC(uint64_t val) + { + this->cpu->setNextNPC(val, this->thread->readTid()); + } + + virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, + TheISA::RegFile::ContextVal val) + { panic("Not supported on Mips!"); } + + /** This function exits the thread context in the CPU and returns + * 1 if the CPU has no more active threads (meaning it's OK to exit); + * Used in syscall-emulation mode when a thread executes the 'exit' + * syscall. + */ + virtual int exit() + { + this->deallocate(); + + // If there are still threads executing in the system + if (this->cpu->numActiveThreads()) + return 0; // don't exit simulation + else + return 1; // exit simulation + } +}; diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 034087feb..ba26a01dd 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -76,6 +76,7 @@ class DefaultRename // using a list instead of a queue. (Most other stages use a // queue) typedef std::list InstQueue; + typedef typename std::list::iterator ListIt; public: /** Overall rename status. Used to determine if the CPU can @@ -170,7 +171,7 @@ class DefaultRename void takeOverFrom(); /** Squashes all instructions in a thread. */ - void squash(unsigned tid); + void squash(const InstSeqNum &squash_seq_num, unsigned tid); /** Ticks rename, which processes all input signals and attempts to rename * as many instructions as possible. @@ -222,7 +223,7 @@ class DefaultRename bool unblock(unsigned tid); /** Executes actual squash, removing squashed instructions. */ - void doSquash(unsigned tid); + void doSquash(const InstSeqNum &squash_seq_num, unsigned tid); /** Removes a committed instruction's rename history. */ void removeFromHistory(InstSeqNum inst_seq_num, unsigned tid); diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 805a72808..ee251de00 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include @@ -318,7 +319,7 @@ DefaultRename::takeOverFrom() template void -DefaultRename::squash(unsigned tid) +DefaultRename::squash(const InstSeqNum &squash_seq_num, unsigned tid) { DPRINTF(Rename, "[tid:%u]: Squashing instructions.\n",tid); @@ -341,19 +342,55 @@ DefaultRename::squash(unsigned tid) unsigned squashCount = 0; for (int i=0; isize; i++) { - if (fromDecode->insts[i]->threadNumber == tid) { + if (fromDecode->insts[i]->threadNumber == tid && + fromDecode->insts[i]->seqNum > squash_seq_num) { fromDecode->insts[i]->setSquashed(); wroteToTimeBuffer = true; squashCount++; } + } + // Clear the instruction list and skid buffer in case they have any + // insts in them. Since we support multiple ISAs, we cant just: + // "insts[tid].clear();" or "skidBuffer[tid].clear()" since there is + // a possible delay slot inst for different architectures + // insts[tid].clear(); +#if THE_ISA == ALPHA_ISA insts[tid].clear(); +#else + DPRINTF(Rename, "[tid:%i] Squashing incoming decode instructions until " + "[sn:%i].\n",tid, squash_seq_num); + ListIt ilist_it = insts[tid].begin(); + while (ilist_it != insts[tid].end()) { + if ((*ilist_it)->seqNum > squash_seq_num) { + (*ilist_it)->setSquashed(); + DPRINTF(Rename, "Squashing incoming decode instruction, " + "[tid:%i] [sn:%i] PC %08p.\n", tid, (*ilist_it)->seqNum, (*ilist_it)->PC); + } + ilist_it++; + } +#endif // Clear the skid buffer in case it has any data in it. + // See comments above. + // skidBuffer[tid].clear(); +#if THE_ISA == ALPHA_ISA skidBuffer[tid].clear(); - - doSquash(tid); +#else + DPRINTF(Rename, "[tid:%i] Squashing incoming skidbuffer instructions " + "until [sn:%i].\n", tid, squash_seq_num); + ListIt slist_it = skidBuffer[tid].begin(); + while (slist_it != skidBuffer[tid].end()) { + if ((*slist_it)->seqNum > squash_seq_num) { + (*slist_it)->setSquashed(); + DPRINTF(Rename, "Squashing skidbuffer instruction, [tid:%i] [sn:%i]" + "PC %08p.\n", tid, (*slist_it)->seqNum, (*slist_it)->PC); + } + slist_it++; + } +#endif + doSquash(squash_seq_num, tid); } template @@ -572,7 +609,7 @@ DefaultRename::renameInsts(unsigned tid) if (inst->isSquashed()) { DPRINTF(Rename, "[tid:%u]: instruction %i with PC %#x is " "squashed, skipping.\n", - tid, inst->seqNum, inst->threadNumber,inst->readPC()); + tid, inst->seqNum, inst->readPC()); ++renameSquashedInsts; @@ -707,8 +744,10 @@ DefaultRename::sortInsts() { int insts_from_decode = fromDecode->size; #ifdef DEBUG +#if THE_ISA == ALPHA_ISA for (int i=0; i < numThreads; i++) assert(insts[i].empty()); +#endif #endif for (int i = 0; i < insts_from_decode; ++i) { DynInstPtr inst = fromDecode->insts[i]; @@ -824,12 +863,10 @@ DefaultRename::unblock(unsigned tid) template void -DefaultRename::doSquash(unsigned tid) +DefaultRename::doSquash(const InstSeqNum &squashed_seq_num, unsigned tid) { typename list::iterator hb_it = historyBuffer[tid].begin(); - InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum; - // After a syscall squashes everything, the history buffer may be empty // but the ROB may still be squashing instructions. if (historyBuffer[tid].empty()) { @@ -963,8 +1000,9 @@ DefaultRename::renameDestRegs(DynInstPtr &inst,unsigned tid) historyBuffer[tid].push_front(hb_entry); - DPRINTF(Rename, "[tid:%u]: Adding instruction to history buffer, " - "[sn:%lli].\n",tid, + DPRINTF(Rename, "[tid:%u]: Adding instruction to history buffer " + "(size=%i), [sn:%lli].\n",tid, + historyBuffer[tid].size(), (*historyBuffer[tid].begin()).instSeqNum); // Tell the instruction to rename the appropriate destination @@ -1143,7 +1181,13 @@ DefaultRename::checkSignalsAndUpdate(unsigned tid) DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from " "commit.\n", tid); - squash(tid); +#if THE_ISA == ALPHA_ISA + InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum; +#else + InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].bdelayDoneSeqNum; +#endif + + squash(squashed_seq_num, tid); return true; } diff --git a/src/cpu/o3/scoreboard.hh b/src/cpu/o3/scoreboard.hh index f8e4df3b7..544084c40 100644 --- a/src/cpu/o3/scoreboard.hh +++ b/src/cpu/o3/scoreboard.hh @@ -35,7 +35,7 @@ #include #include #include -#include "arch/alpha/isa_traits.hh" +#include "arch/isa_traits.hh" #include "base/trace.hh" #include "base/traceflags.hh" #include "cpu/o3/comm.hh" diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index df8d1a6d8..9ca02b9f3 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -26,12 +26,12 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim - * Korey Sewell */ #ifndef __CPU_O3_THREAD_CONTEXT_HH__ #define __CPU_O3_THREAD_CONTEXT_HH__ +#include "cpu/thread_context.hh" #include "cpu/o3/isa_specific.hh" class EndQuiesceEvent; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 240696c2b..622455745 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -26,7 +26,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Steve Reinhardt - * Korey Sewell */ #include "arch/utility.hh" @@ -455,6 +454,7 @@ BaseSimpleCPU::advancePC(Fault fault) #else thread->setNextPC(thread->readNextNPC()); thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); + assert(thread->readNextPC() != thread->readNextNPC()); #endif } diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index ea1a65148..0d356bb4c 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -214,6 +214,7 @@ class StaticInstBase : public RefCounted bool isIndirectCtrl() const { return flags[IsIndirectControl]; } bool isCondCtrl() const { return flags[IsCondControl]; } bool isUncondCtrl() const { return flags[IsUncondControl]; } + bool isCondDelaySlot() const { return flags[IsCondDelaySlot]; } bool isThreadSync() const { return flags[IsThreadSync]; } bool isSerializing() const { return flags[IsSerializing] ||