From 21df09cf7aa6bdec5de11904751d355e773a3168 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 11 May 2006 19:18:36 -0400 Subject: [PATCH] Fixes for ozone CPU to successfully boot and run linux. cpu/base_dyn_inst.hh: Remove snoop function (did not mean to commit it). cpu/ozone/back_end_impl.hh: Set instruction as having its result ready, not completed. cpu/ozone/cpu.hh: Fixes for store conditionals. Use an additional lock addr list to make sure that the access is valid. I don't know if this is fully necessary, but it gives me a peace of mind (at some performance cost). Make sure to schedule for cycles(1) and not just 1 cycle in the future as tick = 1ps. Also support the new Checker. cpu/ozone/cpu_builder.cc: Add parameter for maxOutstandingMemOps so it can be set through the config. Also add in the checker. Right now it's a BaseCPU simobject, but that may change in the future. cpu/ozone/cpu_impl.hh: Add support for the checker. For now there's a dynamic cast to convert the simobject passed back from the builder to the proper Checker type. It's ugly, but only happens at startup, and is probably a justified use of dynamic cast. Support switching out/taking over from other CPUs. Correct indexing problem for float registers. cpu/ozone/dyn_inst.hh: Add ability for instructions to wait on memory instructions in addition to source register instructions. This is needed for memory dependence predictors and memory barriers. cpu/ozone/dyn_inst_impl.hh: Support waiting on memory operations. Use "resultReady" to differentiate an instruction having its registers produced vs being totally completed. cpu/ozone/front_end.hh: Support switching out. Also record if an interrupt is pending. cpu/ozone/front_end_impl.hh: Support switching out. Also support stalling the front end if an interrupt is pending. cpu/ozone/lw_back_end.hh: Add checker in. Support switching out. Support memory barriers. cpu/ozone/lw_back_end_impl.hh: Lots of changes to get things to work right. 
Faults, traps, interrupts all wait until all stores have written back (important). Memory barriers are supported, as is the general ability for instructions to be dependent on other memory instructions. cpu/ozone/lw_lsq.hh: Support switching out. Also use store writeback events in all cases, not just dcache misses. cpu/ozone/lw_lsq_impl.hh: Support switching out. Also use store writeback events in all cases, not just dcache misses. Support the checker CPU. Marks instructions as completed once the functional access is done (which has to be done for the checker to be able to verify results). cpu/ozone/simple_params.hh: Add max outstanding mem ops parameter. python/m5/objects/OzoneCPU.py: Add max outstanding mem ops, checker. --HG-- extra : convert_revision : f4d408e1bb1f25836a097b6abe3856111e950c59 --- cpu/base_dyn_inst.hh | 5 - cpu/ozone/back_end_impl.hh | 2 +- cpu/ozone/cpu.hh | 28 +++- cpu/ozone/cpu_builder.cc | 16 ++- cpu/ozone/cpu_impl.hh | 118 ++++++++++++---- cpu/ozone/dyn_inst.hh | 40 ++++-- cpu/ozone/dyn_inst_impl.hh | 43 +++++- cpu/ozone/front_end.hh | 13 ++ cpu/ozone/front_end_impl.hh | 58 +++++++- cpu/ozone/lw_back_end.hh | 20 ++- cpu/ozone/lw_back_end_impl.hh | 256 +++++++++++++++++++++++++++------- cpu/ozone/lw_lsq.hh | 32 ++++- cpu/ozone/lw_lsq_impl.hh | 189 +++++++++++++++++++------ cpu/ozone/simple_params.hh | 1 + python/m5/objects/OzoneCPU.py | 3 + 15 files changed, 660 insertions(+), 164 deletions(-) diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh index 18978142d..cd754dc3c 100644 --- a/cpu/base_dyn_inst.hh +++ b/cpu/base_dyn_inst.hh @@ -117,11 +117,6 @@ class BaseDynInst : public FastAlloc, public RefCounted Fault write(T data, Addr addr, unsigned flags, uint64_t *res); - // @todo: Probably should not have this function in the DynInst. 
- template - bool snoop(MemReqPtr &req, T &data) - { return cpu->snoop(req, data); } - void prefetch(Addr addr, unsigned flags); void writeHint(Addr addr, int size, unsigned flags); Fault copySrcTranslate(Addr src); diff --git a/cpu/ozone/back_end_impl.hh b/cpu/ozone/back_end_impl.hh index 0b0f04f59..36770d65c 100644 --- a/cpu/ozone/back_end_impl.hh +++ b/cpu/ozone/back_end_impl.hh @@ -1385,7 +1385,7 @@ BackEnd::writebackInsts() inst->seqNum, inst->readPC()); inst->setCanCommit(); - inst->setCompleted(); + inst->setResultReady(); if (inst->isExecuted()) { int dependents = IQ.wakeDependents(inst); diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh index 56b6571a2..eec8902d8 100644 --- a/cpu/ozone/cpu.hh +++ b/cpu/ozone/cpu.hh @@ -53,6 +53,7 @@ class AlphaDTB; class PhysicalMemory; class MemoryController; +class Sampler; class RemoteGDB; class GDBListener; @@ -69,6 +70,9 @@ namespace Trace { class InstRecord; } +template +class Checker; + /** * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with * simple out-of-order capabilities added to it. It is still a 1 CPI machine @@ -226,7 +230,9 @@ class OzoneCPU : public BaseCPU }; // execution context proxy - OzoneXC xcProxy; + OzoneXC ozoneXC; + ExecContext *xcProxy; + ExecContext *checkerXC; typedef OzoneThreadState ImplState; @@ -245,6 +251,7 @@ class OzoneCPU : public BaseCPU void tick(); std::set snList; + std::set lockAddrList; private: struct TickEvent : public Event { @@ -262,9 +269,9 @@ class OzoneCPU : public BaseCPU void scheduleTickEvent(int delay) { if (tickEvent.squashed()) - tickEvent.reschedule(curTick + delay); + tickEvent.reschedule(curTick + cycles(delay)); else if (!tickEvent.scheduled()) - tickEvent.schedule(curTick + delay); + tickEvent.schedule(curTick + cycles(delay)); } /// Unschedule tick event, regardless of its current state. 
@@ -322,7 +329,7 @@ class OzoneCPU : public BaseCPU int cpuId; - void switchOut(); + void switchOut(Sampler *sampler); void takeOverFrom(BaseCPU *oldCPU); #if FULL_SYSTEM @@ -472,6 +479,7 @@ class OzoneCPU : public BaseCPU Fault error; if (req->flags & LOCKED) { // lockAddr = req->paddr; + lockAddrList.insert(req->paddr); lockFlag = true; } @@ -546,7 +554,13 @@ class OzoneCPU : public BaseCPU req->result = 2; } else { if (this->lockFlag/* && this->lockAddr == req->paddr*/) { - req->result = 1; + if (lockAddrList.find(req->paddr) != + lockAddrList.end()) { + req->result = 1; + } else { + req->result = 0; + return NoFault; + } } else { req->result = 0; return NoFault; @@ -599,7 +613,7 @@ class OzoneCPU : public BaseCPU void setSyscallReturn(SyscallReturn return_value, int tid); #endif - ExecContext *xcBase() { return &xcProxy; } + ExecContext *xcBase() { return xcProxy; } bool decoupledFrontEnd; struct CommStruct { @@ -615,6 +629,8 @@ class OzoneCPU : public BaseCPU bool lockFlag; Stats::Scalar<> quiesceCycles; + + Checker *checker; }; #endif // __CPU_OZONE_CPU_HH__ diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc index 0146dd1bd..64aa49c71 100644 --- a/cpu/ozone/cpu_builder.cc +++ b/cpu/ozone/cpu_builder.cc @@ -1,6 +1,7 @@ #include +#include "cpu/checker/cpu.hh" #include "cpu/inst_seq.hh" #include "cpu/ozone/cpu.hh" #include "cpu/ozone/ozone_impl.hh" @@ -50,6 +51,8 @@ SimObjectVectorParam workload; SimObjectParam mem; +SimObjectParam checker; + Param max_insts_any_thread; Param max_insts_all_threads; Param max_loads_any_thread; @@ -66,6 +69,7 @@ Param backEndSquashLatency; Param backEndLatency; Param maxInstBufferSize; Param numPhysicalRegs; +Param maxOutstandingMemOps; Param decodeToFetchDelay; Param renameToFetchDelay; @@ -164,6 +168,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) INIT_PARAM_DFLT(mem, "Memory", NULL), + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + INIT_PARAM_DFLT(max_insts_any_thread, "Terminate when any thread reaches this 
inst count", 0), @@ -190,6 +196,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), INIT_PARAM(numPhysicalRegs, "Number of physical registers"), + INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4), INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), @@ -314,7 +321,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) #endif // FULL_SYSTEM params->mem = mem; - + params->checker = checker; params->max_insts_any_thread = max_insts_any_thread; params->max_insts_all_threads = max_insts_all_threads; params->max_loads_any_thread = max_loads_any_thread; @@ -334,6 +341,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) params->backEndLatency = backEndLatency; params->maxInstBufferSize = maxInstBufferSize; params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; + params->maxOutstandingMemOps = maxOutstandingMemOps; params->decodeToFetchDelay = decodeToFetchDelay; params->renameToFetchDelay = renameToFetchDelay; @@ -445,6 +453,8 @@ SimObjectVectorParam workload; SimObjectParam mem; +SimObjectParam checker; + Param max_insts_any_thread; Param max_insts_all_threads; Param max_loads_any_thread; @@ -559,6 +569,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) INIT_PARAM_DFLT(mem, "Memory", NULL), + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + INIT_PARAM_DFLT(max_insts_any_thread, "Terminate when any thread reaches this inst count", 0), @@ -709,7 +721,7 @@ CREATE_SIM_OBJECT(SimpleOzoneCPU) #endif // FULL_SYSTEM params->mem = mem; - + params->checker = checker; params->max_insts_any_thread = max_insts_any_thread; params->max_insts_all_threads = max_insts_all_threads; params->max_loads_any_thread = max_loads_any_thread; diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index 17d944e7c..4f3fdf521 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -33,6 +33,7 @@ 
#include "base/trace.hh" #include "config/full_system.hh" #include "cpu/base.hh" +#include "cpu/checker/exec_context.hh" #include "cpu/exec_context.hh" #include "cpu/exetrace.hh" #include "cpu/ozone/cpu.hh" @@ -156,17 +157,33 @@ OzoneCPU::OzoneCPU(Params *p) #endif comm(5, 5) { - + if (p->checker) { + BaseCPU *temp_checker = p->checker; + checker = dynamic_cast *>(temp_checker); + } else { + checker = NULL; + } frontEnd = new FrontEnd(p); backEnd = new BackEnd(p); _status = Idle; - thread.xcProxy = &xcProxy; + if (checker) { + checker->setMemory(mem); +#if FULL_SYSTEM + checker->setSystem(p->system); +#endif + checkerXC = new CheckerExecContext(&ozoneXC, checker); + thread.xcProxy = checkerXC; + xcProxy = checkerXC; + } else { + thread.xcProxy = &ozoneXC; + xcProxy = &ozoneXC; + } thread.inSyscall = false; - xcProxy.cpu = this; - xcProxy.thread = &thread; + ozoneXC.cpu = this; + ozoneXC.thread = &thread; thread.setStatus(ExecContext::Suspended); #if FULL_SYSTEM @@ -177,7 +194,7 @@ OzoneCPU::OzoneCPU(Params *p) thread.tid = 0; thread.mem = p->mem; - thread.quiesceEvent = new EndQuiesceEvent(&xcProxy); + thread.quiesceEvent = new EndQuiesceEvent(xcProxy); system = p->system; itb = p->itb; @@ -187,9 +204,10 @@ OzoneCPU::OzoneCPU(Params *p) if (p->profile) { thread.profile = new FunctionProfile(p->system->kernelSymtab); + // @todo: This might be better as an ExecContext instead of OzoneXC Callback *cb = new MakeCallback(&xcProxy); + &OzoneXC::dumpFuncProfile>(&ozoneXC); registerExitCallback(cb); } @@ -198,7 +216,6 @@ OzoneCPU::OzoneCPU(Params *p) static ProfileNode dummyNode; thread.profileNode = &dummyNode; thread.profilePC = 3; - #else // xc = new ExecContext(this, /* thread_num */ 0, p->workload[0], /* asid */ 0); thread.cpu = this; @@ -225,13 +242,13 @@ OzoneCPU::OzoneCPU(Params *p) issueWidth = p->issueWidth; */ - execContexts.push_back(&xcProxy); + execContexts.push_back(xcProxy); frontEnd->setCPU(this); backEnd->setCPU(this); - frontEnd->setXC(&xcProxy); - 
backEnd->setXC(&xcProxy); + frontEnd->setXC(xcProxy); + backEnd->setXC(xcProxy); frontEnd->setThreadState(&thread); backEnd->setThreadState(&thread); @@ -250,7 +267,7 @@ OzoneCPU::OzoneCPU(Params *p) for (int i = 0; i < TheISA::TotalNumRegs; ++i) { thread.renameTable[i] = new DynInst(this); - thread.renameTable[i]->setCompleted(); + thread.renameTable[i]->setResultReady(); } frontEnd->renameTable.copyFrom(thread.renameTable); @@ -312,11 +329,15 @@ OzoneCPU::copyToXC() */ template void -OzoneCPU::switchOut() +OzoneCPU::switchOut(Sampler *sampler) { + // Front end needs state from back end, so switch out the back end first. + backEnd->switchOut(); + frontEnd->switchOut(); _status = SwitchedOut; if (tickEvent.scheduled()) tickEvent.squash(); + sampler->signalSwitched(); } template @@ -325,8 +346,16 @@ OzoneCPU::takeOverFrom(BaseCPU *oldCPU) { BaseCPU::takeOverFrom(oldCPU); + backEnd->takeOverFrom(); + frontEnd->takeOverFrom(); assert(!tickEvent.scheduled()); + // @todo: Fix hardcoded number + // Clear out any old information in time buffer. + for (int i = 0; i < 6; ++i) { + comm.advance(); + } + // if any of this CPU's ExecContexts are active, mark the CPU as // running and schedule its tick event. 
for (int i = 0; i < execContexts.size(); ++i) { @@ -470,7 +499,7 @@ OzoneCPU::serialize(std::ostream &os) BaseCPU::serialize(os); SERIALIZE_ENUM(_status); nameOut(os, csprintf("%s.xc", name())); - xcProxy.serialize(os); + ozoneXC.serialize(os); nameOut(os, csprintf("%s.tickEvent", name())); tickEvent.serialize(os); } @@ -481,7 +510,7 @@ OzoneCPU::unserialize(Checkpoint *cp, const std::string &section) { BaseCPU::unserialize(cp, section); UNSERIALIZE_ENUM(_status); - xcProxy.unserialize(cp, csprintf("%s.xc", section)); + ozoneXC.unserialize(cp, csprintf("%s.xc", section)); tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); } @@ -579,7 +608,7 @@ template Addr OzoneCPU::dbg_vtophys(Addr addr) { - return vtophys(&xcProxy, addr); + return vtophys(xcProxy, addr); } #endif // FULL_SYSTEM /* @@ -725,7 +754,7 @@ OzoneCPU::tick() comInstEventQueue[0]->serviceEvents(numInst); if (!tickEvent.scheduled() && _status == Running) - tickEvent.schedule(curTick + 1); + tickEvent.schedule(curTick + cycles(1)); } template @@ -750,7 +779,7 @@ OzoneCPU::syscall() DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst); - thread.process->syscall(&xcProxy); + thread.process->syscall(xcProxy); thread.funcExeInst--; @@ -784,19 +813,17 @@ OzoneCPU::hwrei() { // Need to move this to ISA code // May also need to make this per thread +/* if (!inPalMode()) return new UnimplementedOpcodeFault; thread.setNextPC(thread.readMiscReg(AlphaISA::IPR_EXC_ADDR)); - +*/ lockFlag = false; + lockAddrList.clear(); + kernelStats->hwrei(); - // Not sure how to make a similar check in the Ozone model -// if (!misspeculating()) { - kernelStats->hwrei(); - - checkInterrupts = true; -// } + checkInterrupts = true; // FIXME: XXX check for interrupts?
XXX return NoFault; @@ -847,6 +874,11 @@ OzoneCPU::processInterrupts() if (ipl && ipl > thread.readMiscReg(IPR_IPLR)) { thread.setMiscReg(IPR_ISR, summary); thread.setMiscReg(IPR_INTID, ipl); + // @todo: Make this more transparent + if (checker) { + checkerXC->setMiscReg(IPR_ISR, summary); + checkerXC->setMiscReg(IPR_INTID, ipl); + } Fault fault = new InterruptFault; fault->invoke(thread.getXCProxy()); DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", @@ -860,7 +892,7 @@ OzoneCPU::simPalCheck(int palFunc) { // Need to move this to ISA code // May also need to make this per thread - this->kernelStats->callpal(palFunc, &xcProxy); + this->kernelStats->callpal(palFunc, xcProxy); switch (palFunc) { case PAL::halt: @@ -944,7 +976,28 @@ OzoneCPU::OzoneXC::dumpFuncProfile() template void OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) -{ } +{ + // some things should already be set up + assert(getMemPtr() == old_context->getMemPtr()); +#if FULL_SYSTEM + assert(getSystemPtr() == old_context->getSystemPtr()); +#else + assert(getProcessPtr() == old_context->getProcessPtr()); +#endif + + // copy over functional state + setStatus(old_context->status()); + copyArchRegs(old_context); + setCpuId(old_context->readCpuId()); +#if !FULL_SYSTEM + setFuncExeInst(old_context->readFuncExeInst()); +#endif + +// storeCondFailures = 0; + cpu->lockFlag = false; + + old_context->setStatus(ExecContext::Unallocated); +} template void @@ -1062,21 +1115,24 @@ template float OzoneCPU::OzoneXC::readFloatRegSingle(int reg_idx) { - return thread->renameTable[reg_idx]->readFloatResult(); + int idx = reg_idx + TheISA::FP_Base_DepTag; + return thread->renameTable[idx]->readFloatResult(); } template double OzoneCPU::OzoneXC::readFloatRegDouble(int reg_idx) { - return thread->renameTable[reg_idx]->readDoubleResult(); + int idx = reg_idx + TheISA::FP_Base_DepTag; + return thread->renameTable[idx]->readDoubleResult(); } template uint64_t OzoneCPU::OzoneXC::readFloatRegInt(int reg_idx) { - 
return thread->renameTable[reg_idx]->readIntResult(); + int idx = reg_idx + TheISA::FP_Base_DepTag; + return thread->renameTable[idx]->readIntResult(); } template @@ -1101,7 +1157,9 @@ template void OzoneCPU::OzoneXC::setFloatRegDouble(int reg_idx, double val) { - thread->renameTable[reg_idx]->setDoubleResult(val); + int idx = reg_idx + TheISA::FP_Base_DepTag; + + thread->renameTable[idx]->setDoubleResult(val); if (!thread->inSyscall) { cpu->squashFromXC(); diff --git a/cpu/ozone/dyn_inst.hh b/cpu/ozone/dyn_inst.hh index 4382af0fd..f251c28ea 100644 --- a/cpu/ozone/dyn_inst.hh +++ b/cpu/ozone/dyn_inst.hh @@ -59,9 +59,9 @@ class OzoneDynInst : public BaseDynInst typedef TheISA::MiscReg MiscReg; typedef typename std::list::iterator ListIt; - // Note that this is duplicated from the BaseDynInst class; I'm simply not - // sure the enum would carry through so I could use it in array - // declarations in this class. + // Note that this is duplicated from the BaseDynInst class; I'm + // simply not sure the enum would carry through so I could use it + // in array declarations in this class. enum { MaxInstSrcRegs = TheISA::MaxInstSrcRegs, MaxInstDestRegs = TheISA::MaxInstDestRegs @@ -90,9 +90,23 @@ class OzoneDynInst : public BaseDynInst void addDependent(DynInstPtr &dependent_inst); std::vector &getDependents() { return dependents; } + std::vector &getMemDeps() { return memDependents; } + std::list &getMemSrcs() { return srcMemInsts; } void wakeDependents(); + void wakeMemDependents(); + + void addMemDependent(DynInstPtr &inst) { memDependents.push_back(inst); } + + void addSrcMemInst(DynInstPtr &inst) { srcMemInsts.push_back(inst); } + + void markMemInstReady(OzoneDynInst *inst); + + // For now I will remove instructions from the list when they wake + // up. In the future, you only really need a counter. 
+ bool memDepReady() { return srcMemInsts.empty(); } + // void setBPredInfo(const BPredInfo &bp_info) { bpInfo = bp_info; } // BPredInfo &getBPredInfo() { return bpInfo; } @@ -104,9 +118,13 @@ class OzoneDynInst : public BaseDynInst std::vector dependents; - /** The instruction that produces the value of the source registers. These - * may be NULL if the value has already been read from the source - * instruction. + std::vector memDependents; + + std::list srcMemInsts; + + /** The instruction that produces the value of the source + * registers. These may be NULL if the value has already been + * read from the source instruction. */ DynInstPtr srcInsts[MaxInstSrcRegs]; @@ -165,22 +183,22 @@ class OzoneDynInst : public BaseDynInst */ void setIntReg(const StaticInst *si, int idx, uint64_t val) { - this->instResult.integer = val; + BaseDynInst::setIntReg(si, idx, val); } void setFloatRegSingle(const StaticInst *si, int idx, float val) { - this->instResult.fp = val; + BaseDynInst::setFloatRegSingle(si, idx, val); } void setFloatRegDouble(const StaticInst *si, int idx, double val) { - this->instResult.dbl = val; + BaseDynInst::setFloatRegDouble(si, idx, val); } void setFloatRegInt(const StaticInst *si, int idx, uint64_t val) { - this->instResult.integer = val; + BaseDynInst::setFloatRegInt(si, idx, val); } void setIntResult(uint64_t result) { this->instResult.integer = result; } @@ -199,6 +217,8 @@ class OzoneDynInst : public BaseDynInst void clearDependents(); + void clearMemDependents(); + public: // ISA stuff MiscReg readMiscReg(int misc_reg); diff --git a/cpu/ozone/dyn_inst_impl.hh b/cpu/ozone/dyn_inst_impl.hh index c83481c9a..a7e4460a1 100644 --- a/cpu/ozone/dyn_inst_impl.hh +++ b/cpu/ozone/dyn_inst_impl.hh @@ -38,7 +38,7 @@ template OzoneDynInst::OzoneDynInst(FullCPU *cpu) : BaseDynInst(0, 0, 0, 0, cpu) { - this->setCompleted(); + this->setResultReady(); initInstPtrs(); } @@ -130,7 +130,7 @@ template bool OzoneDynInst::srcInstReady(int regIdx) { - return 
srcInsts[regIdx]->isCompleted(); + return srcInsts[regIdx]->isResultReady(); } template @@ -149,6 +149,28 @@ OzoneDynInst::wakeDependents() } } +template +void +OzoneDynInst::wakeMemDependents() +{ + for (int i = 0; i < memDependents.size(); ++i) { + memDependents[i]->markMemInstReady(this); + } +} + +template +void +OzoneDynInst::markMemInstReady(OzoneDynInst *inst) +{ + ListIt mem_it = srcMemInsts.begin(); + while ((*mem_it) != inst && mem_it != srcMemInsts.end()) { + mem_it++; + } + assert(mem_it != srcMemInsts.end()); + + srcMemInsts.erase(mem_it); +} + template void OzoneDynInst::initInstPtrs() @@ -164,7 +186,7 @@ bool OzoneDynInst::srcsReady() { for (int i = 0; i < this->numSrcRegs(); ++i) { - if (!srcInsts[i]->isCompleted()) + if (!srcInsts[i]->isResultReady()) return false; } @@ -176,7 +198,7 @@ bool OzoneDynInst::eaSrcsReady() { for (int i = 1; i < this->numSrcRegs(); ++i) { - if (!srcInsts[i]->isCompleted()) + if (!srcInsts[i]->isResultReady()) return false; } @@ -195,6 +217,14 @@ OzoneDynInst::clearDependents() prevDestInst[i] = NULL; } } + +template +void +OzoneDynInst::clearMemDependents() +{ + memDependents.clear(); +} + template MiscReg OzoneDynInst::readMiscReg(int misc_reg) @@ -213,6 +243,7 @@ template Fault OzoneDynInst::setMiscReg(int misc_reg, const MiscReg &val) { + this->setIntResult(val); return this->thread->setMiscReg(misc_reg, val); } @@ -234,11 +265,13 @@ OzoneDynInst::hwrei() this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR)); + this->cpu->hwrei(); +/* this->cpu->kernelStats->hwrei(); this->cpu->checkInterrupts = true; this->cpu->lockFlag = false; - +*/ // FIXME: XXX check for interrupts? 
XXX return NoFault; } diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh index 2bff2544d..188925ae5 100644 --- a/cpu/ozone/front_end.hh +++ b/cpu/ozone/front_end.hh @@ -66,6 +66,14 @@ class FrontEnd bool isEmpty() { return instBuffer.empty(); } + void switchOut(); + + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + + bool switchedOut; + private: bool updateStatus(); @@ -198,6 +206,9 @@ class FrontEnd DynInstPtr barrierInst; + public: + bool interruptPending; + private: // number of idle cycles /* Stats::Average<> notIdleFraction; @@ -223,6 +234,8 @@ class FrontEnd Stats::Scalar<> fetchBlockedCycles; /** Stat for total number of fetched cache lines. */ Stats::Scalar<> fetchedCacheLines; + + Stats::Scalar<> fetchIcacheSquashes; /** Distribution of number of instructions fetched each cycle. */ Stats::Distribution<> fetchNisnDist; // Stats::Vector<> qfull_iq_occupancy; diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh index 7c18386cf..a3eb809d0 100644 --- a/cpu/ozone/front_end_impl.hh +++ b/cpu/ozone/front_end_impl.hh @@ -19,8 +19,11 @@ FrontEnd::FrontEnd(Params *params) width(params->frontEndWidth), freeRegs(params->numPhysicalRegs), numPhysRegs(params->numPhysicalRegs), - serializeNext(false) + serializeNext(false), + interruptPending(false) { + switchedOut = false; + status = Idle; // Setup branch predictor. @@ -127,6 +130,11 @@ FrontEnd::regStats() .desc("Number of cache lines fetched") .prereq(fetchedCacheLines); + fetchIcacheSquashes + .name(name() + ".fetchIcacheSquashes") + .desc("Number of outstanding Icache misses that were squashed") + .prereq(fetchIcacheSquashes); + fetchNisnDist .init(/* base value */ 0, /* last value */ width, @@ -370,6 +378,10 @@ FrontEnd::fetchCacheLine() #endif // FULL_SYSTEM Fault fault = NoFault; + if (interruptPending && flags == 0) { + return fault; + } + // Align the fetch PC so it's at the start of a cache block. 
Addr fetch_PC = icacheBlockAlignPC(PC); @@ -397,7 +409,8 @@ FrontEnd::fetchCacheLine() // exists within the cache. if (icacheInterface && fault == NoFault) { #if FULL_SYSTEM - if (cpu->system->memctrl->badaddr(memReq->paddr)) { + if (cpu->system->memctrl->badaddr(memReq->paddr) || + memReq->flags & UNCACHEABLE) { DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a " "misspeculating path!", memReq->paddr); @@ -497,7 +510,7 @@ FrontEnd::processBarriers(DynInstPtr &inst) dispatchedTempSerializing++; } - // Change status over to BarrierStall so that other stages know + // Change status over to SerializeBlocked so that other stages know // what this is blocked on. status = SerializeBlocked; @@ -613,8 +626,10 @@ FrontEnd::processCacheCompletion(MemReqPtr &req) // Do something here. if (status != IcacheMissStall || - req != memReq) { + req != memReq || + switchedOut) { DPRINTF(FE, "Previous fetch was squashed.\n"); + fetchIcacheSquashes++; return; } @@ -702,6 +717,7 @@ FrontEnd::getInstFromCacheline() DynInstPtr inst = barrierInst; status = Running; barrierInst = NULL; + inst->clearSerializeBefore(); return inst; } @@ -773,7 +789,7 @@ FrontEnd::renameInst(DynInstPtr &inst) DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n", inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum); - if (src_inst->isCompleted()) { + if (src_inst->isResultReady()) { DPRINTF(FE, "Reg ready.\n"); inst->markSrcRegReady(i); } else { @@ -807,6 +823,38 @@ FrontEnd::wakeFromQuiesce() status = Running; } +template +void +FrontEnd::switchOut() +{ + switchedOut = true; + memReq = NULL; + squash(0, 0); + instBuffer.clear(); + instBufferSize = 0; + status = Idle; +} + +template +void +FrontEnd::takeOverFrom(ExecContext *old_xc) +{ + assert(freeRegs == numPhysRegs); + fetchCacheLineNextCycle = true; + + cacheBlkValid = false; + +#if !FULL_SYSTEM +// pTable = params->pTable; +#endif + fetchFault = NoFault; + serializeNext = false; + barrierInst = NULL; + status = Running; + switchedOut = false; + 
interruptPending = false; +} + template void FrontEnd::dumpInsts() diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh index f17c93ff4..028fdaf8c 100644 --- a/cpu/ozone/lw_back_end.hh +++ b/cpu/ozone/lw_back_end.hh @@ -17,6 +17,8 @@ #include "mem/mem_req.hh" #include "sim/eventq.hh" +template +class Checker; class ExecContext; template @@ -126,6 +128,8 @@ class LWBackEnd Addr commitPC; + Tick lastCommitCycle; + bool robEmpty() { return instList.empty(); } bool isFull() { return numInsts >= numROBEntries; } @@ -133,7 +137,7 @@ class LWBackEnd void fetchFault(Fault &fault); - int wakeDependents(DynInstPtr &inst); + int wakeDependents(DynInstPtr &inst, bool memory_deps = false); /** Tells memory dependence unit that a memory instruction needs to be * rescheduled. It will re-execute once replayMemInst() is called. @@ -182,6 +186,12 @@ class LWBackEnd void instToCommit(DynInstPtr &inst); + void switchOut(); + + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + private: void generateTrapEvent(Tick latency = 0); void handleFault(Fault &fault, Tick latency = 0); @@ -303,6 +313,10 @@ class LWBackEnd Fault faultFromFetch; bool fetchHasFault; + bool switchedOut; + + DynInstPtr memBarrier; + private: struct pqCompare { bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const @@ -327,7 +341,7 @@ class LWBackEnd bool exactFullStall; - bool fetchRedirect[Impl::MaxThreads]; +// bool fetchRedirect[Impl::MaxThreads]; // number of cycles stalled for D-cache misses /* Stats::Scalar<> dcacheStallCycles; @@ -414,6 +428,8 @@ class LWBackEnd Stats::VectorDistribution<> ROB_occ_dist; public: void dumpInsts(); + + Checker *checker; }; template diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh index d1290239c..d4829629d 100644 --- a/cpu/ozone/lw_back_end_impl.hh +++ b/cpu/ozone/lw_back_end_impl.hh @@ -1,5 +1,6 @@ #include "encumbered/cpu/full/op_class.hh" +#include "cpu/checker/cpu.hh" #include 
"cpu/ozone/lw_back_end.hh" template @@ -10,28 +11,36 @@ LWBackEnd::generateTrapEvent(Tick latency) TrapEvent *trap = new TrapEvent(this); - trap->schedule(curTick + latency); + trap->schedule(curTick + cpu->cycles(latency)); thread->trapPending = true; } template int -LWBackEnd::wakeDependents(DynInstPtr &inst) +LWBackEnd::wakeDependents(DynInstPtr &inst, bool memory_deps) { assert(!inst->isSquashed()); - std::vector &dependents = inst->getDependents(); + std::vector &dependents = memory_deps ? inst->getMemDeps() : + inst->getDependents(); int num_outputs = dependents.size(); DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum); for (int i = 0; i < num_outputs; i++) { DynInstPtr dep_inst = dependents[i]; - dep_inst->markSrcRegReady(); + if (!memory_deps) { + dep_inst->markSrcRegReady(); + } else { + if (!dep_inst->isSquashed()) + dep_inst->markMemInstReady(inst.get()); + } + DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum); if (dep_inst->readyToIssue() && dep_inst->isInROB() && - !dep_inst->isNonSpeculative()) { + !dep_inst->isNonSpeculative() && + dep_inst->memDepReady() && !dep_inst->isMemBarrier() && !dep_inst->isWriteBarrier()) { DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n", dep_inst->seqNum); exeList.push(dep_inst); @@ -114,6 +123,9 @@ LWBackEnd::LdWritebackEvent::process() // iewStage->wakeCPU(); + if (be->isSwitchedOut()) + return; + if (dcacheMiss) { be->removeDcacheMiss(inst); } @@ -169,16 +181,18 @@ LWBackEnd::DCacheCompletionEvent::description() template LWBackEnd::LWBackEnd(Params *params) : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), - xcSquash(false), cacheCompletionEvent(this), + trapSquash(false), xcSquash(false), cacheCompletionEvent(this), dcacheInterface(params->dcacheInterface), width(params->backEndWidth), exactFullStall(true) { numROBEntries = params->numROBEntries; numInsts = 0; numDispatchEntries = 32; - maxOutstandingMemOps = 4; + maxOutstandingMemOps = 
params->maxOutstandingMemOps; numWaitingMemOps = 0; waitingInsts = 0; + switchedOut = false; + // IQ.setBE(this); LSQ.setBE(this); @@ -533,6 +547,7 @@ LWBackEnd::setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; LSQ.setCPU(cpu_ptr); + checker = cpu->checker; } template @@ -554,30 +569,35 @@ LWBackEnd::checkInterrupts() !cpu->inPalMode(thread->readPC()) && !trapSquash && !xcSquash) { - // Will need to squash all instructions currently in flight and have - // the interrupt handler restart at the last non-committed inst. - // Most of that can be handled through the trap() function. The - // processInterrupts() function really just checks for interrupts - // and then calls trap() if there is an interrupt present. + frontEnd->interruptPending = true; + if (robEmpty() && !LSQ.hasStoresToWB()) { + // Will need to squash all instructions currently in flight and have + // the interrupt handler restart at the last non-committed inst. + // Most of that can be handled through the trap() function. The + // processInterrupts() function really just checks for interrupts + // and then calls trap() if there is an interrupt present. - // Not sure which thread should be the one to interrupt. For now - // always do thread 0. - assert(!thread->inSyscall); - thread->inSyscall = true; + // Not sure which thread should be the one to interrupt. For now + // always do thread 0. + assert(!thread->inSyscall); + thread->inSyscall = true; - // CPU will handle implementation of the interrupt. - cpu->processInterrupts(); + // CPU will handle implementation of the interrupt. + cpu->processInterrupts(); - // Now squash or record that I need to squash this cycle. - commitStatus = TrapPending; + // Now squash or record that I need to squash this cycle. + commitStatus = TrapPending; - // Exit state update mode to avoid accidental updating. - thread->inSyscall = false; + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; - // Generate trap squash event. 
- generateTrapEvent(); + // Generate trap squash event. + generateTrapEvent(); - DPRINTF(BE, "Interrupt detected.\n"); + DPRINTF(BE, "Interrupt detected.\n"); + } else { + DPRINTF(BE, "Interrupt must wait for ROB to drain.\n"); + } } } @@ -585,7 +605,7 @@ template void LWBackEnd::handleFault(Fault &fault, Tick latency) { - DPRINTF(BE, "Handling fault!"); + DPRINTF(BE, "Handling fault!\n"); assert(!thread->inSyscall); @@ -615,6 +635,9 @@ LWBackEnd::tick() wbCycle = 0; + // Read in any done instruction information and update the IQ or LSQ. + updateStructures(); + #if FULL_SYSTEM checkInterrupts(); @@ -623,7 +646,7 @@ LWBackEnd::tick() squashFromTrap(); } else if (xcSquash) { squashFromXC(); - } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty()) { + } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty() && !LSQ.hasStoresToWB()) { DPRINTF(BE, "ROB and front end empty, handling fetch fault\n"); Fault fetch_fault = frontEnd->getFault(); if (fetch_fault == NoFault) { @@ -636,9 +659,6 @@ LWBackEnd::tick() } #endif - // Read in any done instruction information and update the IQ or LSQ. 
- updateStructures(); - if (dispatchStatus != Blocked) { dispatchInsts(); } else { @@ -719,12 +739,41 @@ LWBackEnd::dispatchInsts() for (int i = 0; i < inst->numDestRegs(); ++i) renameTable[inst->destRegIdx(i)] = inst; - if (inst->readyToIssue() && !inst->isNonSpeculative()) { - DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n", - inst->seqNum); - exeList.push(inst); + if (inst->isMemBarrier() || inst->isWriteBarrier()) { + if (memBarrier) { + DPRINTF(BE, "Instruction [sn:%lli] is waiting on " + "barrier [sn:%lli].\n", + inst->seqNum, memBarrier->seqNum); + memBarrier->addMemDependent(inst); + inst->addSrcMemInst(memBarrier); + } + memBarrier = inst; + inst->setCanCommit(); + } else if (inst->readyToIssue() && !inst->isNonSpeculative()) { if (inst->isMemRef()) { + LSQ.insert(inst); + if (memBarrier) { + DPRINTF(BE, "Instruction [sn:%lli] is waiting on " + "barrier [sn:%lli].\n", + inst->seqNum, memBarrier->seqNum); + memBarrier->addMemDependent(inst); + inst->addSrcMemInst(memBarrier); + addWaitingMemOp(inst); + + waitingList.push_front(inst); + inst->iqIt = waitingList.begin(); + inst->iqItValid = true; + waitingInsts++; + } else { + DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n", + inst->seqNum); + exeList.push(inst); + } + } else { + DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n", + inst->seqNum); + exeList.push(inst); } } else { if (inst->isNonSpeculative()) { @@ -735,6 +784,14 @@ LWBackEnd::dispatchInsts() if (inst->isMemRef()) { addWaitingMemOp(inst); LSQ.insert(inst); + if (memBarrier) { + memBarrier->addMemDependent(inst); + inst->addSrcMemInst(memBarrier); + + DPRINTF(BE, "Instruction [sn:%lli] is waiting on " + "barrier [sn:%lli].\n", + inst->seqNum, memBarrier->seqNum); + } } DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to " @@ -872,9 +929,6 @@ LWBackEnd::executeInsts() ++funcExeInst; ++num_executed; - // keep an instruction count - thread->numInst++; - thread->numInsts++; exeList.pop(); @@ 
-915,7 +969,7 @@ LWBackEnd::instToCommit(DynInstPtr &inst) inst->setCanCommit(); if (inst->isExecuted()) { - inst->setCompleted(); + inst->setResultReady(); int dependents = wakeDependents(inst); if (dependents) { producer_inst[0]++; @@ -956,7 +1010,7 @@ LWBackEnd::writebackInsts() inst->seqNum, inst->readPC()); inst->setCanCommit(); - inst->setCompleted(); + inst->setResultReady(); if (inst->isExecuted()) { int dependents = wakeDependents(inst); @@ -997,7 +1051,9 @@ LWBackEnd::commitInst(int inst_num) // If the instruction is not executed yet, then it is a non-speculative // or store inst. Signal backwards that it should be executed. if (!inst->isExecuted()) { - if (inst->isNonSpeculative()) { + if (inst->isNonSpeculative() || + inst->isMemBarrier() || + inst->isWriteBarrier()) { #if !FULL_SYSTEM // Hack to make sure syscalls aren't executed until all stores // write back their data. This direct communication shouldn't @@ -1017,6 +1073,16 @@ LWBackEnd::commitInst(int inst_num) "instruction at the head of the ROB, PC %#x.\n", inst->readPC()); + if (inst->isMemBarrier() || inst->isWriteBarrier()) { + DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n", + inst->seqNum); + assert(memBarrier); + wakeDependents(inst, true); + if (memBarrier == inst) + memBarrier = NULL; + inst->clearMemDependents(); + } + // Send back the non-speculative instruction's sequence number. if (inst->iqItValid) { DPRINTF(BE, "Removing instruction from waiting list\n"); @@ -1066,13 +1132,45 @@ LWBackEnd::commitInst(int inst_num) // Not handled for now. assert(!inst->isThreadSync()); - + assert(inst->memDepReady()); + // Stores will mark themselves as totally completed as they need + // to wait to writeback to memory. @todo: Hack...attempt to fix + // having the checker be forced to wait until a store completes in + // order to check all of the instructions. 
If the store at the + // head of the check list misses, but a later store hits, then + // loads in the checker may see the younger store values instead + // of the store they should see. Either the checker needs its own + // memory (annoying to update), its own store buffer (how to tell + // which value is correct?), or something else... + if (!inst->isStore()) { + inst->setCompleted(); + } // Check if the instruction caused a fault. If so, trap. Fault inst_fault = inst->getFault(); + // Use checker prior to updating anything due to traps or PC + // based events. + if (checker) { + checker->tick(inst); + } + if (inst_fault != NoFault) { DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", inst->seqNum, inst->readPC()); + + // Instruction is completed as it has a fault. + inst->setCompleted(); + + if (LSQ.hasStoresToWB()) { + DPRINTF(BE, "Stores still in flight, will wait until drained.\n"); + return false; + } else if (inst_num != 0) { + DPRINTF(BE, "Will wait until instruction is head of commit group.\n"); + return false; + } else if (checker && inst->isStore()) { + checker->tick(inst); + } + thread->setInst( static_cast(inst->staticInst->machInst)); #if FULL_SYSTEM @@ -1094,6 +1192,8 @@ LWBackEnd::commitInst(int inst_num) } if (inst->traceData) { + inst->traceData->setFetchSeq(inst->seqNum); + inst->traceData->setCPSeq(thread->numInst); inst->traceData->finalize(); inst->traceData = NULL; } @@ -1105,18 +1205,18 @@ LWBackEnd::commitInst(int inst_num) instList.pop_back(); --numInsts; - thread->numInsts++; ++thread->funcExeInst; - // Maybe move this to where teh fault is handled; if the fault is handled, + // Maybe move this to where the fault is handled; if the fault is handled, // don't try to set this myself as the fault will set it. If not, then // I set thread->PC = thread->nextPC and thread->nextPC = thread->nextPC + 4. 
thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst)); updateComInstStats(inst); // Write the done sequence number here. // LSQ.commitLoads(inst->seqNum); -// LSQ.commitStores(inst->seqNum); toIEW->doneSeqNum = inst->seqNum; + lastCommitCycle = curTick; #if FULL_SYSTEM int count = 0; @@ -1243,6 +1343,22 @@ LWBackEnd::squash(const InstSeqNum &sn) waitingInsts--; } + while (memBarrier && memBarrier->seqNum > sn) { + DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously squashed)\n", memBarrier->seqNum); + memBarrier->clearMemDependents(); + if (memBarrier->memDepReady()) { + DPRINTF(BE, "No previous barrier\n"); + memBarrier = NULL; + } else { + std::list &srcs = memBarrier->getMemSrcs(); + memBarrier = srcs.front(); + srcs.pop_front(); + assert(srcs.empty()); + DPRINTF(BE, "Previous barrier: [sn:%lli]\n", + memBarrier->seqNum); + } + } + frontEnd->addFreeRegs(freed_regs); } @@ -1254,6 +1370,7 @@ LWBackEnd::squashFromXC() squash(squashed_inst); frontEnd->squash(squashed_inst, thread->readPC(), false, false); + frontEnd->interruptPending = false; thread->trapPending = false; thread->inSyscall = false; @@ -1269,6 +1386,7 @@ LWBackEnd::squashFromTrap() squash(squashed_inst); frontEnd->squash(squashed_inst, thread->readPC(), false, false); + frontEnd->interruptPending = false; thread->trapPending = false; thread->inSyscall = false; @@ -1319,6 +1437,36 @@ LWBackEnd::fetchFault(Fault &fault) fetchHasFault = true; } +template +void +LWBackEnd::switchOut() +{ + switchedOut = true; + // Need to get rid of all committed, non-speculative state and write it + // to memory/XC. In this case this is stores that have committed and not + // yet written back. 
+ LSQ.switchOut(); + squash(0); +} + +template +void +LWBackEnd::takeOverFrom(ExecContext *old_xc) +{ + switchedOut = false; + xcSquash = false; + trapSquash = false; + + numInsts = 0; + numWaitingMemOps = 0; + waitingMemOps.clear(); + waitingInsts = 0; + switchedOut = false; + dispatchStatus = Running; + commitStatus = Running; + LSQ.takeOverFrom(old_xc); +} + template void LWBackEnd::updateExeInstStats(DynInstPtr &inst) @@ -1358,7 +1506,11 @@ template void LWBackEnd::updateComInstStats(DynInstPtr &inst) { - unsigned thread = inst->threadNumber; + unsigned tid = inst->threadNumber; + + // keep an instruction count + thread->numInst++; + thread->numInsts++; cpu->numInst++; // @@ -1366,33 +1518,33 @@ LWBackEnd::updateComInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) { - stat_com_swp[thread]++; + stat_com_swp[tid]++; } else { - stat_com_inst[thread]++; + stat_com_inst[tid]++; } #else - stat_com_inst[thread]++; + stat_com_inst[tid]++; #endif // // Control Instructions // if (inst->isControl()) - stat_com_branches[thread]++; + stat_com_branches[tid]++; // // Memory references // if (inst->isMemRef()) { - stat_com_refs[thread]++; + stat_com_refs[tid]++; if (inst->isLoad()) { - stat_com_loads[thread]++; + stat_com_loads[tid]++; } } if (inst->isMemBarrier()) { - stat_com_membars[thread]++; + stat_com_membars[tid]++; } } diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh index eb9886244..042610324 100644 --- a/cpu/ozone/lw_lsq.hh +++ b/cpu/ozone/lw_lsq.hh @@ -41,6 +41,7 @@ #include "cpu/inst_seq.hh" #include "mem/mem_interface.hh" //#include "mem/page_table.hh" +#include "sim/debug.hh" #include "sim/sim_object.hh" //class PageTable; @@ -90,7 +91,10 @@ class OzoneLWLSQ { /** The writeback event for the store. Needed for store * conditionals. */ + public: Event *wbEvent; + bool miss; + private: /** The pointer to the LSQ unit that issued the store. 
*/ OzoneLWLSQ *lsqPtr; }; @@ -228,6 +232,14 @@ class OzoneLWLSQ { !storeQueue.back().completed && !dcacheInterface->isBlocked(); } + void switchOut(); + + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + + bool switchedOut; + private: /** Completes the store at the specified index. */ void completeStore(int store_idx); @@ -560,12 +572,10 @@ OzoneLWLSQ::read(MemReqPtr &req, T &data, int load_idx) sq_it++; } - // If there's no forwarding case, then go access memory DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n", inst->readPC()); - // Setup MemReq pointer req->cmd = Read; req->completionEvent = NULL; @@ -594,8 +604,12 @@ OzoneLWLSQ::read(MemReqPtr &req, T &data, int load_idx) DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x " "vaddr:%#x flags:%i\n", inst->readPC(), req->paddr, req->vaddr, req->flags); - - +/* + Addr debug_addr = ULL(0xfffffc0000be81a8); + if (req->vaddr == debug_addr) { + debug_break(); + } +*/ assert(!req->completionEvent); req->completionEvent = new typename BackEnd::LdWritebackEvent(inst, be); @@ -647,7 +661,15 @@ OzoneLWLSQ::write(MemReqPtr &req, T &data, int store_idx) (*sq_it).req = req; (*sq_it).size = sizeof(T); (*sq_it).data = data; - + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); +/* + Addr debug_addr = ULL(0xfffffc0000be81a8); + if (req->vaddr == debug_addr) { + debug_break(); + } +*/ // This function only writes the data to the store queue, so no fault // can happen here. 
return NoFault; diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh index 7b22d2564..9b7e48f96 100644 --- a/cpu/ozone/lw_lsq_impl.hh +++ b/cpu/ozone/lw_lsq_impl.hh @@ -29,6 +29,7 @@ #include "arch/isa_traits.hh" #include "base/str.hh" #include "cpu/ozone/lw_lsq.hh" +#include "cpu/checker/cpu.hh" template OzoneLWLSQ::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst, @@ -39,6 +40,7 @@ OzoneLWLSQ::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst, inst(_inst), be(_be), wbEvent(wb_event), + miss(false), lsqPtr(lsq_ptr) { this->setFlags(Event::AutoDelete); @@ -54,13 +56,21 @@ OzoneLWLSQ::StoreCompletionEvent::process() //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); // lsqPtr->cpu->wakeCPU(); + if (lsqPtr->isSwitchedOut()) { + if (wbEvent) + delete wbEvent; + + return; + } + if (wbEvent) { wbEvent->process(); delete wbEvent; } lsqPtr->completeStore(inst->sqIdx); - be->removeDcacheMiss(inst); + if (miss) + be->removeDcacheMiss(inst); } template @@ -80,8 +90,7 @@ OzoneLWLSQ::OzoneLWLSQ() template void OzoneLWLSQ::init(Params *params, unsigned maxLQEntries, - unsigned maxSQEntries, unsigned id) - + unsigned maxSQEntries, unsigned id) { DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id); @@ -90,7 +99,7 @@ OzoneLWLSQ::init(Params *params, unsigned maxLQEntries, LQEntries = maxLQEntries; SQEntries = maxSQEntries; - for (int i = 0; i < LQEntries * 10; i++) { + for (int i = 0; i < LQEntries * 2; i++) { LQIndices.push(i); SQIndices.push(i); } @@ -196,6 +205,7 @@ template void OzoneLWLSQ::insertLoad(DynInstPtr &load_inst) { + assert(loads < LQEntries * 2); assert(!LQIndices.empty()); int load_index = LQIndices.front(); LQIndices.pop(); @@ -503,21 +513,13 @@ OzoneLWLSQ::writebackStores() assert((*sq_it).req); assert(!(*sq_it).committed); - MemReqPtr req = (*sq_it).req; (*sq_it).committed = true; + MemReqPtr req = (*sq_it).req; + req->cmd = Write; req->completionEvent = NULL; req->time = curTick; - assert(!req->data); - 
req->data = new uint8_t[64]; - memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); - - DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " - "to Addr:%#x, data:%#x [sn:%lli]\n", - inst->sqIdx,inst->readPC(), - req->paddr, *(req->data), - inst->seqNum); switch((*sq_it).size) { case 1: @@ -535,8 +537,25 @@ OzoneLWLSQ::writebackStores() default: panic("Unexpected store size!\n"); } + if (!(req->flags & LOCKED)) { + (*sq_it).inst->setCompleted(); + if (cpu->checker) { + cpu->checker->tick((*sq_it).inst); + } + } + + DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + inst->sqIdx,inst->readPC(), + req->paddr, *(req->data), + inst->seqNum); if (dcacheInterface) { + assert(!req->completionEvent); + StoreCompletionEvent *store_event = new + StoreCompletionEvent(inst, be, NULL, this); + req->completionEvent = store_event; + MemAccessResult result = dcacheInterface->access(req); if (isStalled() && @@ -551,13 +570,14 @@ OzoneLWLSQ::writebackStores() if (result != MA_HIT && dcacheInterface->doEvents()) { // Event *wb = NULL; - + store_event->miss = true; typename BackEnd::LdWritebackEvent *wb = NULL; if (req->flags & LOCKED) { // Stx_C does not generate a system port transaction. // req->result=1; wb = new typename BackEnd::LdWritebackEvent(inst, be); + store_event->wbEvent = wb; } DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); @@ -567,9 +587,6 @@ OzoneLWLSQ::writebackStores() // Will stores need their own kind of writeback events? // Do stores even need writeback events? 
- assert(!req->completionEvent); - req->completionEvent = new - StoreCompletionEvent(inst, be, wb, this); be->addDcacheMiss(inst); lastDcacheStall = curTick; @@ -597,10 +614,10 @@ OzoneLWLSQ::writebackStores() typename BackEnd::LdWritebackEvent *wb = new typename BackEnd::LdWritebackEvent(inst, be); - wb->schedule(curTick); + store_event->wbEvent = wb; } sq_it--; - completeStore(inst->sqIdx); +// completeStore(inst->sqIdx); } } else { panic("Must HAVE DCACHE!!!!!\n"); @@ -758,31 +775,121 @@ OzoneLWLSQ::completeStore(int store_idx) DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n", inst->sqIdx, inst->seqNum, storesToWB); - // A bit conservative because a store completion may not free up entries, - // but hopefully avoids two store completions in one cycle from making - // the CPU tick twice. -// cpu->activityThisCycle(); assert(!storeQueue.empty()); SQItHash.erase(sq_hash_it); SQIndices.push(inst->sqIdx); storeQueue.erase(sq_it); --stores; -/* - SQIt oldest_store_it = --(storeQueue.end()); - if (sq_it == oldest_store_it) { - do { - inst = (*oldest_store_it).inst; - sq_hash_it = SQItHash.find(inst->sqIdx); - assert(sq_hash_it != SQItHash.end()); - SQItHash.erase(sq_hash_it); - SQIndices.push(inst->sqIdx); - storeQueue.erase(oldest_store_it--); - - --stores; - } while ((*oldest_store_it).completed && - oldest_store_it != storeQueue.end()); - -// be->updateLSQNextCycle = true; +// assert(!inst->isCompleted()); + inst->setCompleted(); + if (cpu->checker) { + cpu->checker->tick(inst); } -*/ +} + +template +void +OzoneLWLSQ::switchOut() +{ + switchedOut = true; + SQIt sq_it = --(storeQueue.end()); + while (storesToWB > 0 && + sq_it != storeQueue.end() && + (*sq_it).inst && + (*sq_it).canWB) { + + DynInstPtr inst = (*sq_it).inst; + + if ((*sq_it).size == 0 && !(*sq_it).completed) { + sq_it--; +// completeStore(inst->sqIdx); + + continue; + } + + // Store conditionals don't complete until *after* they have written + // back. 
If it's here and not yet sent to memory, then don't bother + // as it's not part of committed state. + if (inst->isDataPrefetch() || (*sq_it).committed || + (*sq_it).req->flags & LOCKED) { + sq_it--; + continue; + } + + assert((*sq_it).req); + assert(!(*sq_it).committed); + + MemReqPtr req = (*sq_it).req; + (*sq_it).committed = true; + + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); + + DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n", + inst->sqIdx,inst->readPC(), + req->paddr, *(req->data), + inst->seqNum); + + switch((*sq_it).size) { + case 1: + cpu->write(req, (uint8_t &)(*sq_it).data); + break; + case 2: + cpu->write(req, (uint16_t &)(*sq_it).data); + break; + case 4: + cpu->write(req, (uint32_t &)(*sq_it).data); + break; + case 8: + cpu->write(req, (uint64_t &)(*sq_it).data); + break; + default: + panic("Unexpected store size!\n"); + } + } + + // Clear the queue to free up resources + storeQueue.clear(); + loadQueue.clear(); + loads = stores = storesToWB = 0; +} + +template +void +OzoneLWLSQ::takeOverFrom(ExecContext *old_xc) +{ + // Clear out any old state. May be redundant if this is the first time + // the CPU is being used. 
+ stalled = false; + isLoadBlocked = false; + loadBlockedHandled = false; + switchedOut = false; + + // Could do simple checks here to see if indices are on twice + while (!LQIndices.empty()) + LQIndices.pop(); + while (!SQIndices.empty()) + SQIndices.pop(); + + for (int i = 0; i < LQEntries * 2; i++) { + LQIndices.push(i); + SQIndices.push(i); + } + + // May want to initialize these entries to NULL + +// loadHead = loadTail = 0; + +// storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + + loadFaultInst = storeFaultInst = memDepViolator = NULL; + + blockedLoadSeqNum = 0; } diff --git a/cpu/ozone/simple_params.hh b/cpu/ozone/simple_params.hh index e503654aa..647da1781 100644 --- a/cpu/ozone/simple_params.hh +++ b/cpu/ozone/simple_params.hh @@ -51,6 +51,7 @@ class SimpleParams : public BaseCPU::Params unsigned backEndLatency; unsigned maxInstBufferSize; unsigned numPhysicalRegs; + unsigned maxOutstandingMemOps; // // Fetch // diff --git a/python/m5/objects/OzoneCPU.py b/python/m5/objects/OzoneCPU.py index 8186a44bb..3fca61e28 100644 --- a/python/m5/objects/OzoneCPU.py +++ b/python/m5/objects/OzoneCPU.py @@ -9,12 +9,15 @@ class DerivOzoneCPU(BaseCPU): if not build_env['FULL_SYSTEM']: mem = Param.FunctionalMemory(NULL, "memory") + checker = Param.BaseCPU("Checker CPU") + width = Param.Unsigned("Width") frontEndWidth = Param.Unsigned("Front end width") backEndWidth = Param.Unsigned("Back end width") backEndSquashLatency = Param.Unsigned("Back end squash latency") backEndLatency = Param.Unsigned("Back end latency") maxInstBufferSize = Param.Unsigned("Maximum instruction buffer size") + maxOutstandingMemOps = Param.Unsigned("Maximum number of outstanding memory operations") decodeToFetchDelay = Param.Unsigned("Decode to fetch delay") renameToFetchDelay = Param.Unsigned("Rename to fetch delay") iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch "