diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh index 18978142d..cd754dc3c 100644 --- a/cpu/base_dyn_inst.hh +++ b/cpu/base_dyn_inst.hh @@ -117,11 +117,6 @@ class BaseDynInst : public FastAlloc, public RefCounted Fault write(T data, Addr addr, unsigned flags, uint64_t *res); - // @todo: Probably should not have this function in the DynInst. - template - bool snoop(MemReqPtr &req, T &data) - { return cpu->snoop(req, data); } - void prefetch(Addr addr, unsigned flags); void writeHint(Addr addr, int size, unsigned flags); Fault copySrcTranslate(Addr src); diff --git a/cpu/ozone/back_end_impl.hh b/cpu/ozone/back_end_impl.hh index 0b0f04f59..36770d65c 100644 --- a/cpu/ozone/back_end_impl.hh +++ b/cpu/ozone/back_end_impl.hh @@ -1385,7 +1385,7 @@ BackEnd::writebackInsts() inst->seqNum, inst->readPC()); inst->setCanCommit(); - inst->setCompleted(); + inst->setResultReady(); if (inst->isExecuted()) { int dependents = IQ.wakeDependents(inst); diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh index 56b6571a2..eec8902d8 100644 --- a/cpu/ozone/cpu.hh +++ b/cpu/ozone/cpu.hh @@ -53,6 +53,7 @@ class AlphaDTB; class PhysicalMemory; class MemoryController; +class Sampler; class RemoteGDB; class GDBListener; @@ -69,6 +70,9 @@ namespace Trace { class InstRecord; } +template +class Checker; + /** * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with * simple out-of-order capabilities added to it. 
It is still a 1 CPI machine @@ -226,7 +230,9 @@ class OzoneCPU : public BaseCPU }; // execution context proxy - OzoneXC xcProxy; + OzoneXC ozoneXC; + ExecContext *xcProxy; + ExecContext *checkerXC; typedef OzoneThreadState ImplState; @@ -245,6 +251,7 @@ class OzoneCPU : public BaseCPU void tick(); std::set snList; + std::set lockAddrList; private: struct TickEvent : public Event { @@ -262,9 +269,9 @@ class OzoneCPU : public BaseCPU void scheduleTickEvent(int delay) { if (tickEvent.squashed()) - tickEvent.reschedule(curTick + delay); + tickEvent.reschedule(curTick + cycles(delay)); else if (!tickEvent.scheduled()) - tickEvent.schedule(curTick + delay); + tickEvent.schedule(curTick + cycles(delay)); } /// Unschedule tick event, regardless of its current state. @@ -322,7 +329,7 @@ class OzoneCPU : public BaseCPU int cpuId; - void switchOut(); + void switchOut(Sampler *sampler); void takeOverFrom(BaseCPU *oldCPU); #if FULL_SYSTEM @@ -472,6 +479,7 @@ class OzoneCPU : public BaseCPU Fault error; if (req->flags & LOCKED) { // lockAddr = req->paddr; + lockAddrList.insert(req->paddr); lockFlag = true; } @@ -546,7 +554,13 @@ class OzoneCPU : public BaseCPU req->result = 2; } else { if (this->lockFlag/* && this->lockAddr == req->paddr*/) { - req->result = 1; + if (lockAddrList.find(req->paddr) != + lockAddrList.end()) { + req->result = 1; + } else { + req->result = 0; + return NoFault; + } } else { req->result = 0; return NoFault; @@ -599,7 +613,7 @@ class OzoneCPU : public BaseCPU void setSyscallReturn(SyscallReturn return_value, int tid); #endif - ExecContext *xcBase() { return &xcProxy; } + ExecContext *xcBase() { return xcProxy; } bool decoupledFrontEnd; struct CommStruct { @@ -615,6 +629,8 @@ class OzoneCPU : public BaseCPU bool lockFlag; Stats::Scalar<> quiesceCycles; + + Checker *checker; }; #endif // __CPU_OZONE_CPU_HH__ diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc index 0146dd1bd..64aa49c71 100644 --- a/cpu/ozone/cpu_builder.cc +++ 
b/cpu/ozone/cpu_builder.cc @@ -1,6 +1,7 @@ #include +#include "cpu/checker/cpu.hh" #include "cpu/inst_seq.hh" #include "cpu/ozone/cpu.hh" #include "cpu/ozone/ozone_impl.hh" @@ -50,6 +51,8 @@ SimObjectVectorParam workload; SimObjectParam mem; +SimObjectParam checker; + Param max_insts_any_thread; Param max_insts_all_threads; Param max_loads_any_thread; @@ -66,6 +69,7 @@ Param backEndSquashLatency; Param backEndLatency; Param maxInstBufferSize; Param numPhysicalRegs; +Param maxOutstandingMemOps; Param decodeToFetchDelay; Param renameToFetchDelay; @@ -164,6 +168,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) INIT_PARAM_DFLT(mem, "Memory", NULL), + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + INIT_PARAM_DFLT(max_insts_any_thread, "Terminate when any thread reaches this inst count", 0), @@ -190,6 +196,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), INIT_PARAM(numPhysicalRegs, "Number of physical registers"), + INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4), INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), @@ -314,7 +321,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) #endif // FULL_SYSTEM params->mem = mem; - + params->checker = checker; params->max_insts_any_thread = max_insts_any_thread; params->max_insts_all_threads = max_insts_all_threads; params->max_loads_any_thread = max_loads_any_thread; @@ -334,6 +341,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) params->backEndLatency = backEndLatency; params->maxInstBufferSize = maxInstBufferSize; params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; + params->maxOutstandingMemOps = maxOutstandingMemOps; params->decodeToFetchDelay = decodeToFetchDelay; params->renameToFetchDelay = renameToFetchDelay; @@ -445,6 +453,8 @@ SimObjectVectorParam workload; SimObjectParam mem; +SimObjectParam checker; + Param 
max_insts_any_thread; Param max_insts_all_threads; Param max_loads_any_thread; @@ -559,6 +569,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) INIT_PARAM_DFLT(mem, "Memory", NULL), + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + INIT_PARAM_DFLT(max_insts_any_thread, "Terminate when any thread reaches this inst count", 0), @@ -709,7 +721,7 @@ CREATE_SIM_OBJECT(SimpleOzoneCPU) #endif // FULL_SYSTEM params->mem = mem; - + params->checker = checker; params->max_insts_any_thread = max_insts_any_thread; params->max_insts_all_threads = max_insts_all_threads; params->max_loads_any_thread = max_loads_any_thread; diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index 17d944e7c..4f3fdf521 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -33,6 +33,7 @@ #include "base/trace.hh" #include "config/full_system.hh" #include "cpu/base.hh" +#include "cpu/checker/exec_context.hh" #include "cpu/exec_context.hh" #include "cpu/exetrace.hh" #include "cpu/ozone/cpu.hh" @@ -156,17 +157,33 @@ OzoneCPU::OzoneCPU(Params *p) #endif comm(5, 5) { - + if (p->checker) { + BaseCPU *temp_checker = p->checker; + checker = dynamic_cast *>(temp_checker); + } else { + checker = NULL; + } frontEnd = new FrontEnd(p); backEnd = new BackEnd(p); _status = Idle; - thread.xcProxy = &xcProxy; + if (checker) { + checker->setMemory(mem); +#if FULL_SYSTEM + checker->setSystem(p->system); +#endif + checkerXC = new CheckerExecContext(&ozoneXC, checker); + thread.xcProxy = checkerXC; + xcProxy = checkerXC; + } else { + thread.xcProxy = &ozoneXC; + xcProxy = &ozoneXC; + } thread.inSyscall = false; - xcProxy.cpu = this; - xcProxy.thread = &thread; + ozoneXC.cpu = this; + ozoneXC.thread = &thread; thread.setStatus(ExecContext::Suspended); #if FULL_SYSTEM @@ -177,7 +194,7 @@ OzoneCPU::OzoneCPU(Params *p) thread.tid = 0; thread.mem = p->mem; - thread.quiesceEvent = new EndQuiesceEvent(&xcProxy); + thread.quiesceEvent = new EndQuiesceEvent(xcProxy); system = p->system; itb = p->itb; @@ -187,9 
+204,10 @@ OzoneCPU::OzoneCPU(Params *p) if (p->profile) { thread.profile = new FunctionProfile(p->system->kernelSymtab); + // @todo: This might be better as an ExecContext instead of OzoneXC Callback *cb = new MakeCallback(&xcProxy); + &OzoneXC::dumpFuncProfile>(&ozoneXC); registerExitCallback(cb); } @@ -198,7 +216,6 @@ OzoneCPU::OzoneCPU(Params *p) static ProfileNode dummyNode; thread.profileNode = &dummyNode; thread.profilePC = 3; - #else // xc = new ExecContext(this, /* thread_num */ 0, p->workload[0], /* asid */ 0); thread.cpu = this; @@ -225,13 +242,13 @@ OzoneCPU::OzoneCPU(Params *p) issueWidth = p->issueWidth; */ - execContexts.push_back(&xcProxy); + execContexts.push_back(xcProxy); frontEnd->setCPU(this); backEnd->setCPU(this); - frontEnd->setXC(&xcProxy); - backEnd->setXC(&xcProxy); + frontEnd->setXC(xcProxy); + backEnd->setXC(xcProxy); frontEnd->setThreadState(&thread); backEnd->setThreadState(&thread); @@ -250,7 +267,7 @@ OzoneCPU::OzoneCPU(Params *p) for (int i = 0; i < TheISA::TotalNumRegs; ++i) { thread.renameTable[i] = new DynInst(this); - thread.renameTable[i]->setCompleted(); + thread.renameTable[i]->setResultReady(); } frontEnd->renameTable.copyFrom(thread.renameTable); @@ -312,11 +329,15 @@ OzoneCPU::copyToXC() */ template void -OzoneCPU::switchOut() +OzoneCPU::switchOut(Sampler *sampler) { + // Front end needs state from back end, so switch out the back end first. + backEnd->switchOut(); + frontEnd->switchOut(); _status = SwitchedOut; if (tickEvent.scheduled()) tickEvent.squash(); + sampler->signalSwitched(); } template @@ -325,8 +346,16 @@ OzoneCPU::takeOverFrom(BaseCPU *oldCPU) { BaseCPU::takeOverFrom(oldCPU); + backEnd->takeOverFrom(); + frontEnd->takeOverFrom(); assert(!tickEvent.scheduled()); + // @todo: Fix hardcoded number + // Clear out any old information in time buffer. + for (int i = 0; i < 6; ++i) { + comm.advance(); + } + // if any of this CPU's ExecContexts are active, mark the CPU as // running and schedule its tick event. 
for (int i = 0; i < execContexts.size(); ++i) { @@ -470,7 +499,7 @@ OzoneCPU::serialize(std::ostream &os) BaseCPU::serialize(os); SERIALIZE_ENUM(_status); nameOut(os, csprintf("%s.xc", name())); - xcProxy.serialize(os); + ozoneXC.serialize(os); nameOut(os, csprintf("%s.tickEvent", name())); tickEvent.serialize(os); } @@ -481,7 +510,7 @@ OzoneCPU::unserialize(Checkpoint *cp, const std::string §ion) { BaseCPU::unserialize(cp, section); UNSERIALIZE_ENUM(_status); - xcProxy.unserialize(cp, csprintf("%s.xc", section)); + ozoneXC.unserialize(cp, csprintf("%s.xc", section)); tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); } @@ -579,7 +608,7 @@ template Addr OzoneCPU::dbg_vtophys(Addr addr) { - return vtophys(&xcProxy, addr); + return vtophys(xcProxy, addr); } #endif // FULL_SYSTEM /* @@ -725,7 +754,7 @@ OzoneCPU::tick() comInstEventQueue[0]->serviceEvents(numInst); if (!tickEvent.scheduled() && _status == Running) - tickEvent.schedule(curTick + 1); + tickEvent.schedule(curTick + cycles(1)); } template @@ -750,7 +779,7 @@ OzoneCPU::syscall() DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst); - thread.process->syscall(&xcProxy); + thread.process->syscall(xcProxy); thread.funcExeInst--; @@ -784,19 +813,17 @@ OzoneCPU::hwrei() { // Need to move this to ISA code // May also need to make this per thread +/* if (!inPalMode()) return new UnimplementedOpcodeFault; thread.setNextPC(thread.readMiscReg(AlphaISA::IPR_EXC_ADDR)); - +*/ lockFlag = false; + lockAddrList.clear(); + kernelStats->hwrei(); - // Not sure how to make a similar check in the Ozone model -// if (!misspeculating()) { - kernelStats->hwrei(); - - checkInterrupts = true; -// } + checkInterrupts = true; // FIXME: XXX check for interrupts? 
XXX return NoFault; @@ -847,6 +874,11 @@ OzoneCPU::processInterrupts() if (ipl && ipl > thread.readMiscReg(IPR_IPLR)) { thread.setMiscReg(IPR_ISR, summary); thread.setMiscReg(IPR_INTID, ipl); + // @todo: Make this more transparent + if (checker) { + checkerXC->setMiscReg(IPR_ISR, summary); + checkerXC->setMiscReg(IPR_INTID, ipl); + } Fault fault = new InterruptFault; fault->invoke(thread.getXCProxy()); DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", @@ -860,7 +892,7 @@ OzoneCPU::simPalCheck(int palFunc) { // Need to move this to ISA code // May also need to make this per thread - this->kernelStats->callpal(palFunc, &xcProxy); + this->kernelStats->callpal(palFunc, xcProxy); switch (palFunc) { case PAL::halt: @@ -944,7 +976,28 @@ OzoneCPU::OzoneXC::dumpFuncProfile() template void OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) -{ } +{ + // some things should already be set up + assert(getMemPtr() == old_context->getMemPtr()); +#if FULL_SYSTEM + assert(getSystemPtr() == old_context->getSystemPtr()); +#else + assert(getProcessPtr() == old_context->getProcessPtr()); +#endif + + // copy over functional state + setStatus(old_context->status()); + copyArchRegs(old_context); + setCpuId(old_context->readCpuId()); +#if !FULL_SYSTEM + setFuncExeInst(old_context->readFuncExeInst()); +#endif + +// storeCondFailures = 0; + cpu->lockFlag = false; + + old_context->setStatus(ExecContext::Unallocated); +} template void @@ -1062,21 +1115,24 @@ template float OzoneCPU::OzoneXC::readFloatRegSingle(int reg_idx) { - return thread->renameTable[reg_idx]->readFloatResult(); + int idx = reg_idx + TheISA::FP_Base_DepTag; + return thread->renameTable[idx]->readFloatResult(); } template double OzoneCPU::OzoneXC::readFloatRegDouble(int reg_idx) { - return thread->renameTable[reg_idx]->readDoubleResult(); + int idx = reg_idx + TheISA::FP_Base_DepTag; + return thread->renameTable[idx]->readDoubleResult(); } template uint64_t OzoneCPU::OzoneXC::readFloatRegInt(int reg_idx) { - 
return thread->renameTable[reg_idx]->readIntResult(); + int idx = reg_idx + TheISA::FP_Base_DepTag; + return thread->renameTable[idx]->readIntResult(); } template @@ -1101,7 +1157,9 @@ template void OzoneCPU::OzoneXC::setFloatRegDouble(int reg_idx, double val) { - thread->renameTable[reg_idx]->setDoubleResult(val); + int idx = reg_idx + TheISA::FP_Base_DepTag; + + thread->renameTable[idx]->setDoubleResult(val); if (!thread->inSyscall) { cpu->squashFromXC(); diff --git a/cpu/ozone/dyn_inst.hh b/cpu/ozone/dyn_inst.hh index 4382af0fd..f251c28ea 100644 --- a/cpu/ozone/dyn_inst.hh +++ b/cpu/ozone/dyn_inst.hh @@ -59,9 +59,9 @@ class OzoneDynInst : public BaseDynInst typedef TheISA::MiscReg MiscReg; typedef typename std::list::iterator ListIt; - // Note that this is duplicated from the BaseDynInst class; I'm simply not - // sure the enum would carry through so I could use it in array - // declarations in this class. + // Note that this is duplicated from the BaseDynInst class; I'm + // simply not sure the enum would carry through so I could use it + // in array declarations in this class. enum { MaxInstSrcRegs = TheISA::MaxInstSrcRegs, MaxInstDestRegs = TheISA::MaxInstDestRegs @@ -90,9 +90,23 @@ class OzoneDynInst : public BaseDynInst void addDependent(DynInstPtr &dependent_inst); std::vector &getDependents() { return dependents; } + std::vector &getMemDeps() { return memDependents; } + std::list &getMemSrcs() { return srcMemInsts; } void wakeDependents(); + void wakeMemDependents(); + + void addMemDependent(DynInstPtr &inst) { memDependents.push_back(inst); } + + void addSrcMemInst(DynInstPtr &inst) { srcMemInsts.push_back(inst); } + + void markMemInstReady(OzoneDynInst *inst); + + // For now I will remove instructions from the list when they wake + // up. In the future, you only really need a counter. 
+ bool memDepReady() { return srcMemInsts.empty(); } + // void setBPredInfo(const BPredInfo &bp_info) { bpInfo = bp_info; } // BPredInfo &getBPredInfo() { return bpInfo; } @@ -104,9 +118,13 @@ class OzoneDynInst : public BaseDynInst std::vector dependents; - /** The instruction that produces the value of the source registers. These - * may be NULL if the value has already been read from the source - * instruction. + std::vector memDependents; + + std::list srcMemInsts; + + /** The instruction that produces the value of the source + * registers. These may be NULL if the value has already been + * read from the source instruction. */ DynInstPtr srcInsts[MaxInstSrcRegs]; @@ -165,22 +183,22 @@ class OzoneDynInst : public BaseDynInst */ void setIntReg(const StaticInst *si, int idx, uint64_t val) { - this->instResult.integer = val; + BaseDynInst::setIntReg(si, idx, val); } void setFloatRegSingle(const StaticInst *si, int idx, float val) { - this->instResult.fp = val; + BaseDynInst::setFloatRegSingle(si, idx, val); } void setFloatRegDouble(const StaticInst *si, int idx, double val) { - this->instResult.dbl = val; + BaseDynInst::setFloatRegDouble(si, idx, val); } void setFloatRegInt(const StaticInst *si, int idx, uint64_t val) { - this->instResult.integer = val; + BaseDynInst::setFloatRegInt(si, idx, val); } void setIntResult(uint64_t result) { this->instResult.integer = result; } @@ -199,6 +217,8 @@ class OzoneDynInst : public BaseDynInst void clearDependents(); + void clearMemDependents(); + public: // ISA stuff MiscReg readMiscReg(int misc_reg); diff --git a/cpu/ozone/dyn_inst_impl.hh b/cpu/ozone/dyn_inst_impl.hh index c83481c9a..a7e4460a1 100644 --- a/cpu/ozone/dyn_inst_impl.hh +++ b/cpu/ozone/dyn_inst_impl.hh @@ -38,7 +38,7 @@ template OzoneDynInst::OzoneDynInst(FullCPU *cpu) : BaseDynInst(0, 0, 0, 0, cpu) { - this->setCompleted(); + this->setResultReady(); initInstPtrs(); } @@ -130,7 +130,7 @@ template bool OzoneDynInst::srcInstReady(int regIdx) { - return 
srcInsts[regIdx]->isCompleted(); + return srcInsts[regIdx]->isResultReady(); } template @@ -149,6 +149,28 @@ OzoneDynInst::wakeDependents() } } +template +void +OzoneDynInst::wakeMemDependents() +{ + for (int i = 0; i < memDependents.size(); ++i) { + memDependents[i]->markMemInstReady(this); + } +} + +template +void +OzoneDynInst::markMemInstReady(OzoneDynInst *inst) +{ + ListIt mem_it = srcMemInsts.begin(); + while (mem_it != srcMemInsts.end() && (*mem_it) != inst) { // test end() first: dereferencing end() is undefined + mem_it++; + } + assert(mem_it != srcMemInsts.end()); + + srcMemInsts.erase(mem_it); +} + template void OzoneDynInst::initInstPtrs() @@ -164,7 +186,7 @@ bool OzoneDynInst::srcsReady() { for (int i = 0; i < this->numSrcRegs(); ++i) { - if (!srcInsts[i]->isCompleted()) + if (!srcInsts[i]->isResultReady()) return false; } @@ -176,7 +198,7 @@ bool OzoneDynInst::eaSrcsReady() { for (int i = 1; i < this->numSrcRegs(); ++i) { - if (!srcInsts[i]->isCompleted()) + if (!srcInsts[i]->isResultReady()) return false; } @@ -195,6 +217,14 @@ OzoneDynInst::clearDependents() prevDestInst[i] = NULL; } } + +template +void +OzoneDynInst::clearMemDependents() +{ + memDependents.clear(); +} + template MiscReg OzoneDynInst::readMiscReg(int misc_reg) @@ -213,6 +243,7 @@ template Fault OzoneDynInst::setMiscReg(int misc_reg, const MiscReg &val) { + this->setIntResult(val); return this->thread->setMiscReg(misc_reg, val); } @@ -234,11 +265,13 @@ OzoneDynInst::hwrei() this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR)); + this->cpu->hwrei(); +/* this->cpu->kernelStats->hwrei(); this->cpu->checkInterrupts = true; this->cpu->lockFlag = false; - +*/ // FIXME: XXX check for interrupts? 
XXX return NoFault; } diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh index 2bff2544d..188925ae5 100644 --- a/cpu/ozone/front_end.hh +++ b/cpu/ozone/front_end.hh @@ -66,6 +66,14 @@ class FrontEnd bool isEmpty() { return instBuffer.empty(); } + void switchOut(); + + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + + bool switchedOut; + private: bool updateStatus(); @@ -198,6 +206,9 @@ class FrontEnd DynInstPtr barrierInst; + public: + bool interruptPending; + private: // number of idle cycles /* Stats::Average<> notIdleFraction; @@ -223,6 +234,8 @@ class FrontEnd Stats::Scalar<> fetchBlockedCycles; /** Stat for total number of fetched cache lines. */ Stats::Scalar<> fetchedCacheLines; + + Stats::Scalar<> fetchIcacheSquashes; /** Distribution of number of instructions fetched each cycle. */ Stats::Distribution<> fetchNisnDist; // Stats::Vector<> qfull_iq_occupancy; diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh index 7c18386cf..a3eb809d0 100644 --- a/cpu/ozone/front_end_impl.hh +++ b/cpu/ozone/front_end_impl.hh @@ -19,8 +19,11 @@ FrontEnd::FrontEnd(Params *params) width(params->frontEndWidth), freeRegs(params->numPhysicalRegs), numPhysRegs(params->numPhysicalRegs), - serializeNext(false) + serializeNext(false), + interruptPending(false) { + switchedOut = false; + status = Idle; // Setup branch predictor. @@ -127,6 +130,11 @@ FrontEnd::regStats() .desc("Number of cache lines fetched") .prereq(fetchedCacheLines); + fetchIcacheSquashes + .name(name() + ".fetchIcacheSquashes") + .desc("Number of outstanding Icache misses that were squashed") + .prereq(fetchIcacheSquashes); + fetchNisnDist .init(/* base value */ 0, /* last value */ width, @@ -370,6 +378,10 @@ FrontEnd::fetchCacheLine() #endif // FULL_SYSTEM Fault fault = NoFault; + if (interruptPending && flags == 0) { + return fault; + } + // Align the fetch PC so it's at the start of a cache block. 
Addr fetch_PC = icacheBlockAlignPC(PC); @@ -397,7 +409,8 @@ FrontEnd::fetchCacheLine() // exists within the cache. if (icacheInterface && fault == NoFault) { #if FULL_SYSTEM - if (cpu->system->memctrl->badaddr(memReq->paddr)) { + if (cpu->system->memctrl->badaddr(memReq->paddr) || + memReq->flags & UNCACHEABLE) { DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a " "misspeculating path!", memReq->paddr); @@ -497,7 +510,7 @@ FrontEnd::processBarriers(DynInstPtr &inst) dispatchedTempSerializing++; } - // Change status over to BarrierStall so that other stages know + // Change status over to SerializeBlocked so that other stages know // what this is blocked on. status = SerializeBlocked; @@ -613,8 +626,10 @@ FrontEnd::processCacheCompletion(MemReqPtr &req) // Do something here. if (status != IcacheMissStall || - req != memReq) { + req != memReq || + switchedOut) { DPRINTF(FE, "Previous fetch was squashed.\n"); + fetchIcacheSquashes++; return; } @@ -702,6 +717,7 @@ FrontEnd::getInstFromCacheline() DynInstPtr inst = barrierInst; status = Running; barrierInst = NULL; + inst->clearSerializeBefore(); return inst; } @@ -773,7 +789,7 @@ FrontEnd::renameInst(DynInstPtr &inst) DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n", inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum); - if (src_inst->isCompleted()) { + if (src_inst->isResultReady()) { DPRINTF(FE, "Reg ready.\n"); inst->markSrcRegReady(i); } else { @@ -807,6 +823,38 @@ FrontEnd::wakeFromQuiesce() status = Running; } +template +void +FrontEnd::switchOut() +{ + switchedOut = true; + memReq = NULL; + squash(0, 0); + instBuffer.clear(); + instBufferSize = 0; + status = Idle; +} + +template +void +FrontEnd::takeOverFrom(ExecContext *old_xc) +{ + assert(freeRegs == numPhysRegs); + fetchCacheLineNextCycle = true; + + cacheBlkValid = false; + +#if !FULL_SYSTEM +// pTable = params->pTable; +#endif + fetchFault = NoFault; + serializeNext = false; + barrierInst = NULL; + status = Running; + switchedOut = false; + 
interruptPending = false; +} + template void FrontEnd::dumpInsts() diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh index f17c93ff4..028fdaf8c 100644 --- a/cpu/ozone/lw_back_end.hh +++ b/cpu/ozone/lw_back_end.hh @@ -17,6 +17,8 @@ #include "mem/mem_req.hh" #include "sim/eventq.hh" +template +class Checker; class ExecContext; template @@ -126,6 +128,8 @@ class LWBackEnd Addr commitPC; + Tick lastCommitCycle; + bool robEmpty() { return instList.empty(); } bool isFull() { return numInsts >= numROBEntries; } @@ -133,7 +137,7 @@ class LWBackEnd void fetchFault(Fault &fault); - int wakeDependents(DynInstPtr &inst); + int wakeDependents(DynInstPtr &inst, bool memory_deps = false); /** Tells memory dependence unit that a memory instruction needs to be * rescheduled. It will re-execute once replayMemInst() is called. @@ -182,6 +186,12 @@ class LWBackEnd void instToCommit(DynInstPtr &inst); + void switchOut(); + + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + private: void generateTrapEvent(Tick latency = 0); void handleFault(Fault &fault, Tick latency = 0); @@ -303,6 +313,10 @@ class LWBackEnd Fault faultFromFetch; bool fetchHasFault; + bool switchedOut; + + DynInstPtr memBarrier; + private: struct pqCompare { bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const @@ -327,7 +341,7 @@ class LWBackEnd bool exactFullStall; - bool fetchRedirect[Impl::MaxThreads]; +// bool fetchRedirect[Impl::MaxThreads]; // number of cycles stalled for D-cache misses /* Stats::Scalar<> dcacheStallCycles; @@ -414,6 +428,8 @@ class LWBackEnd Stats::VectorDistribution<> ROB_occ_dist; public: void dumpInsts(); + + Checker *checker; }; template diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh index d1290239c..d4829629d 100644 --- a/cpu/ozone/lw_back_end_impl.hh +++ b/cpu/ozone/lw_back_end_impl.hh @@ -1,5 +1,6 @@ #include "encumbered/cpu/full/op_class.hh" +#include "cpu/checker/cpu.hh" #include 
"cpu/ozone/lw_back_end.hh" template @@ -10,28 +11,36 @@ LWBackEnd::generateTrapEvent(Tick latency) TrapEvent *trap = new TrapEvent(this); - trap->schedule(curTick + latency); + trap->schedule(curTick + cpu->cycles(latency)); thread->trapPending = true; } template int -LWBackEnd::wakeDependents(DynInstPtr &inst) +LWBackEnd::wakeDependents(DynInstPtr &inst, bool memory_deps) { assert(!inst->isSquashed()); - std::vector &dependents = inst->getDependents(); + std::vector &dependents = memory_deps ? inst->getMemDeps() : + inst->getDependents(); int num_outputs = dependents.size(); DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum); for (int i = 0; i < num_outputs; i++) { DynInstPtr dep_inst = dependents[i]; - dep_inst->markSrcRegReady(); + if (!memory_deps) { + dep_inst->markSrcRegReady(); + } else { + if (!dep_inst->isSquashed()) + dep_inst->markMemInstReady(inst.get()); + } + DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum); if (dep_inst->readyToIssue() && dep_inst->isInROB() && - !dep_inst->isNonSpeculative()) { + !dep_inst->isNonSpeculative() && + dep_inst->memDepReady() && !dep_inst->isMemBarrier() && !dep_inst->isWriteBarrier()) { DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n", dep_inst->seqNum); exeList.push(dep_inst); @@ -114,6 +123,9 @@ LWBackEnd::LdWritebackEvent::process() // iewStage->wakeCPU(); + if (be->isSwitchedOut()) + return; + if (dcacheMiss) { be->removeDcacheMiss(inst); } @@ -169,16 +181,18 @@ LWBackEnd::DCacheCompletionEvent::description() template LWBackEnd::LWBackEnd(Params *params) : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), - xcSquash(false), cacheCompletionEvent(this), + trapSquash(false), xcSquash(false), cacheCompletionEvent(this), dcacheInterface(params->dcacheInterface), width(params->backEndWidth), exactFullStall(true) { numROBEntries = params->numROBEntries; numInsts = 0; numDispatchEntries = 32; - maxOutstandingMemOps = 4; + maxOutstandingMemOps = 
params->maxOutstandingMemOps; numWaitingMemOps = 0; waitingInsts = 0; + switchedOut = false; + // IQ.setBE(this); LSQ.setBE(this); @@ -533,6 +547,7 @@ LWBackEnd::setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; LSQ.setCPU(cpu_ptr); + checker = cpu->checker; } template @@ -554,30 +569,35 @@ LWBackEnd::checkInterrupts() !cpu->inPalMode(thread->readPC()) && !trapSquash && !xcSquash) { - // Will need to squash all instructions currently in flight and have - // the interrupt handler restart at the last non-committed inst. - // Most of that can be handled through the trap() function. The - // processInterrupts() function really just checks for interrupts - // and then calls trap() if there is an interrupt present. + frontEnd->interruptPending = true; + if (robEmpty() && !LSQ.hasStoresToWB()) { + // Will need to squash all instructions currently in flight and have + // the interrupt handler restart at the last non-committed inst. + // Most of that can be handled through the trap() function. The + // processInterrupts() function really just checks for interrupts + // and then calls trap() if there is an interrupt present. - // Not sure which thread should be the one to interrupt. For now - // always do thread 0. - assert(!thread->inSyscall); - thread->inSyscall = true; + // Not sure which thread should be the one to interrupt. For now + // always do thread 0. + assert(!thread->inSyscall); + thread->inSyscall = true; - // CPU will handle implementation of the interrupt. - cpu->processInterrupts(); + // CPU will handle implementation of the interrupt. + cpu->processInterrupts(); - // Now squash or record that I need to squash this cycle. - commitStatus = TrapPending; + // Now squash or record that I need to squash this cycle. + commitStatus = TrapPending; - // Exit state update mode to avoid accidental updating. - thread->inSyscall = false; + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; - // Generate trap squash event. 
- generateTrapEvent(); + // Generate trap squash event. + generateTrapEvent(); - DPRINTF(BE, "Interrupt detected.\n"); + DPRINTF(BE, "Interrupt detected.\n"); + } else { + DPRINTF(BE, "Interrupt must wait for ROB to drain.\n"); + } } } @@ -585,7 +605,7 @@ template void LWBackEnd::handleFault(Fault &fault, Tick latency) { - DPRINTF(BE, "Handling fault!"); + DPRINTF(BE, "Handling fault!\n"); assert(!thread->inSyscall); @@ -615,6 +635,9 @@ LWBackEnd::tick() wbCycle = 0; + // Read in any done instruction information and update the IQ or LSQ. + updateStructures(); + #if FULL_SYSTEM checkInterrupts(); @@ -623,7 +646,7 @@ LWBackEnd::tick() squashFromTrap(); } else if (xcSquash) { squashFromXC(); - } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty()) { + } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty() && !LSQ.hasStoresToWB()) { DPRINTF(BE, "ROB and front end empty, handling fetch fault\n"); Fault fetch_fault = frontEnd->getFault(); if (fetch_fault == NoFault) { @@ -636,9 +659,6 @@ LWBackEnd::tick() } #endif - // Read in any done instruction information and update the IQ or LSQ. 
- updateStructures(); - if (dispatchStatus != Blocked) { dispatchInsts(); } else { @@ -719,12 +739,41 @@ LWBackEnd::dispatchInsts() for (int i = 0; i < inst->numDestRegs(); ++i) renameTable[inst->destRegIdx(i)] = inst; - if (inst->readyToIssue() && !inst->isNonSpeculative()) { - DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n", - inst->seqNum); - exeList.push(inst); + if (inst->isMemBarrier() || inst->isWriteBarrier()) { + if (memBarrier) { + DPRINTF(BE, "Instruction [sn:%lli] is waiting on " + "barrier [sn:%lli].\n", + inst->seqNum, memBarrier->seqNum); + memBarrier->addMemDependent(inst); + inst->addSrcMemInst(memBarrier); + } + memBarrier = inst; + inst->setCanCommit(); + } else if (inst->readyToIssue() && !inst->isNonSpeculative()) { if (inst->isMemRef()) { + LSQ.insert(inst); + if (memBarrier) { + DPRINTF(BE, "Instruction [sn:%lli] is waiting on " + "barrier [sn:%lli].\n", + inst->seqNum, memBarrier->seqNum); + memBarrier->addMemDependent(inst); + inst->addSrcMemInst(memBarrier); + addWaitingMemOp(inst); + + waitingList.push_front(inst); + inst->iqIt = waitingList.begin(); + inst->iqItValid = true; + waitingInsts++; + } else { + DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n", + inst->seqNum); + exeList.push(inst); + } + } else { + DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n", + inst->seqNum); + exeList.push(inst); } } else { if (inst->isNonSpeculative()) { @@ -735,6 +784,14 @@ LWBackEnd::dispatchInsts() if (inst->isMemRef()) { addWaitingMemOp(inst); LSQ.insert(inst); + if (memBarrier) { + memBarrier->addMemDependent(inst); + inst->addSrcMemInst(memBarrier); + + DPRINTF(BE, "Instruction [sn:%lli] is waiting on " + "barrier [sn:%lli].\n", + inst->seqNum, memBarrier->seqNum); + } } DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to " @@ -872,9 +929,6 @@ LWBackEnd::executeInsts() ++funcExeInst; ++num_executed; - // keep an instruction count - thread->numInst++; - thread->numInsts++; exeList.pop(); @@ 
-915,7 +969,7 @@ LWBackEnd::instToCommit(DynInstPtr &inst) inst->setCanCommit(); if (inst->isExecuted()) { - inst->setCompleted(); + inst->setResultReady(); int dependents = wakeDependents(inst); if (dependents) { producer_inst[0]++; @@ -956,7 +1010,7 @@ LWBackEnd::writebackInsts() inst->seqNum, inst->readPC()); inst->setCanCommit(); - inst->setCompleted(); + inst->setResultReady(); if (inst->isExecuted()) { int dependents = wakeDependents(inst); @@ -997,7 +1051,9 @@ LWBackEnd::commitInst(int inst_num) // If the instruction is not executed yet, then it is a non-speculative // or store inst. Signal backwards that it should be executed. if (!inst->isExecuted()) { - if (inst->isNonSpeculative()) { + if (inst->isNonSpeculative() || + inst->isMemBarrier() || + inst->isWriteBarrier()) { #if !FULL_SYSTEM // Hack to make sure syscalls aren't executed until all stores // write back their data. This direct communication shouldn't @@ -1017,6 +1073,16 @@ LWBackEnd::commitInst(int inst_num) "instruction at the head of the ROB, PC %#x.\n", inst->readPC()); + if (inst->isMemBarrier() || inst->isWriteBarrier()) { + DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n", + inst->seqNum); + assert(memBarrier); + wakeDependents(inst, true); + if (memBarrier == inst) + memBarrier = NULL; + inst->clearMemDependents(); + } + // Send back the non-speculative instruction's sequence number. if (inst->iqItValid) { DPRINTF(BE, "Removing instruction from waiting list\n"); @@ -1066,13 +1132,45 @@ LWBackEnd::commitInst(int inst_num) // Not handled for now. assert(!inst->isThreadSync()); - + assert(inst->memDepReady()); + // Stores will mark themselves as totally completed as they need + // to wait to writeback to memory. @todo: Hack...attempt to fix + // having the checker be forced to wait until a store completes in + // order to check all of the instructions. 
If the store at the + // head of the check list misses, but a later store hits, then + // loads in the checker may see the younger store values instead + // of the store they should see. Either the checker needs its own + // memory (annoying to update), its own store buffer (how to tell + // which value is correct?), or something else... + if (!inst->isStore()) { + inst->setCompleted(); + } // Check if the instruction caused a fault. If so, trap. Fault inst_fault = inst->getFault(); + // Use checker prior to updating anything due to traps or PC + // based events. + if (checker) { + checker->tick(inst); + } + if (inst_fault != NoFault) { DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", inst->seqNum, inst->readPC()); + + // Instruction is completed as it has a fault. + inst->setCompleted(); + + if (LSQ.hasStoresToWB()) { + DPRINTF(BE, "Stores still in flight, will wait until drained.\n"); + return false; + } else if (inst_num != 0) { + DPRINTF(BE, "Will wait until instruction is head of commit group.\n"); + return false; + } else if (checker && inst->isStore()) { + checker->tick(inst); + } + thread->setInst( static_cast(inst->staticInst->machInst)); #if FULL_SYSTEM @@ -1094,6 +1192,8 @@ LWBackEnd::commitInst(int inst_num) } if (inst->traceData) { + inst->traceData->setFetchSeq(inst->seqNum); + inst->traceData->setCPSeq(thread->numInst); inst->traceData->finalize(); inst->traceData = NULL; } @@ -1105,18 +1205,18 @@ LWBackEnd::commitInst(int inst_num) instList.pop_back(); --numInsts; - thread->numInsts++; ++thread->funcExeInst; - // Maybe move this to where teh fault is handled; if the fault is handled, + // Maybe move this to where the fault is handled; if the fault is handled, // don't try to set this myself as the fault will set it. If not, then // I set thread->PC = thread->nextPC and thread->nextPC = thread->nextPC + 4. 
thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst)); updateComInstStats(inst); // Write the done sequence number here. // LSQ.commitLoads(inst->seqNum); -// LSQ.commitStores(inst->seqNum); toIEW->doneSeqNum = inst->seqNum; + lastCommitCycle = curTick; #if FULL_SYSTEM int count = 0; @@ -1243,6 +1343,22 @@ LWBackEnd::squash(const InstSeqNum &sn) waitingInsts--; } + while (memBarrier && memBarrier->seqNum > sn) { + DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously squashed)\n", memBarrier->seqNum); + memBarrier->clearMemDependents(); + if (memBarrier->memDepReady()) { + DPRINTF(BE, "No previous barrier\n"); + memBarrier = NULL; + } else { + std::list &srcs = memBarrier->getMemSrcs(); + memBarrier = srcs.front(); + srcs.pop_front(); + assert(srcs.empty()); + DPRINTF(BE, "Previous barrier: [sn:%lli]\n", + memBarrier->seqNum); + } + } + frontEnd->addFreeRegs(freed_regs); } @@ -1254,6 +1370,7 @@ LWBackEnd::squashFromXC() squash(squashed_inst); frontEnd->squash(squashed_inst, thread->readPC(), false, false); + frontEnd->interruptPending = false; thread->trapPending = false; thread->inSyscall = false; @@ -1269,6 +1386,7 @@ LWBackEnd::squashFromTrap() squash(squashed_inst); frontEnd->squash(squashed_inst, thread->readPC(), false, false); + frontEnd->interruptPending = false; thread->trapPending = false; thread->inSyscall = false; @@ -1319,6 +1437,36 @@ LWBackEnd::fetchFault(Fault &fault) fetchHasFault = true; } +template +void +LWBackEnd::switchOut() +{ + switchedOut = true; + // Need to get rid of all committed, non-speculative state and write it + // to memory/XC. In this case this is stores that have committed and not + // yet written back. 
+ LSQ.switchOut(); + squash(0); +} + +template +void +LWBackEnd::takeOverFrom(ExecContext *old_xc) +{ + switchedOut = false; + xcSquash = false; + trapSquash = false; + + numInsts = 0; + numWaitingMemOps = 0; + waitingMemOps.clear(); + waitingInsts = 0; + switchedOut = false; + dispatchStatus = Running; + commitStatus = Running; + LSQ.takeOverFrom(old_xc); +} + template void LWBackEnd::updateExeInstStats(DynInstPtr &inst) @@ -1358,7 +1506,11 @@ template void LWBackEnd::updateComInstStats(DynInstPtr &inst) { - unsigned thread = inst->threadNumber; + unsigned tid = inst->threadNumber; + + // keep an instruction count + thread->numInst++; + thread->numInsts++; cpu->numInst++; // @@ -1366,33 +1518,33 @@ LWBackEnd::updateComInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) { - stat_com_swp[thread]++; + stat_com_swp[tid]++; } else { - stat_com_inst[thread]++; + stat_com_inst[tid]++; } #else - stat_com_inst[thread]++; + stat_com_inst[tid]++; #endif // // Control Instructions // if (inst->isControl()) - stat_com_branches[thread]++; + stat_com_branches[tid]++; // // Memory references // if (inst->isMemRef()) { - stat_com_refs[thread]++; + stat_com_refs[tid]++; if (inst->isLoad()) { - stat_com_loads[thread]++; + stat_com_loads[tid]++; } } if (inst->isMemBarrier()) { - stat_com_membars[thread]++; + stat_com_membars[tid]++; } } diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh index eb9886244..042610324 100644 --- a/cpu/ozone/lw_lsq.hh +++ b/cpu/ozone/lw_lsq.hh @@ -41,6 +41,7 @@ #include "cpu/inst_seq.hh" #include "mem/mem_interface.hh" //#include "mem/page_table.hh" +#include "sim/debug.hh" #include "sim/sim_object.hh" //class PageTable; @@ -90,7 +91,10 @@ class OzoneLWLSQ { /** The writeback event for the store. Needed for store * conditionals. */ + public: Event *wbEvent; + bool miss; + private: /** The pointer to the LSQ unit that issued the store. 
*/ OzoneLWLSQ *lsqPtr; }; @@ -228,6 +232,14 @@ class OzoneLWLSQ { !storeQueue.back().completed && !dcacheInterface->isBlocked(); } + void switchOut(); + + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + + bool switchedOut; + private: /** Completes the store at the specified index. */ void completeStore(int store_idx); @@ -560,12 +572,10 @@ OzoneLWLSQ::read(MemReqPtr &req, T &data, int load_idx) sq_it++; } - // If there's no forwarding case, then go access memory DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n", inst->readPC()); - // Setup MemReq pointer req->cmd = Read; req->completionEvent = NULL; @@ -594,8 +604,12 @@ OzoneLWLSQ::read(MemReqPtr &req, T &data, int load_idx) DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x " "vaddr:%#x flags:%i\n", inst->readPC(), req->paddr, req->vaddr, req->flags); - - +/* + Addr debug_addr = ULL(0xfffffc0000be81a8); + if (req->vaddr == debug_addr) { + debug_break(); + } +*/ assert(!req->completionEvent); req->completionEvent = new typename BackEnd::LdWritebackEvent(inst, be); @@ -647,7 +661,15 @@ OzoneLWLSQ::write(MemReqPtr &req, T &data, int store_idx) (*sq_it).req = req; (*sq_it).size = sizeof(T); (*sq_it).data = data; - + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); +/* + Addr debug_addr = ULL(0xfffffc0000be81a8); + if (req->vaddr == debug_addr) { + debug_break(); + } +*/ // This function only writes the data to the store queue, so no fault // can happen here. 
return NoFault; diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh index 7b22d2564..9b7e48f96 100644 --- a/cpu/ozone/lw_lsq_impl.hh +++ b/cpu/ozone/lw_lsq_impl.hh @@ -29,6 +29,7 @@ #include "arch/isa_traits.hh" #include "base/str.hh" #include "cpu/ozone/lw_lsq.hh" +#include "cpu/checker/cpu.hh" template OzoneLWLSQ::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst, @@ -39,6 +40,7 @@ OzoneLWLSQ::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst, inst(_inst), be(_be), wbEvent(wb_event), + miss(false), lsqPtr(lsq_ptr) { this->setFlags(Event::AutoDelete); @@ -54,13 +56,21 @@ OzoneLWLSQ::StoreCompletionEvent::process() //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); // lsqPtr->cpu->wakeCPU(); + if (lsqPtr->isSwitchedOut()) { + if (wbEvent) + delete wbEvent; + + return; + } + if (wbEvent) { wbEvent->process(); delete wbEvent; } lsqPtr->completeStore(inst->sqIdx); - be->removeDcacheMiss(inst); + if (miss) + be->removeDcacheMiss(inst); } template @@ -80,8 +90,7 @@ OzoneLWLSQ::OzoneLWLSQ() template void OzoneLWLSQ::init(Params *params, unsigned maxLQEntries, - unsigned maxSQEntries, unsigned id) - + unsigned maxSQEntries, unsigned id) { DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id); @@ -90,7 +99,7 @@ OzoneLWLSQ::init(Params *params, unsigned maxLQEntries, LQEntries = maxLQEntries; SQEntries = maxSQEntries; - for (int i = 0; i < LQEntries * 10; i++) { + for (int i = 0; i < LQEntries * 2; i++) { LQIndices.push(i); SQIndices.push(i); } @@ -196,6 +205,7 @@ template void OzoneLWLSQ::insertLoad(DynInstPtr &load_inst) { + assert(loads < LQEntries * 2); assert(!LQIndices.empty()); int load_index = LQIndices.front(); LQIndices.pop(); @@ -503,21 +513,13 @@ OzoneLWLSQ::writebackStores() assert((*sq_it).req); assert(!(*sq_it).committed); - MemReqPtr req = (*sq_it).req; (*sq_it).committed = true; + MemReqPtr req = (*sq_it).req; + req->cmd = Write; req->completionEvent = NULL; req->time = curTick; - assert(!req->data); - 
req->data = new uint8_t[64]; - memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); - - DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " - "to Addr:%#x, data:%#x [sn:%lli]\n", - inst->sqIdx,inst->readPC(), - req->paddr, *(req->data), - inst->seqNum); switch((*sq_it).size) { case 1: @@ -535,8 +537,25 @@ OzoneLWLSQ::writebackStores() default: panic("Unexpected store size!\n"); } + if (!(req->flags & LOCKED)) { + (*sq_it).inst->setCompleted(); + if (cpu->checker) { + cpu->checker->tick((*sq_it).inst); + } + } + + DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + inst->sqIdx,inst->readPC(), + req->paddr, *(req->data), + inst->seqNum); if (dcacheInterface) { + assert(!req->completionEvent); + StoreCompletionEvent *store_event = new + StoreCompletionEvent(inst, be, NULL, this); + req->completionEvent = store_event; + MemAccessResult result = dcacheInterface->access(req); if (isStalled() && @@ -551,13 +570,14 @@ OzoneLWLSQ::writebackStores() if (result != MA_HIT && dcacheInterface->doEvents()) { // Event *wb = NULL; - + store_event->miss = true; typename BackEnd::LdWritebackEvent *wb = NULL; if (req->flags & LOCKED) { // Stx_C does not generate a system port transaction. // req->result=1; wb = new typename BackEnd::LdWritebackEvent(inst, be); + store_event->wbEvent = wb; } DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); @@ -567,9 +587,6 @@ OzoneLWLSQ::writebackStores() // Will stores need their own kind of writeback events? // Do stores even need writeback events? 
- assert(!req->completionEvent); - req->completionEvent = new - StoreCompletionEvent(inst, be, wb, this); be->addDcacheMiss(inst); lastDcacheStall = curTick; @@ -597,10 +614,10 @@ OzoneLWLSQ::writebackStores() typename BackEnd::LdWritebackEvent *wb = new typename BackEnd::LdWritebackEvent(inst, be); - wb->schedule(curTick); + store_event->wbEvent = wb; } sq_it--; - completeStore(inst->sqIdx); +// completeStore(inst->sqIdx); } } else { panic("Must HAVE DCACHE!!!!!\n"); @@ -758,31 +775,121 @@ OzoneLWLSQ::completeStore(int store_idx) DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n", inst->sqIdx, inst->seqNum, storesToWB); - // A bit conservative because a store completion may not free up entries, - // but hopefully avoids two store completions in one cycle from making - // the CPU tick twice. -// cpu->activityThisCycle(); assert(!storeQueue.empty()); SQItHash.erase(sq_hash_it); SQIndices.push(inst->sqIdx); storeQueue.erase(sq_it); --stores; -/* - SQIt oldest_store_it = --(storeQueue.end()); - if (sq_it == oldest_store_it) { - do { - inst = (*oldest_store_it).inst; - sq_hash_it = SQItHash.find(inst->sqIdx); - assert(sq_hash_it != SQItHash.end()); - SQItHash.erase(sq_hash_it); - SQIndices.push(inst->sqIdx); - storeQueue.erase(oldest_store_it--); - - --stores; - } while ((*oldest_store_it).completed && - oldest_store_it != storeQueue.end()); - -// be->updateLSQNextCycle = true; +// assert(!inst->isCompleted()); + inst->setCompleted(); + if (cpu->checker) { + cpu->checker->tick(inst); } -*/ +} + +template +void +OzoneLWLSQ::switchOut() +{ + switchedOut = true; + SQIt sq_it = --(storeQueue.end()); + while (storesToWB > 0 && + sq_it != storeQueue.end() && + (*sq_it).inst && + (*sq_it).canWB) { + + DynInstPtr inst = (*sq_it).inst; + + if ((*sq_it).size == 0 && !(*sq_it).completed) { + sq_it--; +// completeStore(inst->sqIdx); + + continue; + } + + // Store conditionals don't complete until *after* they have written + // back. 
If it's here and not yet sent to memory, then don't bother + // as it's not part of committed state. + if (inst->isDataPrefetch() || (*sq_it).committed || + (*sq_it).req->flags & LOCKED) { + sq_it--; + continue; + } + + assert((*sq_it).req); + assert(!(*sq_it).committed); + + MemReqPtr req = (*sq_it).req; + (*sq_it).committed = true; + + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); + + DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n", + inst->sqIdx,inst->readPC(), + req->paddr, *(req->data), + inst->seqNum); + + switch((*sq_it).size) { + case 1: + cpu->write(req, (uint8_t &)(*sq_it).data); + break; + case 2: + cpu->write(req, (uint16_t &)(*sq_it).data); + break; + case 4: + cpu->write(req, (uint32_t &)(*sq_it).data); + break; + case 8: + cpu->write(req, (uint64_t &)(*sq_it).data); + break; + default: + panic("Unexpected store size!\n"); + } + } + + // Clear the queue to free up resources + storeQueue.clear(); + loadQueue.clear(); + loads = stores = storesToWB = 0; +} + +template +void +OzoneLWLSQ::takeOverFrom(ExecContext *old_xc) +{ + // Clear out any old state. May be redundant if this is the first time + // the CPU is being used. 
+ stalled = false; + isLoadBlocked = false; + loadBlockedHandled = false; + switchedOut = false; + + // Could do simple checks here to see if indices are on twice + while (!LQIndices.empty()) + LQIndices.pop(); + while (!SQIndices.empty()) + SQIndices.pop(); + + for (int i = 0; i < LQEntries * 2; i++) { + LQIndices.push(i); + SQIndices.push(i); + } + + // May want to initialize these entries to NULL + +// loadHead = loadTail = 0; + +// storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + + loadFaultInst = storeFaultInst = memDepViolator = NULL; + + blockedLoadSeqNum = 0; } diff --git a/cpu/ozone/simple_params.hh b/cpu/ozone/simple_params.hh index e503654aa..647da1781 100644 --- a/cpu/ozone/simple_params.hh +++ b/cpu/ozone/simple_params.hh @@ -51,6 +51,7 @@ class SimpleParams : public BaseCPU::Params unsigned backEndLatency; unsigned maxInstBufferSize; unsigned numPhysicalRegs; + unsigned maxOutstandingMemOps; // // Fetch // diff --git a/python/m5/objects/OzoneCPU.py b/python/m5/objects/OzoneCPU.py index 8186a44bb..3fca61e28 100644 --- a/python/m5/objects/OzoneCPU.py +++ b/python/m5/objects/OzoneCPU.py @@ -9,12 +9,15 @@ class DerivOzoneCPU(BaseCPU): if not build_env['FULL_SYSTEM']: mem = Param.FunctionalMemory(NULL, "memory") + checker = Param.BaseCPU("Checker CPU") + width = Param.Unsigned("Width") frontEndWidth = Param.Unsigned("Front end width") backEndWidth = Param.Unsigned("Back end width") backEndSquashLatency = Param.Unsigned("Back end squash latency") backEndLatency = Param.Unsigned("Back end latency") maxInstBufferSize = Param.Unsigned("Maximum instruction buffer size") + maxOutstandingMemOps = Param.Unsigned("Maximum number of outstanding memory operations") decodeToFetchDelay = Param.Unsigned("Decode to fetch delay") renameToFetchDelay = Param.Unsigned("Rename to fetch delay") iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch "