Fixes for ozone CPU to successfully boot and run linux.

cpu/base_dyn_inst.hh:
    Remove snoop function (did not mean to commit it).
cpu/ozone/back_end_impl.hh:
    Set instruction as having its result ready, not completed.
cpu/ozone/cpu.hh:
    Fixes for store conditionals.  Use an additional lock addr list to make sure that the access is valid.  I don't know if this is fully necessary, but it gives me peace of mind (at some performance cost).
    Make sure to schedule for cycles(1) and not just 1 tick in the future, since one tick equals 1 ps.
    Also support the new Checker.
cpu/ozone/cpu_builder.cc:
    Add parameter for maxOutstandingMemOps so it can be set through the config.
    Also add in the checker.  Right now it's a BaseCPU simobject, but that may change in the future.
cpu/ozone/cpu_impl.hh:
    Add support for the checker.  For now there's a dynamic cast to convert the simobject passed back from the builder to the proper Checker type.  It's ugly, but only happens at startup, and is probably a justified use of dynamic cast.

    Support switching out/taking over from other CPUs.

    Correct indexing problem for float registers.
cpu/ozone/dyn_inst.hh:
    Add ability for instructions to wait on memory instructions in addition to source register instructions.  This is needed for memory dependence predictors and memory barriers.
cpu/ozone/dyn_inst_impl.hh:
    Support waiting on memory operations.
    Use "resultReady" to differentiate an instruction having its registers produced vs being totally completed.
cpu/ozone/front_end.hh:
    Support switching out.
    Also record if an interrupt is pending.
cpu/ozone/front_end_impl.hh:
    Support switching out.  Also support stalling the front end if an interrupt is pending.
cpu/ozone/lw_back_end.hh:
    Add checker in.
    Support switching out.
    Support memory barriers.
cpu/ozone/lw_back_end_impl.hh:
    Lots of changes to get things to work right.
    Faults, traps, interrupts all wait until all stores have written back (important).
    Memory barriers are supported, as is the general ability for instructions to be dependent on other memory instructions.
cpu/ozone/lw_lsq.hh:
    Support switching out.
    Also use store writeback events in all cases, not just dcache misses.
cpu/ozone/lw_lsq_impl.hh:
    Support switching out.
    Also use store writeback events in all cases, not just dcache misses.
    Support the checker CPU.  Marks instructions as completed once the functional access is done (which has to be done for the checker to be able to verify results).
cpu/ozone/simple_params.hh:
    Add max outstanding mem ops parameter.
python/m5/objects/OzoneCPU.py:
    Add max outstanding mem ops, checker.

--HG--
extra : convert_revision : f4d408e1bb1f25836a097b6abe3856111e950c59
This commit is contained in:
Kevin Lim 2006-05-11 19:18:36 -04:00
parent 8a9416ef8d
commit 21df09cf7a
15 changed files with 660 additions and 164 deletions

View file

@ -117,11 +117,6 @@ class BaseDynInst : public FastAlloc, public RefCounted
Fault write(T data, Addr addr, unsigned flags,
uint64_t *res);
// @todo: Probably should not have this function in the DynInst.
template <class T>
bool snoop(MemReqPtr &req, T &data)
{ return cpu->snoop(req, data); }
void prefetch(Addr addr, unsigned flags);
void writeHint(Addr addr, int size, unsigned flags);
Fault copySrcTranslate(Addr src);

View file

@ -1385,7 +1385,7 @@ BackEnd<Impl>::writebackInsts()
inst->seqNum, inst->readPC());
inst->setCanCommit();
inst->setCompleted();
inst->setResultReady();
if (inst->isExecuted()) {
int dependents = IQ.wakeDependents(inst);

View file

@ -53,6 +53,7 @@ class AlphaDTB;
class PhysicalMemory;
class MemoryController;
class Sampler;
class RemoteGDB;
class GDBListener;
@ -69,6 +70,9 @@ namespace Trace {
class InstRecord;
}
template <class>
class Checker;
/**
* Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with
* simple out-of-order capabilities added to it. It is still a 1 CPI machine
@ -226,7 +230,9 @@ class OzoneCPU : public BaseCPU
};
// execution context proxy
OzoneXC xcProxy;
OzoneXC ozoneXC;
ExecContext *xcProxy;
ExecContext *checkerXC;
typedef OzoneThreadState<Impl> ImplState;
@ -245,6 +251,7 @@ class OzoneCPU : public BaseCPU
void tick();
std::set<InstSeqNum> snList;
std::set<Addr> lockAddrList;
private:
struct TickEvent : public Event
{
@ -262,9 +269,9 @@ class OzoneCPU : public BaseCPU
void scheduleTickEvent(int delay)
{
if (tickEvent.squashed())
tickEvent.reschedule(curTick + delay);
tickEvent.reschedule(curTick + cycles(delay));
else if (!tickEvent.scheduled())
tickEvent.schedule(curTick + delay);
tickEvent.schedule(curTick + cycles(delay));
}
/// Unschedule tick event, regardless of its current state.
@ -322,7 +329,7 @@ class OzoneCPU : public BaseCPU
int cpuId;
void switchOut();
void switchOut(Sampler *sampler);
void takeOverFrom(BaseCPU *oldCPU);
#if FULL_SYSTEM
@ -472,6 +479,7 @@ class OzoneCPU : public BaseCPU
Fault error;
if (req->flags & LOCKED) {
// lockAddr = req->paddr;
lockAddrList.insert(req->paddr);
lockFlag = true;
}
@ -546,7 +554,13 @@ class OzoneCPU : public BaseCPU
req->result = 2;
} else {
if (this->lockFlag/* && this->lockAddr == req->paddr*/) {
req->result = 1;
if (lockAddrList.find(req->paddr) !=
lockAddrList.end()) {
req->result = 1;
} else {
req->result = 0;
return NoFault;
}
} else {
req->result = 0;
return NoFault;
@ -599,7 +613,7 @@ class OzoneCPU : public BaseCPU
void setSyscallReturn(SyscallReturn return_value, int tid);
#endif
ExecContext *xcBase() { return &xcProxy; }
ExecContext *xcBase() { return xcProxy; }
bool decoupledFrontEnd;
struct CommStruct {
@ -615,6 +629,8 @@ class OzoneCPU : public BaseCPU
bool lockFlag;
Stats::Scalar<> quiesceCycles;
Checker<DynInstPtr> *checker;
};
#endif // __CPU_OZONE_CPU_HH__

View file

@ -1,6 +1,7 @@
#include <string>
#include "cpu/checker/cpu.hh"
#include "cpu/inst_seq.hh"
#include "cpu/ozone/cpu.hh"
#include "cpu/ozone/ozone_impl.hh"
@ -50,6 +51,8 @@ SimObjectVectorParam<Process *> workload;
SimObjectParam<FunctionalMemory *> mem;
SimObjectParam<BaseCPU *> checker;
Param<Counter> max_insts_any_thread;
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
@ -66,6 +69,7 @@ Param<unsigned> backEndSquashLatency;
Param<unsigned> backEndLatency;
Param<unsigned> maxInstBufferSize;
Param<unsigned> numPhysicalRegs;
Param<unsigned> maxOutstandingMemOps;
Param<unsigned> decodeToFetchDelay;
Param<unsigned> renameToFetchDelay;
@ -164,6 +168,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
INIT_PARAM_DFLT(mem, "Memory", NULL),
INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
INIT_PARAM_DFLT(max_insts_any_thread,
"Terminate when any thread reaches this inst count",
0),
@ -190,6 +196,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4),
INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
@ -314,7 +321,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
#endif // FULL_SYSTEM
params->mem = mem;
params->checker = checker;
params->max_insts_any_thread = max_insts_any_thread;
params->max_insts_all_threads = max_insts_all_threads;
params->max_loads_any_thread = max_loads_any_thread;
@ -334,6 +341,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
params->backEndLatency = backEndLatency;
params->maxInstBufferSize = maxInstBufferSize;
params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs;
params->maxOutstandingMemOps = maxOutstandingMemOps;
params->decodeToFetchDelay = decodeToFetchDelay;
params->renameToFetchDelay = renameToFetchDelay;
@ -445,6 +453,8 @@ SimObjectVectorParam<Process *> workload;
SimObjectParam<FunctionalMemory *> mem;
SimObjectParam<BaseCPU *> checker;
Param<Counter> max_insts_any_thread;
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
@ -559,6 +569,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
INIT_PARAM_DFLT(mem, "Memory", NULL),
INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
INIT_PARAM_DFLT(max_insts_any_thread,
"Terminate when any thread reaches this inst count",
0),
@ -709,7 +721,7 @@ CREATE_SIM_OBJECT(SimpleOzoneCPU)
#endif // FULL_SYSTEM
params->mem = mem;
params->checker = checker;
params->max_insts_any_thread = max_insts_any_thread;
params->max_insts_all_threads = max_insts_all_threads;
params->max_loads_any_thread = max_loads_any_thread;

View file

@ -33,6 +33,7 @@
#include "base/trace.hh"
#include "config/full_system.hh"
#include "cpu/base.hh"
#include "cpu/checker/exec_context.hh"
#include "cpu/exec_context.hh"
#include "cpu/exetrace.hh"
#include "cpu/ozone/cpu.hh"
@ -156,17 +157,33 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
#endif
comm(5, 5)
{
if (p->checker) {
BaseCPU *temp_checker = p->checker;
checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
} else {
checker = NULL;
}
frontEnd = new FrontEnd(p);
backEnd = new BackEnd(p);
_status = Idle;
thread.xcProxy = &xcProxy;
if (checker) {
checker->setMemory(mem);
#if FULL_SYSTEM
checker->setSystem(p->system);
#endif
checkerXC = new CheckerExecContext<OzoneXC>(&ozoneXC, checker);
thread.xcProxy = checkerXC;
xcProxy = checkerXC;
} else {
thread.xcProxy = &ozoneXC;
xcProxy = &ozoneXC;
}
thread.inSyscall = false;
xcProxy.cpu = this;
xcProxy.thread = &thread;
ozoneXC.cpu = this;
ozoneXC.thread = &thread;
thread.setStatus(ExecContext::Suspended);
#if FULL_SYSTEM
@ -177,7 +194,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
thread.tid = 0;
thread.mem = p->mem;
thread.quiesceEvent = new EndQuiesceEvent(&xcProxy);
thread.quiesceEvent = new EndQuiesceEvent(xcProxy);
system = p->system;
itb = p->itb;
@ -187,9 +204,10 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
if (p->profile) {
thread.profile = new FunctionProfile(p->system->kernelSymtab);
// @todo: This might be better as an ExecContext instead of OzoneXC
Callback *cb =
new MakeCallback<OzoneXC,
&OzoneXC::dumpFuncProfile>(&xcProxy);
&OzoneXC::dumpFuncProfile>(&ozoneXC);
registerExitCallback(cb);
}
@ -198,7 +216,6 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
static ProfileNode dummyNode;
thread.profileNode = &dummyNode;
thread.profilePC = 3;
#else
// xc = new ExecContext(this, /* thread_num */ 0, p->workload[0], /* asid */ 0);
thread.cpu = this;
@ -225,13 +242,13 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
issueWidth = p->issueWidth;
*/
execContexts.push_back(&xcProxy);
execContexts.push_back(xcProxy);
frontEnd->setCPU(this);
backEnd->setCPU(this);
frontEnd->setXC(&xcProxy);
backEnd->setXC(&xcProxy);
frontEnd->setXC(xcProxy);
backEnd->setXC(xcProxy);
frontEnd->setThreadState(&thread);
backEnd->setThreadState(&thread);
@ -250,7 +267,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
thread.renameTable[i] = new DynInst(this);
thread.renameTable[i]->setCompleted();
thread.renameTable[i]->setResultReady();
}
frontEnd->renameTable.copyFrom(thread.renameTable);
@ -312,11 +329,15 @@ OzoneCPU<Impl>::copyToXC()
*/
template <class Impl>
void
OzoneCPU<Impl>::switchOut()
OzoneCPU<Impl>::switchOut(Sampler *sampler)
{
// Front end needs state from back end, so switch out the back end first.
backEnd->switchOut();
frontEnd->switchOut();
_status = SwitchedOut;
if (tickEvent.scheduled())
tickEvent.squash();
sampler->signalSwitched();
}
template <class Impl>
@ -325,8 +346,16 @@ OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
{
BaseCPU::takeOverFrom(oldCPU);
backEnd->takeOverFrom();
frontEnd->takeOverFrom();
assert(!tickEvent.scheduled());
// @todo: Fix hardcoded number
// Clear out any old information in time buffer.
for (int i = 0; i < 6; ++i) {
comm.advance();
}
// if any of this CPU's ExecContexts are active, mark the CPU as
// running and schedule its tick event.
for (int i = 0; i < execContexts.size(); ++i) {
@ -470,7 +499,7 @@ OzoneCPU<Impl>::serialize(std::ostream &os)
BaseCPU::serialize(os);
SERIALIZE_ENUM(_status);
nameOut(os, csprintf("%s.xc", name()));
xcProxy.serialize(os);
ozoneXC.serialize(os);
nameOut(os, csprintf("%s.tickEvent", name()));
tickEvent.serialize(os);
}
@ -481,7 +510,7 @@ OzoneCPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
{
BaseCPU::unserialize(cp, section);
UNSERIALIZE_ENUM(_status);
xcProxy.unserialize(cp, csprintf("%s.xc", section));
ozoneXC.unserialize(cp, csprintf("%s.xc", section));
tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
}
@ -579,7 +608,7 @@ template <class Impl>
Addr
OzoneCPU<Impl>::dbg_vtophys(Addr addr)
{
return vtophys(&xcProxy, addr);
return vtophys(xcProxy, addr);
}
#endif // FULL_SYSTEM
/*
@ -725,7 +754,7 @@ OzoneCPU<Impl>::tick()
comInstEventQueue[0]->serviceEvents(numInst);
if (!tickEvent.scheduled() && _status == Running)
tickEvent.schedule(curTick + 1);
tickEvent.schedule(curTick + cycles(1));
}
template <class Impl>
@ -750,7 +779,7 @@ OzoneCPU<Impl>::syscall()
DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst);
thread.process->syscall(&xcProxy);
thread.process->syscall(xcProxy);
thread.funcExeInst--;
@ -784,19 +813,17 @@ OzoneCPU<Impl>::hwrei()
{
// Need to move this to ISA code
// May also need to make this per thread
/*
if (!inPalMode())
return new UnimplementedOpcodeFault;
thread.setNextPC(thread.readMiscReg(AlphaISA::IPR_EXC_ADDR));
*/
lockFlag = false;
lockAddrList.clear();
kernelStats->hwrei();
// Not sure how to make a similar check in the Ozone model
// if (!misspeculating()) {
kernelStats->hwrei();
checkInterrupts = true;
// }
checkInterrupts = true;
// FIXME: XXX check for interrupts? XXX
return NoFault;
@ -847,6 +874,11 @@ OzoneCPU<Impl>::processInterrupts()
if (ipl && ipl > thread.readMiscReg(IPR_IPLR)) {
thread.setMiscReg(IPR_ISR, summary);
thread.setMiscReg(IPR_INTID, ipl);
// @todo: Make this more transparent
if (checker) {
checkerXC->setMiscReg(IPR_ISR, summary);
checkerXC->setMiscReg(IPR_INTID, ipl);
}
Fault fault = new InterruptFault;
fault->invoke(thread.getXCProxy());
DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
@ -860,7 +892,7 @@ OzoneCPU<Impl>::simPalCheck(int palFunc)
{
// Need to move this to ISA code
// May also need to make this per thread
this->kernelStats->callpal(palFunc, &xcProxy);
this->kernelStats->callpal(palFunc, xcProxy);
switch (palFunc) {
case PAL::halt:
@ -944,7 +976,28 @@ OzoneCPU<Impl>::OzoneXC::dumpFuncProfile()
template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
{ }
{
// some things should already be set up
assert(getMemPtr() == old_context->getMemPtr());
#if FULL_SYSTEM
assert(getSystemPtr() == old_context->getSystemPtr());
#else
assert(getProcessPtr() == old_context->getProcessPtr());
#endif
// copy over functional state
setStatus(old_context->status());
copyArchRegs(old_context);
setCpuId(old_context->readCpuId());
#if !FULL_SYSTEM
setFuncExeInst(old_context->readFuncExeInst());
#endif
// storeCondFailures = 0;
cpu->lockFlag = false;
old_context->setStatus(ExecContext::Unallocated);
}
template <class Impl>
void
@ -1062,21 +1115,24 @@ template <class Impl>
float
OzoneCPU<Impl>::OzoneXC::readFloatRegSingle(int reg_idx)
{
return thread->renameTable[reg_idx]->readFloatResult();
int idx = reg_idx + TheISA::FP_Base_DepTag;
return thread->renameTable[idx]->readFloatResult();
}
template <class Impl>
double
OzoneCPU<Impl>::OzoneXC::readFloatRegDouble(int reg_idx)
{
return thread->renameTable[reg_idx]->readDoubleResult();
int idx = reg_idx + TheISA::FP_Base_DepTag;
return thread->renameTable[idx]->readDoubleResult();
}
template <class Impl>
uint64_t
OzoneCPU<Impl>::OzoneXC::readFloatRegInt(int reg_idx)
{
return thread->renameTable[reg_idx]->readIntResult();
int idx = reg_idx + TheISA::FP_Base_DepTag;
return thread->renameTable[idx]->readIntResult();
}
template <class Impl>
@ -1101,7 +1157,9 @@ template <class Impl>
void
OzoneCPU<Impl>::OzoneXC::setFloatRegDouble(int reg_idx, double val)
{
thread->renameTable[reg_idx]->setDoubleResult(val);
int idx = reg_idx + TheISA::FP_Base_DepTag;
thread->renameTable[idx]->setDoubleResult(val);
if (!thread->inSyscall) {
cpu->squashFromXC();

View file

@ -59,9 +59,9 @@ class OzoneDynInst : public BaseDynInst<Impl>
typedef TheISA::MiscReg MiscReg;
typedef typename std::list<DynInstPtr>::iterator ListIt;
// Note that this is duplicated from the BaseDynInst class; I'm simply not
// sure the enum would carry through so I could use it in array
// declarations in this class.
// Note that this is duplicated from the BaseDynInst class; I'm
// simply not sure the enum would carry through so I could use it
// in array declarations in this class.
enum {
MaxInstSrcRegs = TheISA::MaxInstSrcRegs,
MaxInstDestRegs = TheISA::MaxInstDestRegs
@ -90,9 +90,23 @@ class OzoneDynInst : public BaseDynInst<Impl>
void addDependent(DynInstPtr &dependent_inst);
std::vector<DynInstPtr> &getDependents() { return dependents; }
std::vector<DynInstPtr> &getMemDeps() { return memDependents; }
std::list<DynInstPtr> &getMemSrcs() { return srcMemInsts; }
void wakeDependents();
void wakeMemDependents();
void addMemDependent(DynInstPtr &inst) { memDependents.push_back(inst); }
void addSrcMemInst(DynInstPtr &inst) { srcMemInsts.push_back(inst); }
void markMemInstReady(OzoneDynInst<Impl> *inst);
// For now I will remove instructions from the list when they wake
// up. In the future, you only really need a counter.
bool memDepReady() { return srcMemInsts.empty(); }
// void setBPredInfo(const BPredInfo &bp_info) { bpInfo = bp_info; }
// BPredInfo &getBPredInfo() { return bpInfo; }
@ -104,9 +118,13 @@ class OzoneDynInst : public BaseDynInst<Impl>
std::vector<DynInstPtr> dependents;
/** The instruction that produces the value of the source registers. These
* may be NULL if the value has already been read from the source
* instruction.
std::vector<DynInstPtr> memDependents;
std::list<DynInstPtr> srcMemInsts;
/** The instruction that produces the value of the source
* registers. These may be NULL if the value has already been
* read from the source instruction.
*/
DynInstPtr srcInsts[MaxInstSrcRegs];
@ -165,22 +183,22 @@ class OzoneDynInst : public BaseDynInst<Impl>
*/
void setIntReg(const StaticInst *si, int idx, uint64_t val)
{
this->instResult.integer = val;
BaseDynInst<Impl>::setIntReg(si, idx, val);
}
void setFloatRegSingle(const StaticInst *si, int idx, float val)
{
this->instResult.fp = val;
BaseDynInst<Impl>::setFloatRegSingle(si, idx, val);
}
void setFloatRegDouble(const StaticInst *si, int idx, double val)
{
this->instResult.dbl = val;
BaseDynInst<Impl>::setFloatRegDouble(si, idx, val);
}
void setFloatRegInt(const StaticInst *si, int idx, uint64_t val)
{
this->instResult.integer = val;
BaseDynInst<Impl>::setFloatRegInt(si, idx, val);
}
void setIntResult(uint64_t result) { this->instResult.integer = result; }
@ -199,6 +217,8 @@ class OzoneDynInst : public BaseDynInst<Impl>
void clearDependents();
void clearMemDependents();
public:
// ISA stuff
MiscReg readMiscReg(int misc_reg);

View file

@ -38,7 +38,7 @@ template <class Impl>
OzoneDynInst<Impl>::OzoneDynInst(FullCPU *cpu)
: BaseDynInst<Impl>(0, 0, 0, 0, cpu)
{
this->setCompleted();
this->setResultReady();
initInstPtrs();
}
@ -130,7 +130,7 @@ template <class Impl>
bool
OzoneDynInst<Impl>::srcInstReady(int regIdx)
{
return srcInsts[regIdx]->isCompleted();
return srcInsts[regIdx]->isResultReady();
}
template <class Impl>
@ -149,6 +149,28 @@ OzoneDynInst<Impl>::wakeDependents()
}
}
template <class Impl>
void
OzoneDynInst<Impl>::wakeMemDependents()
{
    // Notify every instruction waiting on this memory op that it has
    // completed, so each can drop it from its source-memory-inst list.
    const int num_deps = memDependents.size();
    for (int dep = 0; dep < num_deps; ++dep) {
        memDependents[dep]->markMemInstReady(this);
    }
}
template <class Impl>
void
OzoneDynInst<Impl>::markMemInstReady(OzoneDynInst<Impl> *inst)
{
    // Remove the now-completed memory instruction from this
    // instruction's list of outstanding source memory ops.  When the
    // list drains empty (see memDepReady()), this instruction's memory
    // dependences are satisfied.
    ListIt mem_it = srcMemInsts.begin();
    // BUGFIX: test for end() *before* dereferencing the iterator.  The
    // original condition dereferenced first, which is undefined
    // behavior if inst is not on the list (or the list is empty).
    while (mem_it != srcMemInsts.end() && (*mem_it) != inst) {
        ++mem_it;
    }
    // The waking instruction must have been registered as a source.
    assert(mem_it != srcMemInsts.end());
    srcMemInsts.erase(mem_it);
}
template <class Impl>
void
OzoneDynInst<Impl>::initInstPtrs()
@ -164,7 +186,7 @@ bool
OzoneDynInst<Impl>::srcsReady()
{
for (int i = 0; i < this->numSrcRegs(); ++i) {
if (!srcInsts[i]->isCompleted())
if (!srcInsts[i]->isResultReady())
return false;
}
@ -176,7 +198,7 @@ bool
OzoneDynInst<Impl>::eaSrcsReady()
{
for (int i = 1; i < this->numSrcRegs(); ++i) {
if (!srcInsts[i]->isCompleted())
if (!srcInsts[i]->isResultReady())
return false;
}
@ -195,6 +217,14 @@ OzoneDynInst<Impl>::clearDependents()
prevDestInst[i] = NULL;
}
}
// Drop all recorded memory dependents.  Called when this instruction's
// dependence bookkeeping is torn down; dependents themselves are not
// notified here (contrast with wakeMemDependents()).
template <class Impl>
void
OzoneDynInst<Impl>::clearMemDependents()
{
memDependents.clear();
}
template <class Impl>
MiscReg
OzoneDynInst<Impl>::readMiscReg(int misc_reg)
@ -213,6 +243,7 @@ template <class Impl>
Fault
OzoneDynInst<Impl>::setMiscReg(int misc_reg, const MiscReg &val)
{
this->setIntResult(val);
return this->thread->setMiscReg(misc_reg, val);
}
@ -234,11 +265,13 @@ OzoneDynInst<Impl>::hwrei()
this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR));
this->cpu->hwrei();
/*
this->cpu->kernelStats->hwrei();
this->cpu->checkInterrupts = true;
this->cpu->lockFlag = false;
*/
// FIXME: XXX check for interrupts? XXX
return NoFault;
}

View file

@ -66,6 +66,14 @@ class FrontEnd
bool isEmpty() { return instBuffer.empty(); }
void switchOut();
void takeOverFrom(ExecContext *old_xc = NULL);
bool isSwitchedOut() { return switchedOut; }
bool switchedOut;
private:
bool updateStatus();
@ -198,6 +206,9 @@ class FrontEnd
DynInstPtr barrierInst;
public:
bool interruptPending;
private:
// number of idle cycles
/*
Stats::Average<> notIdleFraction;
@ -223,6 +234,8 @@ class FrontEnd
Stats::Scalar<> fetchBlockedCycles;
/** Stat for total number of fetched cache lines. */
Stats::Scalar<> fetchedCacheLines;
Stats::Scalar<> fetchIcacheSquashes;
/** Distribution of number of instructions fetched each cycle. */
Stats::Distribution<> fetchNisnDist;
// Stats::Vector<> qfull_iq_occupancy;

View file

@ -19,8 +19,11 @@ FrontEnd<Impl>::FrontEnd(Params *params)
width(params->frontEndWidth),
freeRegs(params->numPhysicalRegs),
numPhysRegs(params->numPhysicalRegs),
serializeNext(false)
serializeNext(false),
interruptPending(false)
{
switchedOut = false;
status = Idle;
// Setup branch predictor.
@ -127,6 +130,11 @@ FrontEnd<Impl>::regStats()
.desc("Number of cache lines fetched")
.prereq(fetchedCacheLines);
fetchIcacheSquashes
.name(name() + ".fetchIcacheSquashes")
.desc("Number of outstanding Icache misses that were squashed")
.prereq(fetchIcacheSquashes);
fetchNisnDist
.init(/* base value */ 0,
/* last value */ width,
@ -370,6 +378,10 @@ FrontEnd<Impl>::fetchCacheLine()
#endif // FULL_SYSTEM
Fault fault = NoFault;
if (interruptPending && flags == 0) {
return fault;
}
// Align the fetch PC so it's at the start of a cache block.
Addr fetch_PC = icacheBlockAlignPC(PC);
@ -397,7 +409,8 @@ FrontEnd<Impl>::fetchCacheLine()
// exists within the cache.
if (icacheInterface && fault == NoFault) {
#if FULL_SYSTEM
if (cpu->system->memctrl->badaddr(memReq->paddr)) {
if (cpu->system->memctrl->badaddr(memReq->paddr) ||
memReq->flags & UNCACHEABLE) {
DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
"misspeculating path!",
memReq->paddr);
@ -497,7 +510,7 @@ FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
dispatchedTempSerializing++;
}
// Change status over to BarrierStall so that other stages know
// Change status over to SerializeBlocked so that other stages know
// what this is blocked on.
status = SerializeBlocked;
@ -613,8 +626,10 @@ FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req)
// Do something here.
if (status != IcacheMissStall ||
req != memReq) {
req != memReq ||
switchedOut) {
DPRINTF(FE, "Previous fetch was squashed.\n");
fetchIcacheSquashes++;
return;
}
@ -702,6 +717,7 @@ FrontEnd<Impl>::getInstFromCacheline()
DynInstPtr inst = barrierInst;
status = Running;
barrierInst = NULL;
inst->clearSerializeBefore();
return inst;
}
@ -773,7 +789,7 @@ FrontEnd<Impl>::renameInst(DynInstPtr &inst)
DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n",
inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum);
if (src_inst->isCompleted()) {
if (src_inst->isResultReady()) {
DPRINTF(FE, "Reg ready.\n");
inst->markSrcRegReady(i);
} else {
@ -807,6 +823,38 @@ FrontEnd<Impl>::wakeFromQuiesce()
status = Running;
}
// Drain the front end so this CPU can be switched out.  After this
// call the front end holds no in-flight fetch state and ignores any
// late cache completions (processCacheCompletion checks switchedOut).
template <class Impl>
void
FrontEnd<Impl>::switchOut()
{
// Mark drained first so stale Icache callbacks are discarded.
switchedOut = true;
// Drop the outstanding fetch request, if any.
memReq = NULL;
// Squash everything in flight (args presumably seq num / PC back to
// 0 — confirm against squash()'s signature).
squash(0, 0);
// Discard fetched-but-undispatched instructions.
instBuffer.clear();
instBufferSize = 0;
status = Idle;
}
// Reset the front end to a clean running state when this CPU takes
// over execution from another CPU (old_xc defaults to NULL and is
// currently unused here).
template <class Impl>
void
FrontEnd<Impl>::takeOverFrom(ExecContext *old_xc)
{
// The previous owner must have freed all physical registers.
assert(freeRegs == numPhysRegs);
// Force a fresh cache-line fetch; any cached block is stale.
fetchCacheLineNextCycle = true;
cacheBlkValid = false;
#if !FULL_SYSTEM
// pTable = params->pTable;
#endif
// Clear any leftover fault/serialization/barrier state.
fetchFault = NoFault;
serializeNext = false;
barrierInst = NULL;
status = Running;
switchedOut = false;
interruptPending = false;
}
template <class Impl>
void
FrontEnd<Impl>::dumpInsts()

View file

@ -17,6 +17,8 @@
#include "mem/mem_req.hh"
#include "sim/eventq.hh"
template <class>
class Checker;
class ExecContext;
template <class Impl>
@ -126,6 +128,8 @@ class LWBackEnd
Addr commitPC;
Tick lastCommitCycle;
bool robEmpty() { return instList.empty(); }
bool isFull() { return numInsts >= numROBEntries; }
@ -133,7 +137,7 @@ class LWBackEnd
void fetchFault(Fault &fault);
int wakeDependents(DynInstPtr &inst);
int wakeDependents(DynInstPtr &inst, bool memory_deps = false);
/** Tells memory dependence unit that a memory instruction needs to be
* rescheduled. It will re-execute once replayMemInst() is called.
@ -182,6 +186,12 @@ class LWBackEnd
void instToCommit(DynInstPtr &inst);
void switchOut();
void takeOverFrom(ExecContext *old_xc = NULL);
bool isSwitchedOut() { return switchedOut; }
private:
void generateTrapEvent(Tick latency = 0);
void handleFault(Fault &fault, Tick latency = 0);
@ -303,6 +313,10 @@ class LWBackEnd
Fault faultFromFetch;
bool fetchHasFault;
bool switchedOut;
DynInstPtr memBarrier;
private:
struct pqCompare {
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
@ -327,7 +341,7 @@ class LWBackEnd
bool exactFullStall;
bool fetchRedirect[Impl::MaxThreads];
// bool fetchRedirect[Impl::MaxThreads];
// number of cycles stalled for D-cache misses
/* Stats::Scalar<> dcacheStallCycles;
@ -414,6 +428,8 @@ class LWBackEnd
Stats::VectorDistribution<> ROB_occ_dist;
public:
void dumpInsts();
Checker<DynInstPtr> *checker;
};
template <class Impl>

View file

@ -1,5 +1,6 @@
#include "encumbered/cpu/full/op_class.hh"
#include "cpu/checker/cpu.hh"
#include "cpu/ozone/lw_back_end.hh"
template <class Impl>
@ -10,28 +11,36 @@ LWBackEnd<Impl>::generateTrapEvent(Tick latency)
TrapEvent *trap = new TrapEvent(this);
trap->schedule(curTick + latency);
trap->schedule(curTick + cpu->cycles(latency));
thread->trapPending = true;
}
template <class Impl>
int
LWBackEnd<Impl>::wakeDependents(DynInstPtr &inst)
LWBackEnd<Impl>::wakeDependents(DynInstPtr &inst, bool memory_deps)
{
assert(!inst->isSquashed());
std::vector<DynInstPtr> &dependents = inst->getDependents();
std::vector<DynInstPtr> &dependents = memory_deps ? inst->getMemDeps() :
inst->getDependents();
int num_outputs = dependents.size();
DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
for (int i = 0; i < num_outputs; i++) {
DynInstPtr dep_inst = dependents[i];
dep_inst->markSrcRegReady();
if (!memory_deps) {
dep_inst->markSrcRegReady();
} else {
if (!dep_inst->isSquashed())
dep_inst->markMemInstReady(inst.get());
}
DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
if (dep_inst->readyToIssue() && dep_inst->isInROB() &&
!dep_inst->isNonSpeculative()) {
!dep_inst->isNonSpeculative() &&
dep_inst->memDepReady() && !dep_inst->isMemBarrier() && !dep_inst->isWriteBarrier()) {
DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n",
dep_inst->seqNum);
exeList.push(dep_inst);
@ -114,6 +123,9 @@ LWBackEnd<Impl>::LdWritebackEvent::process()
// iewStage->wakeCPU();
if (be->isSwitchedOut())
return;
if (dcacheMiss) {
be->removeDcacheMiss(inst);
}
@ -169,16 +181,18 @@ LWBackEnd<Impl>::DCacheCompletionEvent::description()
template <class Impl>
LWBackEnd<Impl>::LWBackEnd(Params *params)
: d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
xcSquash(false), cacheCompletionEvent(this),
trapSquash(false), xcSquash(false), cacheCompletionEvent(this),
dcacheInterface(params->dcacheInterface), width(params->backEndWidth),
exactFullStall(true)
{
numROBEntries = params->numROBEntries;
numInsts = 0;
numDispatchEntries = 32;
maxOutstandingMemOps = 4;
maxOutstandingMemOps = params->maxOutstandingMemOps;
numWaitingMemOps = 0;
waitingInsts = 0;
switchedOut = false;
// IQ.setBE(this);
LSQ.setBE(this);
@ -533,6 +547,7 @@ LWBackEnd<Impl>::setCPU(FullCPU *cpu_ptr)
{
cpu = cpu_ptr;
LSQ.setCPU(cpu_ptr);
checker = cpu->checker;
}
template <class Impl>
@ -554,30 +569,35 @@ LWBackEnd<Impl>::checkInterrupts()
!cpu->inPalMode(thread->readPC()) &&
!trapSquash &&
!xcSquash) {
// Will need to squash all instructions currently in flight and have
// the interrupt handler restart at the last non-committed inst.
// Most of that can be handled through the trap() function. The
// processInterrupts() function really just checks for interrupts
// and then calls trap() if there is an interrupt present.
frontEnd->interruptPending = true;
if (robEmpty() && !LSQ.hasStoresToWB()) {
// Will need to squash all instructions currently in flight and have
// the interrupt handler restart at the last non-committed inst.
// Most of that can be handled through the trap() function. The
// processInterrupts() function really just checks for interrupts
// and then calls trap() if there is an interrupt present.
// Not sure which thread should be the one to interrupt. For now
// always do thread 0.
assert(!thread->inSyscall);
thread->inSyscall = true;
// Not sure which thread should be the one to interrupt. For now
// always do thread 0.
assert(!thread->inSyscall);
thread->inSyscall = true;
// CPU will handle implementation of the interrupt.
cpu->processInterrupts();
// CPU will handle implementation of the interrupt.
cpu->processInterrupts();
// Now squash or record that I need to squash this cycle.
commitStatus = TrapPending;
// Now squash or record that I need to squash this cycle.
commitStatus = TrapPending;
// Exit state update mode to avoid accidental updating.
thread->inSyscall = false;
// Exit state update mode to avoid accidental updating.
thread->inSyscall = false;
// Generate trap squash event.
generateTrapEvent();
// Generate trap squash event.
generateTrapEvent();
DPRINTF(BE, "Interrupt detected.\n");
DPRINTF(BE, "Interrupt detected.\n");
} else {
DPRINTF(BE, "Interrupt must wait for ROB to drain.\n");
}
}
}
@ -585,7 +605,7 @@ template <class Impl>
void
LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency)
{
DPRINTF(BE, "Handling fault!");
DPRINTF(BE, "Handling fault!\n");
assert(!thread->inSyscall);
@ -615,6 +635,9 @@ LWBackEnd<Impl>::tick()
wbCycle = 0;
// Read in any done instruction information and update the IQ or LSQ.
updateStructures();
#if FULL_SYSTEM
checkInterrupts();
@ -623,7 +646,7 @@ LWBackEnd<Impl>::tick()
squashFromTrap();
} else if (xcSquash) {
squashFromXC();
} else if (fetchHasFault && robEmpty() && frontEnd->isEmpty()) {
} else if (fetchHasFault && robEmpty() && frontEnd->isEmpty() && !LSQ.hasStoresToWB()) {
DPRINTF(BE, "ROB and front end empty, handling fetch fault\n");
Fault fetch_fault = frontEnd->getFault();
if (fetch_fault == NoFault) {
@ -636,9 +659,6 @@ LWBackEnd<Impl>::tick()
}
#endif
// Read in any done instruction information and update the IQ or LSQ.
updateStructures();
if (dispatchStatus != Blocked) {
dispatchInsts();
} else {
@ -719,12 +739,41 @@ LWBackEnd<Impl>::dispatchInsts()
for (int i = 0; i < inst->numDestRegs(); ++i)
renameTable[inst->destRegIdx(i)] = inst;
if (inst->readyToIssue() && !inst->isNonSpeculative()) {
DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n",
inst->seqNum);
exeList.push(inst);
if (inst->isMemBarrier() || inst->isWriteBarrier()) {
if (memBarrier) {
DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
"barrier [sn:%lli].\n",
inst->seqNum, memBarrier->seqNum);
memBarrier->addMemDependent(inst);
inst->addSrcMemInst(memBarrier);
}
memBarrier = inst;
inst->setCanCommit();
} else if (inst->readyToIssue() && !inst->isNonSpeculative()) {
if (inst->isMemRef()) {
LSQ.insert(inst);
if (memBarrier) {
DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
"barrier [sn:%lli].\n",
inst->seqNum, memBarrier->seqNum);
memBarrier->addMemDependent(inst);
inst->addSrcMemInst(memBarrier);
addWaitingMemOp(inst);
waitingList.push_front(inst);
inst->iqIt = waitingList.begin();
inst->iqItValid = true;
waitingInsts++;
} else {
DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n",
inst->seqNum);
exeList.push(inst);
}
} else {
DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n",
inst->seqNum);
exeList.push(inst);
}
} else {
if (inst->isNonSpeculative()) {
@ -735,6 +784,14 @@ LWBackEnd<Impl>::dispatchInsts()
if (inst->isMemRef()) {
addWaitingMemOp(inst);
LSQ.insert(inst);
if (memBarrier) {
memBarrier->addMemDependent(inst);
inst->addSrcMemInst(memBarrier);
DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
"barrier [sn:%lli].\n",
inst->seqNum, memBarrier->seqNum);
}
}
DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to "
@ -872,9 +929,6 @@ LWBackEnd<Impl>::executeInsts()
++funcExeInst;
++num_executed;
// keep an instruction count
thread->numInst++;
thread->numInsts++;
exeList.pop();
@ -915,7 +969,7 @@ LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
inst->setCanCommit();
if (inst->isExecuted()) {
inst->setCompleted();
inst->setResultReady();
int dependents = wakeDependents(inst);
if (dependents) {
producer_inst[0]++;
@ -956,7 +1010,7 @@ LWBackEnd<Impl>::writebackInsts()
inst->seqNum, inst->readPC());
inst->setCanCommit();
inst->setCompleted();
inst->setResultReady();
if (inst->isExecuted()) {
int dependents = wakeDependents(inst);
@ -997,7 +1051,9 @@ LWBackEnd<Impl>::commitInst(int inst_num)
// If the instruction is not executed yet, then it is a non-speculative
// or store inst. Signal backwards that it should be executed.
if (!inst->isExecuted()) {
if (inst->isNonSpeculative()) {
if (inst->isNonSpeculative() ||
inst->isMemBarrier() ||
inst->isWriteBarrier()) {
#if !FULL_SYSTEM
// Hack to make sure syscalls aren't executed until all stores
// write back their data. This direct communication shouldn't
@ -1017,6 +1073,16 @@ LWBackEnd<Impl>::commitInst(int inst_num)
"instruction at the head of the ROB, PC %#x.\n",
inst->readPC());
if (inst->isMemBarrier() || inst->isWriteBarrier()) {
DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n",
inst->seqNum);
assert(memBarrier);
wakeDependents(inst, true);
if (memBarrier == inst)
memBarrier = NULL;
inst->clearMemDependents();
}
// Send back the non-speculative instruction's sequence number.
if (inst->iqItValid) {
DPRINTF(BE, "Removing instruction from waiting list\n");
@ -1066,13 +1132,45 @@ LWBackEnd<Impl>::commitInst(int inst_num)
// Not handled for now.
assert(!inst->isThreadSync());
assert(inst->memDepReady());
// Stores will mark themselves as totally completed as they need
// to wait to writeback to memory. @todo: Hack...attempt to fix
// having the checker be forced to wait until a store completes in
// order to check all of the instructions. If the store at the
// head of the check list misses, but a later store hits, then
// loads in the checker may see the younger store values instead
// of the store they should see. Either the checker needs its own
// memory (annoying to update), its own store buffer (how to tell
// which value is correct?), or something else...
if (!inst->isStore()) {
inst->setCompleted();
}
// Check if the instruction caused a fault. If so, trap.
Fault inst_fault = inst->getFault();
// Use checker prior to updating anything due to traps or PC
// based events.
if (checker) {
checker->tick(inst);
}
if (inst_fault != NoFault) {
DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
inst->seqNum, inst->readPC());
// Instruction is completed as it has a fault.
inst->setCompleted();
if (LSQ.hasStoresToWB()) {
DPRINTF(BE, "Stores still in flight, will wait until drained.\n");
return false;
} else if (inst_num != 0) {
DPRINTF(BE, "Will wait until instruction is head of commit group.\n");
return false;
} else if (checker && inst->isStore()) {
checker->tick(inst);
}
thread->setInst(
static_cast<TheISA::MachInst>(inst->staticInst->machInst));
#if FULL_SYSTEM
@ -1094,6 +1192,8 @@ LWBackEnd<Impl>::commitInst(int inst_num)
}
if (inst->traceData) {
inst->traceData->setFetchSeq(inst->seqNum);
inst->traceData->setCPSeq(thread->numInst);
inst->traceData->finalize();
inst->traceData = NULL;
}
@ -1105,18 +1205,18 @@ LWBackEnd<Impl>::commitInst(int inst_num)
instList.pop_back();
--numInsts;
thread->numInsts++;
++thread->funcExeInst;
// Maybe move this to where teh fault is handled; if the fault is handled,
// Maybe move this to where the fault is handled; if the fault is handled,
// don't try to set this myself as the fault will set it. If not, then
// I set thread->PC = thread->nextPC and thread->nextPC = thread->nextPC + 4.
thread->setPC(thread->readNextPC());
thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst));
updateComInstStats(inst);
// Write the done sequence number here.
// LSQ.commitLoads(inst->seqNum);
// LSQ.commitStores(inst->seqNum);
toIEW->doneSeqNum = inst->seqNum;
lastCommitCycle = curTick;
#if FULL_SYSTEM
int count = 0;
@ -1243,6 +1343,22 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
waitingInsts--;
}
while (memBarrier && memBarrier->seqNum > sn) {
DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously squashed)\n", memBarrier->seqNum);
memBarrier->clearMemDependents();
if (memBarrier->memDepReady()) {
DPRINTF(BE, "No previous barrier\n");
memBarrier = NULL;
} else {
std::list<DynInstPtr> &srcs = memBarrier->getMemSrcs();
memBarrier = srcs.front();
srcs.pop_front();
assert(srcs.empty());
DPRINTF(BE, "Previous barrier: [sn:%lli]\n",
memBarrier->seqNum);
}
}
frontEnd->addFreeRegs(freed_regs);
}
@ -1254,6 +1370,7 @@ LWBackEnd<Impl>::squashFromXC()
squash(squashed_inst);
frontEnd->squash(squashed_inst, thread->readPC(),
false, false);
frontEnd->interruptPending = false;
thread->trapPending = false;
thread->inSyscall = false;
@ -1269,6 +1386,7 @@ LWBackEnd<Impl>::squashFromTrap()
squash(squashed_inst);
frontEnd->squash(squashed_inst, thread->readPC(),
false, false);
frontEnd->interruptPending = false;
thread->trapPending = false;
thread->inSyscall = false;
@ -1319,6 +1437,36 @@ LWBackEnd<Impl>::fetchFault(Fault &fault)
fetchHasFault = true;
}
template <class Impl>
void
LWBackEnd<Impl>::switchOut()
{
    // Mark the back end as drained/inactive so in-flight events (e.g. store
    // completions) can detect the switch and bail out.
    switchedOut = true;
    // Need to get rid of all committed, non-speculative state and write it
    // to memory/XC. In this case this is stores that have committed and not
    // yet written back.
    // Order matters: LSQ.switchOut() flushes committed stores to memory
    // first; squash(0) then discards all remaining (speculative) state.
    LSQ.switchOut();
    squash(0);
}
template <class Impl>
void
LWBackEnd<Impl>::takeOverFrom(ExecContext *old_xc)
{
    // Re-activate this back end after another CPU has been switched out.
    // Clears any stale squash/trap state and resets all in-flight
    // instruction bookkeeping before handing the context to the LSQ.
    //
    // Fix: the original body assigned `switchedOut = false;` twice (once at
    // the top and once again before dispatchStatus); the redundant second
    // assignment has been removed.
    switchedOut = false;
    xcSquash = false;
    trapSquash = false;

    // No instructions or memory operations are in flight after a switch.
    numInsts = 0;
    numWaitingMemOps = 0;
    waitingMemOps.clear();
    waitingInsts = 0;

    // Both pipelines restart in the normal running state.
    dispatchStatus = Running;
    commitStatus = Running;

    // Let the LSQ pick up state from the old execution context.
    LSQ.takeOverFrom(old_xc);
}
template <class Impl>
void
LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
@ -1358,7 +1506,11 @@ template <class Impl>
void
LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
{
unsigned thread = inst->threadNumber;
unsigned tid = inst->threadNumber;
// keep an instruction count
thread->numInst++;
thread->numInsts++;
cpu->numInst++;
//
@ -1366,33 +1518,33 @@ LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch()) {
stat_com_swp[thread]++;
stat_com_swp[tid]++;
} else {
stat_com_inst[thread]++;
stat_com_inst[tid]++;
}
#else
stat_com_inst[thread]++;
stat_com_inst[tid]++;
#endif
//
// Control Instructions
//
if (inst->isControl())
stat_com_branches[thread]++;
stat_com_branches[tid]++;
//
// Memory references
//
if (inst->isMemRef()) {
stat_com_refs[thread]++;
stat_com_refs[tid]++;
if (inst->isLoad()) {
stat_com_loads[thread]++;
stat_com_loads[tid]++;
}
}
if (inst->isMemBarrier()) {
stat_com_membars[thread]++;
stat_com_membars[tid]++;
}
}

View file

@ -41,6 +41,7 @@
#include "cpu/inst_seq.hh"
#include "mem/mem_interface.hh"
//#include "mem/page_table.hh"
#include "sim/debug.hh"
#include "sim/sim_object.hh"
//class PageTable;
@ -90,7 +91,10 @@ class OzoneLWLSQ {
/** The writeback event for the store. Needed for store
* conditionals.
*/
public:
Event *wbEvent;
bool miss;
private:
/** The pointer to the LSQ unit that issued the store. */
OzoneLWLSQ<Impl> *lsqPtr;
};
@ -228,6 +232,14 @@ class OzoneLWLSQ {
!storeQueue.back().completed &&
!dcacheInterface->isBlocked(); }
void switchOut();
void takeOverFrom(ExecContext *old_xc = NULL);
bool isSwitchedOut() { return switchedOut; }
bool switchedOut;
private:
/** Completes the store at the specified index. */
void completeStore(int store_idx);
@ -560,12 +572,10 @@ OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
sq_it++;
}
// If there's no forwarding case, then go access memory
DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n",
inst->readPC());
// Setup MemReq pointer
req->cmd = Read;
req->completionEvent = NULL;
@ -594,8 +604,12 @@ OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x "
"vaddr:%#x flags:%i\n",
inst->readPC(), req->paddr, req->vaddr, req->flags);
/*
Addr debug_addr = ULL(0xfffffc0000be81a8);
if (req->vaddr == debug_addr) {
debug_break();
}
*/
assert(!req->completionEvent);
req->completionEvent =
new typename BackEnd::LdWritebackEvent(inst, be);
@ -647,7 +661,15 @@ OzoneLWLSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx)
(*sq_it).req = req;
(*sq_it).size = sizeof(T);
(*sq_it).data = data;
assert(!req->data);
req->data = new uint8_t[64];
memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
/*
Addr debug_addr = ULL(0xfffffc0000be81a8);
if (req->vaddr == debug_addr) {
debug_break();
}
*/
// This function only writes the data to the store queue, so no fault
// can happen here.
return NoFault;

View file

@ -29,6 +29,7 @@
#include "arch/isa_traits.hh"
#include "base/str.hh"
#include "cpu/ozone/lw_lsq.hh"
#include "cpu/checker/cpu.hh"
template <class Impl>
OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst,
@ -39,6 +40,7 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst,
inst(_inst),
be(_be),
wbEvent(wb_event),
miss(false),
lsqPtr(lsq_ptr)
{
this->setFlags(Event::AutoDelete);
@ -54,13 +56,21 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
//lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
// lsqPtr->cpu->wakeCPU();
if (lsqPtr->isSwitchedOut()) {
if (wbEvent)
delete wbEvent;
return;
}
if (wbEvent) {
wbEvent->process();
delete wbEvent;
}
lsqPtr->completeStore(inst->sqIdx);
be->removeDcacheMiss(inst);
if (miss)
be->removeDcacheMiss(inst);
}
template <class Impl>
@ -80,8 +90,7 @@ OzoneLWLSQ<Impl>::OzoneLWLSQ()
template<class Impl>
void
OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
unsigned maxSQEntries, unsigned id)
unsigned maxSQEntries, unsigned id)
{
DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id);
@ -90,7 +99,7 @@ OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
LQEntries = maxLQEntries;
SQEntries = maxSQEntries;
for (int i = 0; i < LQEntries * 10; i++) {
for (int i = 0; i < LQEntries * 2; i++) {
LQIndices.push(i);
SQIndices.push(i);
}
@ -196,6 +205,7 @@ template <class Impl>
void
OzoneLWLSQ<Impl>::insertLoad(DynInstPtr &load_inst)
{
assert(loads < LQEntries * 2);
assert(!LQIndices.empty());
int load_index = LQIndices.front();
LQIndices.pop();
@ -503,21 +513,13 @@ OzoneLWLSQ<Impl>::writebackStores()
assert((*sq_it).req);
assert(!(*sq_it).committed);
MemReqPtr req = (*sq_it).req;
(*sq_it).committed = true;
MemReqPtr req = (*sq_it).req;
req->cmd = Write;
req->completionEvent = NULL;
req->time = curTick;
assert(!req->data);
req->data = new uint8_t[64];
memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
"to Addr:%#x, data:%#x [sn:%lli]\n",
inst->sqIdx,inst->readPC(),
req->paddr, *(req->data),
inst->seqNum);
switch((*sq_it).size) {
case 1:
@ -535,8 +537,25 @@ OzoneLWLSQ<Impl>::writebackStores()
default:
panic("Unexpected store size!\n");
}
if (!(req->flags & LOCKED)) {
(*sq_it).inst->setCompleted();
if (cpu->checker) {
cpu->checker->tick((*sq_it).inst);
}
}
DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
"to Addr:%#x, data:%#x [sn:%lli]\n",
inst->sqIdx,inst->readPC(),
req->paddr, *(req->data),
inst->seqNum);
if (dcacheInterface) {
assert(!req->completionEvent);
StoreCompletionEvent *store_event = new
StoreCompletionEvent(inst, be, NULL, this);
req->completionEvent = store_event;
MemAccessResult result = dcacheInterface->access(req);
if (isStalled() &&
@ -551,13 +570,14 @@ OzoneLWLSQ<Impl>::writebackStores()
if (result != MA_HIT && dcacheInterface->doEvents()) {
// Event *wb = NULL;
store_event->miss = true;
typename BackEnd::LdWritebackEvent *wb = NULL;
if (req->flags & LOCKED) {
// Stx_C does not generate a system port transaction.
// req->result=1;
wb = new typename BackEnd::LdWritebackEvent(inst,
be);
store_event->wbEvent = wb;
}
DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
@ -567,9 +587,6 @@ OzoneLWLSQ<Impl>::writebackStores()
// Will stores need their own kind of writeback events?
// Do stores even need writeback events?
assert(!req->completionEvent);
req->completionEvent = new
StoreCompletionEvent(inst, be, wb, this);
be->addDcacheMiss(inst);
lastDcacheStall = curTick;
@ -597,10 +614,10 @@ OzoneLWLSQ<Impl>::writebackStores()
typename BackEnd::LdWritebackEvent *wb =
new typename BackEnd::LdWritebackEvent(inst,
be);
wb->schedule(curTick);
store_event->wbEvent = wb;
}
sq_it--;
completeStore(inst->sqIdx);
// completeStore(inst->sqIdx);
}
} else {
panic("Must HAVE DCACHE!!!!!\n");
@ -758,31 +775,121 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n",
inst->sqIdx, inst->seqNum, storesToWB);
// A bit conservative because a store completion may not free up entries,
// but hopefully avoids two store completions in one cycle from making
// the CPU tick twice.
// cpu->activityThisCycle();
assert(!storeQueue.empty());
SQItHash.erase(sq_hash_it);
SQIndices.push(inst->sqIdx);
storeQueue.erase(sq_it);
--stores;
/*
SQIt oldest_store_it = --(storeQueue.end());
if (sq_it == oldest_store_it) {
do {
inst = (*oldest_store_it).inst;
sq_hash_it = SQItHash.find(inst->sqIdx);
assert(sq_hash_it != SQItHash.end());
SQItHash.erase(sq_hash_it);
SQIndices.push(inst->sqIdx);
storeQueue.erase(oldest_store_it--);
--stores;
} while ((*oldest_store_it).completed &&
oldest_store_it != storeQueue.end());
// be->updateLSQNextCycle = true;
// assert(!inst->isCompleted());
inst->setCompleted();
if (cpu->checker) {
cpu->checker->tick(inst);
}
*/
}
template <class Impl>
void
OzoneLWLSQ<Impl>::switchOut()
{
    // Drain the store queue on a CPU switch: any store that has committed
    // (canWB) but has not yet been sent to memory is written back directly
    // through cpu->write(), since it is architecturally committed state.
    // Everything else (speculative entries) is simply discarded.
    switchedOut = true;
    // Walk the store queue from the oldest entry (back of the list)
    // toward the front.
    // NOTE(review): --(storeQueue.end()) on an *empty* storeQueue is
    // undefined behavior for std::list iterators — presumably callers only
    // switch out with a non-empty queue or the first condition check is
    // benign in practice; TODO confirm.
    SQIt sq_it = --(storeQueue.end());
    while (storesToWB > 0 &&
           sq_it != storeQueue.end() &&
           (*sq_it).inst &&
           (*sq_it).canWB) {

        DynInstPtr inst = (*sq_it).inst;

        // Zero-size, not-yet-completed entries carry no data to write back;
        // skip past them.
        if ((*sq_it).size == 0 && !(*sq_it).completed) {
            sq_it--;
//            completeStore(inst->sqIdx);
            continue;
        }

        // Store conditionals don't complete until *after* they have written
        // back. If it's here and not yet sent to memory, then don't bother
        // as it's not part of committed state.
        // Also skip prefetches and entries already marked committed (the
        // latter is what guarantees this loop terminates: each entry handled
        // below is marked committed and will be skipped on the next pass).
        if (inst->isDataPrefetch() || (*sq_it).committed ||
            (*sq_it).req->flags & LOCKED) {
            sq_it--;
            continue;
        }

        assert((*sq_it).req);
        assert(!(*sq_it).committed);

        MemReqPtr req = (*sq_it).req;
        // Mark committed before issuing the write so the entry is skipped
        // (and the iterator advanced) on the next loop iteration.
        (*sq_it).committed = true;

        req->cmd = Write;
        req->completionEvent = NULL;
        req->time = curTick;
        assert(!req->data);
        // Fixed-size 64-byte buffer; actual payload length is req->size.
        req->data = new uint8_t[64];
        memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);

        DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x "
                "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n",
                inst->sqIdx,inst->readPC(),
                req->paddr, *(req->data),
                inst->seqNum);

        // Issue the access at the store's recorded size. Assumes cpu->write
        // performs the access immediately (no completion event was attached
        // above) — TODO confirm against the CPU's write() semantics.
        switch((*sq_it).size) {
          case 1:
            cpu->write(req, (uint8_t &)(*sq_it).data);
            break;
          case 2:
            cpu->write(req, (uint16_t &)(*sq_it).data);
            break;
          case 4:
            cpu->write(req, (uint32_t &)(*sq_it).data);
            break;
          case 8:
            cpu->write(req, (uint64_t &)(*sq_it).data);
            break;
          default:
            panic("Unexpected store size!\n");
        }
    }

    // Clear the queue to free up resources
    storeQueue.clear();
    loadQueue.clear();
    loads = stores = storesToWB = 0;
}
template <class Impl>
void
OzoneLWLSQ<Impl>::takeOverFrom(ExecContext *old_xc)
{
    // Re-initialize the LSQ when this CPU takes over execution from another.
    // May be redundant state-clearing if this is the first time the CPU is
    // used, which is harmless.

    // Reset switch-out and load-blocking status flags.
    switchedOut = false;
    stalled = false;
    isLoadBlocked = false;
    loadBlockedHandled = false;

    // Rebuild the free-index pools from scratch: drain whatever indices are
    // left over, then repopulate the full range. (A sanity check for
    // duplicate indices could be added here.)
    while (!LQIndices.empty())
        LQIndices.pop();
    while (!SQIndices.empty())
        SQIndices.pop();

    for (int idx = 0; idx < LQEntries * 2; ++idx) {
        LQIndices.push(idx);
        SQIndices.push(idx);
    }

    // No memory ports in use yet this cycle.
    usedPorts = 0;

    // Clear any lingering fault/violation records from the previous run.
    loadFaultInst = NULL;
    storeFaultInst = NULL;
    memDepViolator = NULL;
    blockedLoadSeqNum = 0;
}

View file

@ -51,6 +51,7 @@ class SimpleParams : public BaseCPU::Params
unsigned backEndLatency;
unsigned maxInstBufferSize;
unsigned numPhysicalRegs;
unsigned maxOutstandingMemOps;
//
// Fetch
//

View file

@ -9,12 +9,15 @@ class DerivOzoneCPU(BaseCPU):
if not build_env['FULL_SYSTEM']:
mem = Param.FunctionalMemory(NULL, "memory")
checker = Param.BaseCPU("Checker CPU")
width = Param.Unsigned("Width")
frontEndWidth = Param.Unsigned("Front end width")
backEndWidth = Param.Unsigned("Back end width")
backEndSquashLatency = Param.Unsigned("Back end squash latency")
backEndLatency = Param.Unsigned("Back end latency")
maxInstBufferSize = Param.Unsigned("Maximum instruction buffer size")
maxOutstandingMemOps = Param.Unsigned("Maximum number of outstanding memory operations")
decodeToFetchDelay = Param.Unsigned("Decode to fetch delay")
renameToFetchDelay = Param.Unsigned("Rename to fetch delay")
iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch "