O3 CPU now handles being used with the sampler.

cpu/o3/2bit_local_pred.cc:
cpu/o3/2bit_local_pred.hh:
cpu/o3/bpred_unit.hh:
cpu/o3/bpred_unit_impl.hh:
cpu/o3/btb.cc:
cpu/o3/btb.hh:
cpu/o3/commit.hh:
cpu/o3/commit_impl.hh:
cpu/o3/cpu.cc:
cpu/o3/cpu.hh:
cpu/o3/decode.hh:
cpu/o3/decode_impl.hh:
cpu/o3/fetch.hh:
cpu/o3/fetch_impl.hh:
cpu/o3/fu_pool.cc:
cpu/o3/fu_pool.hh:
cpu/o3/iew.hh:
cpu/o3/iew_impl.hh:
cpu/o3/inst_queue.hh:
cpu/o3/inst_queue_impl.hh:
cpu/o3/lsq.hh:
cpu/o3/lsq_impl.hh:
cpu/o3/lsq_unit.hh:
cpu/o3/lsq_unit_impl.hh:
cpu/o3/mem_dep_unit.hh:
cpu/o3/mem_dep_unit_impl.hh:
cpu/o3/ras.cc:
cpu/o3/ras.hh:
cpu/o3/rename.hh:
cpu/o3/rename_impl.hh:
cpu/o3/rob.hh:
cpu/o3/rob_impl.hh:
cpu/o3/sat_counter.cc:
cpu/o3/sat_counter.hh:
cpu/o3/thread_state.hh:
    Handle switching out and taking over.  Needs to be able to reset all state.
cpu/o3/alpha_cpu_impl.hh:
    Handle taking over from another XC.

--HG--
extra : convert_revision : b936e826f0f8a18319bfa940ff35097b4192b449
This commit is contained in:
Kevin Lim 2006-05-04 11:36:20 -04:00
parent 4601230d35
commit f3358e5f7b
36 changed files with 786 additions and 152 deletions

View file

@ -67,6 +67,14 @@ DefaultBP::DefaultBP(unsigned _localPredictorSize,
instShiftAmt); instShiftAmt);
} }
/** Resets the predictor by calling reset() on each entry of localCtrs. */
void
DefaultBP::reset()
{
    int set = 0;
    while (set < localPredictorSets) {
        localCtrs[set].reset();
        ++set;
    }
}
bool bool
DefaultBP::lookup(Addr &branch_addr) DefaultBP::lookup(Addr &branch_addr)
{ {

View file

@ -62,6 +62,8 @@ class DefaultBP
*/ */
void update(Addr &branch_addr, bool taken); void update(Addr &branch_addr, bool taken);
void reset();
private: private:
/** /**

View file

@ -151,6 +151,26 @@ template <class Impl>
void void
AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context) AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
{ {
// some things should already be set up
assert(getMemPtr() == old_context->getMemPtr());
#if FULL_SYSTEM
assert(getSystemPtr() == old_context->getSystemPtr());
#else
assert(getProcessPtr() == old_context->getProcessPtr());
#endif
// copy over functional state
setStatus(old_context->status());
copyArchRegs(old_context);
setCpuId(old_context->readCpuId());
#if !FULL_SYSTEM
thread->funcExeInst = old_context->readFuncExeInst();
#endif
old_context->setStatus(ExecContext::Unallocated);
thread->inSyscall = false;
thread->trapPending = false;
} }
template <class Impl> template <class Impl>

View file

@ -67,6 +67,10 @@ class TwobitBPredUnit
*/ */
void regStats(); void regStats();
void switchOut();
void takeOverFrom();
/** /**
* Predicts whether or not the instruction is a taken branch, and the * Predicts whether or not the instruction is a taken branch, and the
* target of the branch if it is taken. * target of the branch if it is taken.

View file

@ -94,6 +94,26 @@ TwobitBPredUnit<Impl>::regStats()
; ;
} }
/** Clears the prediction history list of every thread slot. */
template <class Impl>
void
TwobitBPredUnit<Impl>::switchOut()
{
    for (int tid = 0; tid < Impl::MaxThreads; tid++)
        predHist[tid].clear();
}
/**
 * Resets the predictor structures when taking over: each per-thread RAS
 * first, then the direction predictor (BP), then the BTB.
 */
template <class Impl>
void
TwobitBPredUnit<Impl>::takeOverFrom()
{
    int tid = 0;
    while (tid < Impl::MaxThreads) {
        RAS[tid].reset();
        ++tid;
    }

    BP.reset();
    BTB.reset();
}
template <class Impl> template <class Impl>
bool bool
TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid) TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
@ -297,5 +317,6 @@ TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
BP.update(pred_hist.front().PC, actually_taken); BP.update(pred_hist.front().PC, actually_taken);
BTB.update(pred_hist.front().PC, corr_target, tid); BTB.update(pred_hist.front().PC, corr_target, tid);
pred_hist.pop_front();
} }
} }

View file

@ -58,6 +58,14 @@ DefaultBTB::DefaultBTB(unsigned _numEntries,
tagShiftAmt = instShiftAmt + floorLog2(numEntries); tagShiftAmt = instShiftAmt + floorLog2(numEntries);
} }
/** Invalidates every BTB entry by clearing its valid bit. */
void
DefaultBTB::reset()
{
    int entry = 0;
    while (entry < numEntries) {
        btb[entry].valid = false;
        ++entry;
    }
}
inline inline
unsigned unsigned
DefaultBTB::getIndex(const Addr &inst_PC) DefaultBTB::getIndex(const Addr &inst_PC)

View file

@ -65,6 +65,8 @@ class DefaultBTB
DefaultBTB(unsigned numEntries, unsigned tagBits, DefaultBTB(unsigned numEntries, unsigned tagBits,
unsigned instShiftAmt); unsigned instShiftAmt);
void reset();
/** Looks up an address in the BTB. Must call valid() first on the address. /** Looks up an address in the BTB. Must call valid() first on the address.
* @param inst_PC The address of the branch to look up. * @param inst_PC The address of the branch to look up.
* @param tid The thread id. * @param tid The thread id.

View file

@ -175,6 +175,10 @@ class DefaultCommit
/** Initializes stage by sending back the number of free entries. */ /** Initializes stage by sending back the number of free entries. */
void initStage(); void initStage();
void switchOut();
void takeOverFrom();
/** Ticks the commit stage, which tries to commit instructions. */ /** Ticks the commit stage, which tries to commit instructions. */
void tick(); void tick();
@ -351,6 +355,8 @@ class DefaultCommit
/** Number of Active Threads */ /** Number of Active Threads */
unsigned numThreads; unsigned numThreads;
bool switchedOut;
Tick trapLatency; Tick trapLatency;
Tick fetchTrapLatency; Tick fetchTrapLatency;

View file

@ -54,6 +54,7 @@ template <class Impl>
void void
DefaultCommit<Impl>::TrapEvent::process() DefaultCommit<Impl>::TrapEvent::process()
{ {
// This will get reset if it was switched out.
commit->trapSquash[tid] = true; commit->trapSquash[tid] = true;
} }
@ -75,7 +76,8 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
renameWidth(params->renameWidth), renameWidth(params->renameWidth),
iewWidth(params->executeWidth), iewWidth(params->executeWidth),
commitWidth(params->commitWidth), commitWidth(params->commitWidth),
numThreads(params->numberOfThreads) numThreads(params->numberOfThreads),
switchedOut(false)
{ {
_status = Active; _status = Active;
_nextStatus = Inactive; _nextStatus = Inactive;
@ -254,6 +256,9 @@ DefaultCommit<Impl>::setCPU(FullCPU *cpu_ptr)
// Commit must broadcast the number of free entries it has at the start of // Commit must broadcast the number of free entries it has at the start of
// the simulation, so it starts as active. // the simulation, so it starts as active.
cpu->activateStage(FullCPU::CommitIdx); cpu->activateStage(FullCPU::CommitIdx);
trapLatency = cpu->cycles(6);
fetchTrapLatency = cpu->cycles(12);
} }
template <class Impl> template <class Impl>
@ -360,6 +365,29 @@ DefaultCommit<Impl>::initStage()
cpu->activityThisCycle(); cpu->activityThisCycle();
} }
/**
 * Switches the commit stage out: records that the stage is switched out
 * and forwards the request to the ROB.
 */
template <class Impl>
void
DefaultCommit<Impl>::switchOut()
{
    // The switchedOut member is declared and constructor-initialised to
    // false, but was never updated; track it here the same way the fetch
    // and IEW stages track theirs.
    switchedOut = true;
    rob->switchOut();
}

/**
 * Restores the commit stage to its start-up state when this CPU takes
 * over: clears the switched-out flag, resets the overall and per-thread
 * status, drops any pending trap/XC squash requests, zeroes the squash
 * counter and lets the ROB reset itself.
 */
template <class Impl>
void
DefaultCommit<Impl>::takeOverFrom()
{
    // Counterpart of the assignment in switchOut().
    switchedOut = false;

    // Same values the constructor assigns.
    _status = Active;
    _nextStatus = Inactive;

    for (int i = 0; i < numThreads; i++) {
        commitStatus[i] = Idle;
        changedROBNumEntries[i] = false;
        // A TrapEvent that fired before the take-over may have set this;
        // it must not trigger a squash now.
        trapSquash[i] = false;
        xcSquash[i] = false;
    }

    squashCounter = 0;
    rob->takeOverFrom();
}
template <class Impl> template <class Impl>
void void
DefaultCommit<Impl>::updateStatus() DefaultCommit<Impl>::updateStatus()
@ -719,8 +747,9 @@ DefaultCommit<Impl>::commit()
while (threads != (*activeThreads).end()) { while (threads != (*activeThreads).end()) {
unsigned tid = *threads++; unsigned tid = *threads++;
if (fromFetch->fetchFault) { if (fromFetch->fetchFault && commitStatus[0] != TrapPending) {
// Record the fault. Wait until it's empty in the ROB. Then handle the trap. // Record the fault. Wait until it's empty in the ROB. Then handle the trap.
// Ignore it if there's already a trap pending as fetch will be redirected.
fetchFault = fromFetch->fetchFault; fetchFault = fromFetch->fetchFault;
fetchFaultSN = fromFetch->fetchFaultSN; fetchFaultSN = fromFetch->fetchFaultSN;
fetchFaultTick = curTick + fetchTrapLatency; fetchFaultTick = curTick + fetchTrapLatency;
@ -975,6 +1004,7 @@ DefaultCommit<Impl>::commitInsts()
} }
PC[tid] = nextPC[tid]; PC[tid] = nextPC[tid];
nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst);
#if FULL_SYSTEM #if FULL_SYSTEM
int count = 0; int count = 0;
Addr oldpc; Addr oldpc;
@ -1002,6 +1032,10 @@ DefaultCommit<Impl>::commitInsts()
DPRINTF(CommitRate, "%i\n", num_committed); DPRINTF(CommitRate, "%i\n", num_committed);
numCommittedDist.sample(num_committed); numCommittedDist.sample(num_committed);
if (num_committed == commitWidth) {
commit_eligible[0]++;
}
} }
template <class Impl> template <class Impl>

View file

@ -124,6 +124,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
mem(params->mem), mem(params->mem),
#else #else
// pTable(params->pTable), // pTable(params->pTable),
mem(params->workload[0]->getMemory()),
#endif // FULL_SYSTEM #endif // FULL_SYSTEM
icacheInterface(params->icacheInterface), icacheInterface(params->icacheInterface),
@ -176,9 +177,9 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
numThreads = number_of_threads; numThreads = number_of_threads;
#if !FULL_SYSTEM #if !FULL_SYSTEM
int activeThreads = params->workload.size(); int active_threads = params->workload.size();
#else #else
int activeThreads = 1; int active_threads = 1;
#endif #endif
assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs);
@ -192,7 +193,7 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
PhysRegIndex freg_idx = params->numPhysIntRegs; //Index to 1 after int regs PhysRegIndex freg_idx = params->numPhysIntRegs; //Index to 1 after int regs
for (int tid=0; tid < numThreads; tid++) { for (int tid=0; tid < numThreads; tid++) {
bool bindRegs = (tid <= activeThreads - 1); bool bindRegs = (tid <= active_threads - 1);
commitRenameMap[tid].init(TheISA::NumIntRegs, commitRenameMap[tid].init(TheISA::NumIntRegs,
params->numPhysIntRegs, params->numPhysIntRegs,
@ -357,7 +358,7 @@ FullO3CPU<Impl>::tick()
} }
if (activityCount && !tickEvent.scheduled()) { if (activityCount && !tickEvent.scheduled()) {
tickEvent.schedule(curTick + 1); tickEvent.schedule(curTick + cycles(1));
} }
#if !FULL_SYSTEM #if !FULL_SYSTEM
@ -370,8 +371,8 @@ template <class Impl>
void void
FullO3CPU<Impl>::init() FullO3CPU<Impl>::init()
{ {
if (deferRegistration) { if (!deferRegistration) {
return; registerExecContexts();
} }
// Set inSyscall so that the CPU doesn't squash when initially // Set inSyscall so that the CPU doesn't squash when initially
@ -379,7 +380,6 @@ FullO3CPU<Impl>::init()
for (int i = 0; i < number_of_threads; ++i) for (int i = 0; i < number_of_threads; ++i)
thread[i]->inSyscall = true; thread[i]->inSyscall = true;
registerExecContexts();
// Need to do a copy of the xc->regs into the CPU's regfile so // Need to do a copy of the xc->regs into the CPU's regfile so
// that it can start properly. // that it can start properly.
@ -388,7 +388,7 @@ FullO3CPU<Impl>::init()
// Need to do a copy of the xc->regs into the CPU's regfile so // Need to do a copy of the xc->regs into the CPU's regfile so
// that it can start properly. // that it can start properly.
#if FULL_SYSTEM #if FULL_SYSTEM
ExecContext *src_xc = system->execContexts[tid]; ExecContext *src_xc = execContexts[tid];
#else #else
ExecContext *src_xc = thread[tid]->getXCProxy(); ExecContext *src_xc = thread[tid]->getXCProxy();
#endif #endif
@ -584,7 +584,7 @@ FullO3CPU<Impl>::activateContext(int tid, int delay)
activeThreads.push_back(tid); activeThreads.push_back(tid);
} }
assert(_status == Idle); assert(_status == Idle || _status == SwitchedOut);
scheduleTickEvent(delay); scheduleTickEvent(delay);
@ -658,21 +658,64 @@ FullO3CPU<Impl>::haltContext(int tid)
template <class Impl> template <class Impl>
void void
FullO3CPU<Impl>::switchOut() FullO3CPU<Impl>::switchOut(Sampler *sampler)
{ {
panic("FullO3CPU does not have a switch out function.\n"); // panic("FullO3CPU does not have a switch out function.\n");
fetch.switchOut();
decode.switchOut();
rename.switchOut();
iew.switchOut();
commit.switchOut();
if (tickEvent.scheduled())
tickEvent.squash();
sampler->signalSwitched();
_status = SwitchedOut;
} }
template <class Impl> template <class Impl>
void void
FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU) FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
{ {
for (int i = 0; i < 6; ++i) {
timeBuffer.advance();
fetchQueue.advance();
decodeQueue.advance();
renameQueue.advance();
iewQueue.advance();
activityBuffer.advance();
}
activityCount = 0;
bzero(&stageActive, sizeof(stageActive));
BaseCPU::takeOverFrom(oldCPU); BaseCPU::takeOverFrom(oldCPU);
fetch.takeOverFrom();
decode.takeOverFrom();
rename.takeOverFrom();
iew.takeOverFrom();
commit.takeOverFrom();
assert(!tickEvent.scheduled()); assert(!tickEvent.scheduled());
// @todo: Figure out how to properly select the tid to put onto the active threads list.
int tid = 0;
list<unsigned>::iterator isActive = find(
activeThreads.begin(), activeThreads.end(), tid);
if (isActive == activeThreads.end()) {
//May Need to Re-code this if the delay variable is the
//delay needed for thread to activate
DPRINTF(FullCPU, "Adding Thread %i to active threads list\n",
tid);
activeThreads.push_back(tid);
}
// Set all status's to active, schedule the // Set all status's to active, schedule the
// CPU's tick event. // CPU's tick event.
// @todo: Fix up statuses so this is handled properly
for (int i = 0; i < execContexts.size(); ++i) { for (int i = 0; i < execContexts.size(); ++i) {
ExecContext *xc = execContexts[i]; ExecContext *xc = execContexts[i];
if (xc->status() == ExecContext::Active && _status != Running) { if (xc->status() == ExecContext::Active && _status != Running) {
@ -680,6 +723,8 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
tickEvent.schedule(curTick); tickEvent.schedule(curTick);
} }
} }
if (!tickEvent.scheduled())
tickEvent.schedule(curTick);
} }
template <class Impl> template <class Impl>
@ -758,7 +803,8 @@ template <class Impl>
float float
FullO3CPU<Impl>::readArchFloatRegSingle(int reg_idx, unsigned tid) FullO3CPU<Impl>::readArchFloatRegSingle(int reg_idx, unsigned tid)
{ {
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); int idx = reg_idx + TheISA::FP_Base_DepTag;
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
return regFile.readFloatRegSingle(phys_reg); return regFile.readFloatRegSingle(phys_reg);
} }
@ -767,7 +813,8 @@ template <class Impl>
double double
FullO3CPU<Impl>::readArchFloatRegDouble(int reg_idx, unsigned tid) FullO3CPU<Impl>::readArchFloatRegDouble(int reg_idx, unsigned tid)
{ {
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); int idx = reg_idx + TheISA::FP_Base_DepTag;
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
return regFile.readFloatRegDouble(phys_reg); return regFile.readFloatRegDouble(phys_reg);
} }
@ -776,7 +823,8 @@ template <class Impl>
uint64_t uint64_t
FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, unsigned tid) FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, unsigned tid)
{ {
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); int idx = reg_idx + TheISA::FP_Base_DepTag;
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
return regFile.readFloatRegInt(phys_reg); return regFile.readFloatRegInt(phys_reg);
} }

View file

@ -82,7 +82,8 @@ class FullO3CPU : public BaseFullCPU
Running, Running,
Idle, Idle,
Halted, Halted,
Blocked Blocked,
SwitchedOut
}; };
/** Overall CPU status. */ /** Overall CPU status. */
@ -112,9 +113,9 @@ class FullO3CPU : public BaseFullCPU
void scheduleTickEvent(int delay) void scheduleTickEvent(int delay)
{ {
if (tickEvent.squashed()) if (tickEvent.squashed())
tickEvent.reschedule(curTick + delay); tickEvent.reschedule(curTick + cycles(delay));
else if (!tickEvent.scheduled()) else if (!tickEvent.scheduled())
tickEvent.schedule(curTick + delay); tickEvent.schedule(curTick + cycles(delay));
} }
/** Unschedule tick event, regardless of its current state. */ /** Unschedule tick event, regardless of its current state. */
@ -196,7 +197,7 @@ class FullO3CPU : public BaseFullCPU
/** Switches out this CPU. /** Switches out this CPU.
* @todo: Implement this. * @todo: Implement this.
*/ */
void switchOut(); void switchOut(Sampler *sampler);
/** Takes over from another CPU. /** Takes over from another CPU.
* @todo: Implement this. * @todo: Implement this.

View file

@ -107,6 +107,9 @@ class DefaultDecode
/** Sets pointer to list of active threads. */ /** Sets pointer to list of active threads. */
void setActiveThreads(std::list<unsigned> *at_ptr); void setActiveThreads(std::list<unsigned> *at_ptr);
void switchOut();
void takeOverFrom();
/** Ticks decode, processing all input signals and decoding as many /** Ticks decode, processing all input signals and decoding as many
* instructions as possible. * instructions as possible.
*/ */
@ -272,6 +275,8 @@ class DefaultDecode
Stats::Scalar<> decodeUnblockCycles; Stats::Scalar<> decodeUnblockCycles;
/** Stat for total number of squashing cycles. */ /** Stat for total number of squashing cycles. */
Stats::Scalar<> decodeSquashCycles; Stats::Scalar<> decodeSquashCycles;
/** Stat for number of times a branch is resolved at decode. */
Stats::Scalar<> decodeBranchResolved;
/** Stat for number of times a branch mispredict is detected. */ /** Stat for number of times a branch mispredict is detected. */
Stats::Scalar<> decodeBranchMispred; Stats::Scalar<> decodeBranchMispred;
/** Stat for number of times decode detected a non-control instruction /** Stat for number of times decode detected a non-control instruction

View file

@ -66,40 +66,44 @@ void
DefaultDecode<Impl>::regStats() DefaultDecode<Impl>::regStats()
{ {
decodeIdleCycles decodeIdleCycles
.name(name() + ".decodeIdleCycles") .name(name() + ".DECODE:IdleCycles")
.desc("Number of cycles decode is idle") .desc("Number of cycles decode is idle")
.prereq(decodeIdleCycles); .prereq(decodeIdleCycles);
decodeBlockedCycles decodeBlockedCycles
.name(name() + ".decodeBlockedCycles") .name(name() + ".DECODE:BlockedCycles")
.desc("Number of cycles decode is blocked") .desc("Number of cycles decode is blocked")
.prereq(decodeBlockedCycles); .prereq(decodeBlockedCycles);
decodeRunCycles decodeRunCycles
.name(name() + ".decodeRunCycles") .name(name() + ".DECODE:RunCycles")
.desc("Number of cycles decode is running") .desc("Number of cycles decode is running")
.prereq(decodeRunCycles); .prereq(decodeRunCycles);
decodeUnblockCycles decodeUnblockCycles
.name(name() + ".decodeUnblockCycles") .name(name() + ".DECODE:UnblockCycles")
.desc("Number of cycles decode is unblocking") .desc("Number of cycles decode is unblocking")
.prereq(decodeUnblockCycles); .prereq(decodeUnblockCycles);
decodeSquashCycles decodeSquashCycles
.name(name() + ".decodeSquashCycles") .name(name() + ".DECODE:SquashCycles")
.desc("Number of cycles decode is squashing") .desc("Number of cycles decode is squashing")
.prereq(decodeSquashCycles); .prereq(decodeSquashCycles);
decodeBranchResolved
.name(name() + ".DECODE:BranchResolved")
.desc("Number of times decode resolved a branch")
.prereq(decodeBranchResolved);
decodeBranchMispred decodeBranchMispred
.name(name() + ".decodeBranchMispred") .name(name() + ".DECODE:BranchMispred")
.desc("Number of times decode detected a branch misprediction") .desc("Number of times decode detected a branch misprediction")
.prereq(decodeBranchMispred); .prereq(decodeBranchMispred);
decodeControlMispred decodeControlMispred
.name(name() + ".decodeControlMispred") .name(name() + ".DECODE:ControlMispred")
.desc("Number of times decode detected an instruction incorrectly" .desc("Number of times decode detected an instruction incorrectly"
" predicted as a control") " predicted as a control")
.prereq(decodeControlMispred); .prereq(decodeControlMispred);
decodeDecodedInsts decodeDecodedInsts
.name(name() + ".decodeDecodedInsts") .name(name() + ".DECODE:DecodedInsts")
.desc("Number of instructions handled by decode") .desc("Number of instructions handled by decode")
.prereq(decodeDecodedInsts); .prereq(decodeDecodedInsts);
decodeSquashedInsts decodeSquashedInsts
.name(name() + ".decodeSquashedInsts") .name(name() + ".DECODE:SquashedInsts")
.desc("Number of squashed instructions handled by decode") .desc("Number of squashed instructions handled by decode")
.prereq(decodeSquashedInsts); .prereq(decodeSquashedInsts);
} }
@ -158,6 +162,33 @@ DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr)
activeThreads = at_ptr; activeThreads = at_ptr;
} }
/**
 * Switch-out hook for the decode stage. The body is intentionally empty:
 * no action is taken here, and the stage's state is reset in
 * takeOverFrom() instead.
 */
template <class Impl>
void
DefaultDecode<Impl>::switchOut()
{
}
/**
 * Resets decode when this CPU takes over: marks the stage inactive and,
 * for each thread, idles its status, clears the rename/IEW/commit stall
 * flags, empties the incoming-instruction and skid queues, and zeroes the
 * branch count. Finally clears the wrote-to-time-buffer flag.
 */
template <class Impl>
void
DefaultDecode<Impl>::takeOverFrom()
{
    _status = Inactive;

    for (int tid = 0; tid < numThreads; tid++) {
        decodeStatus[tid] = Idle;

        stalls[tid].rename = false;
        stalls[tid].iew = false;
        stalls[tid].commit = false;

        // Drain whatever was left queued for this thread.
        for (; !insts[tid].empty(); insts[tid].pop()) {
        }
        for (; !skidBuffer[tid].empty(); skidBuffer[tid].pop()) {
        }

        branchCount[tid] = 0;
    }

    wroteToTimeBuffer = false;
}
template<class Impl> template<class Impl>
bool bool
DefaultDecode<Impl>::checkStall(unsigned tid) const DefaultDecode<Impl>::checkStall(unsigned tid) const
@ -680,6 +711,7 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
// Go ahead and compute any PC-relative branches. // Go ahead and compute any PC-relative branches.
if (inst->isDirectCtrl() && inst->isUncondCtrl()) { if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
++decodeBranchResolved;
inst->setNextPC(inst->branchTarget()); inst->setNextPC(inst->branchTarget());
if (inst->mispredicted()) { if (inst->mispredicted()) {

View file

@ -35,6 +35,8 @@
#include "mem/mem_interface.hh" #include "mem/mem_interface.hh"
#include "sim/eventq.hh" #include "sim/eventq.hh"
class Sampler;
/** /**
* DefaultFetch class handles both single threaded and SMT fetch. Its width is * DefaultFetch class handles both single threaded and SMT fetch. Its width is
* specified by the parameters; each cycle it tries to fetch that many * specified by the parameters; each cycle it tries to fetch that many
@ -81,6 +83,7 @@ class DefaultFetch
Fetching, Fetching,
TrapPending, TrapPending,
QuiescePending, QuiescePending,
SwitchOut,
IcacheMissStall, IcacheMissStall,
IcacheMissComplete IcacheMissComplete
}; };
@ -160,6 +163,12 @@ class DefaultFetch
/** Processes cache completion event. */ /** Processes cache completion event. */
void processCacheCompletion(MemReqPtr &req); void processCacheCompletion(MemReqPtr &req);
void switchOut();
void takeOverFrom();
bool isSwitchedOut() { return switchedOut; }
void wakeFromQuiesce(); void wakeFromQuiesce();
private: private:
@ -360,6 +369,8 @@ class DefaultFetch
bool interruptPending; bool interruptPending;
bool switchedOut;
#if !FULL_SYSTEM #if !FULL_SYSTEM
/** Page table pointer. */ /** Page table pointer. */
// PageTable *pTable; // PageTable *pTable;
@ -382,6 +393,8 @@ class DefaultFetch
*/ */
Stats::Scalar<> fetchIdleCycles; Stats::Scalar<> fetchIdleCycles;
Stats::Scalar<> fetchBlockedCycles; Stats::Scalar<> fetchBlockedCycles;
Stats::Scalar<> fetchMiscStallCycles;
/** Stat for total number of fetched cache lines. */ /** Stat for total number of fetched cache lines. */
Stats::Scalar<> fetchedCacheLines; Stats::Scalar<> fetchedCacheLines;

View file

@ -169,53 +169,59 @@ void
DefaultFetch<Impl>::regStats() DefaultFetch<Impl>::regStats()
{ {
icacheStallCycles icacheStallCycles
.name(name() + ".icacheStallCycles") .name(name() + ".FETCH:icacheStallCycles")
.desc("Number of cycles fetch is stalled on an Icache miss") .desc("Number of cycles fetch is stalled on an Icache miss")
.prereq(icacheStallCycles); .prereq(icacheStallCycles);
fetchedInsts fetchedInsts
.name(name() + ".fetchedInsts") .name(name() + ".FETCH:Insts")
.desc("Number of instructions fetch has processed") .desc("Number of instructions fetch has processed")
.prereq(fetchedInsts); .prereq(fetchedInsts);
fetchedBranches fetchedBranches
.name(name() + ".fetchedBranches") .name(name() + ".FETCH:Branches")
.desc("Number of branches that fetch encountered") .desc("Number of branches that fetch encountered")
.prereq(fetchedBranches); .prereq(fetchedBranches);
predictedBranches predictedBranches
.name(name() + ".predictedBranches") .name(name() + ".FETCH:predictedBranches")
.desc("Number of branches that fetch has predicted taken") .desc("Number of branches that fetch has predicted taken")
.prereq(predictedBranches); .prereq(predictedBranches);
fetchCycles fetchCycles
.name(name() + ".fetchCycles") .name(name() + ".FETCH:Cycles")
.desc("Number of cycles fetch has run and was not squashing or" .desc("Number of cycles fetch has run and was not squashing or"
" blocked") " blocked")
.prereq(fetchCycles); .prereq(fetchCycles);
fetchSquashCycles fetchSquashCycles
.name(name() + ".fetchSquashCycles") .name(name() + ".FETCH:SquashCycles")
.desc("Number of cycles fetch has spent squashing") .desc("Number of cycles fetch has spent squashing")
.prereq(fetchSquashCycles); .prereq(fetchSquashCycles);
fetchIdleCycles fetchIdleCycles
.name(name() + ".fetchIdleCycles") .name(name() + ".FETCH:IdleCycles")
.desc("Number of cycles fetch was idle") .desc("Number of cycles fetch was idle")
.prereq(fetchIdleCycles); .prereq(fetchIdleCycles);
fetchBlockedCycles fetchBlockedCycles
.name(name() + ".fetchBlockedCycles") .name(name() + ".FETCH:BlockedCycles")
.desc("Number of cycles fetch has spent blocked") .desc("Number of cycles fetch has spent blocked")
.prereq(fetchBlockedCycles); .prereq(fetchBlockedCycles);
fetchedCacheLines fetchedCacheLines
.name(name() + ".fetchedCacheLines") .name(name() + ".FETCH:CacheLines")
.desc("Number of cache lines fetched") .desc("Number of cache lines fetched")
.prereq(fetchedCacheLines); .prereq(fetchedCacheLines);
fetchMiscStallCycles
.name(name() + ".FETCH:MiscStallCycles")
.desc("Number of cycles fetch has spent waiting on interrupts, or "
"bad addresses, or out of MSHRs")
.prereq(fetchMiscStallCycles);
fetchIcacheSquashes fetchIcacheSquashes
.name(name() + ".fetchIcacheSquashes") .name(name() + ".FETCH:IcacheSquashes")
.desc("Number of outstanding Icache misses that were squashed") .desc("Number of outstanding Icache misses that were squashed")
.prereq(fetchIcacheSquashes); .prereq(fetchIcacheSquashes);
@ -223,24 +229,24 @@ DefaultFetch<Impl>::regStats()
.init(/* base value */ 0, .init(/* base value */ 0,
/* last value */ fetchWidth, /* last value */ fetchWidth,
/* bucket size */ 1) /* bucket size */ 1)
.name(name() + ".rateDist") .name(name() + ".FETCH:rateDist")
.desc("Number of instructions fetched each cycle (Total)") .desc("Number of instructions fetched each cycle (Total)")
.flags(Stats::pdf); .flags(Stats::pdf);
idleRate idleRate
.name(name() + ".idleRate") .name(name() + ".FETCH:idleRate")
.desc("Percent of cycles fetch was idle") .desc("Percent of cycles fetch was idle")
.prereq(idleRate); .prereq(idleRate);
idleRate = fetchIdleCycles * 100 / cpu->numCycles; idleRate = fetchIdleCycles * 100 / cpu->numCycles;
branchRate branchRate
.name(name() + ".branchRate") .name(name() + ".FETCH:branchRate")
.desc("Number of branch fetches per cycle") .desc("Number of branch fetches per cycle")
.flags(Stats::total); .flags(Stats::total);
branchRate = predictedBranches / cpu->numCycles; branchRate = predictedBranches / cpu->numCycles;
fetchRate fetchRate
.name(name() + ".rate") .name(name() + ".FETCH:rate")
.desc("Number of inst fetches per cycle") .desc("Number of inst fetches per cycle")
.flags(Stats::total); .flags(Stats::total);
fetchRate = fetchedInsts / cpu->numCycles; fetchRate = fetchedInsts / cpu->numCycles;
@ -332,7 +338,8 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
// Can keep track of how many cache accesses go unused due to // Can keep track of how many cache accesses go unused due to
// misspeculation here. // misspeculation here.
if (fetchStatus[tid] != IcacheMissStall || if (fetchStatus[tid] != IcacheMissStall ||
req != memReq[tid]) { req != memReq[tid] ||
isSwitchedOut()) {
++fetchIcacheSquashes; ++fetchIcacheSquashes;
return; return;
} }
@ -360,6 +367,35 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
// memReq[tid]->completionEvent = NULL; // memReq[tid]->completionEvent = NULL;
} }
/**
 * Switches the fetch stage out: sets the switchedOut flag (reported by
 * isSwitchedOut()) and tells the branch predictor to switch out as well.
 */
template <class Impl>
void
DefaultFetch<Impl>::switchOut()
{
// Set the flag before notifying the branch predictor.
switchedOut = true;
branchPred.switchOut();
}
/**
 * Resets fetch when this CPU takes over. For every thread slot the stall
 * flags are cleared, PC/nextPC are re-read from the CPU and the status is
 * set to Running. Stage-wide state is then reset, the switched-out flag
 * is cleared and the branch predictor is told to take over.
 */
template <class Impl>
void
DefaultFetch<Impl>::takeOverFrom()
{
    int tid = 0;
    while (tid < Impl::MaxThreads) {
        // No stage is stalling this thread any more.
        stalls[tid].decode = 0;
        stalls[tid].rename = 0;
        stalls[tid].iew = 0;
        stalls[tid].commit = 0;

        // Pick up the PCs currently held by the CPU.
        PC[tid] = cpu->readPC(tid);
        nextPC[tid] = cpu->readNextPC(tid);

        fetchStatus[tid] = Running;
        ++tid;
    }

    numInst = 0;
    wroteToTimeBuffer = false;
    _status = Inactive;
    switchedOut = false;

    branchPred.takeOverFrom();
}
template <class Impl> template <class Impl>
void void
DefaultFetch<Impl>::wakeFromQuiesce() DefaultFetch<Impl>::wakeFromQuiesce()
@ -902,8 +938,10 @@ DefaultFetch<Impl>::fetch(bool &status_change)
tid, fetch_PC); tid, fetch_PC);
bool fetch_success = fetchCacheLine(fetch_PC, fault, tid); bool fetch_success = fetchCacheLine(fetch_PC, fault, tid);
if (!fetch_success) if (!fetch_success) {
++fetchMiscStallCycles;
return; return;
}
} else { } else {
if (fetchStatus[tid] == Idle) { if (fetchStatus[tid] == Idle) {
++fetchIdleCycles; ++fetchIdleCycles;

View file

@ -242,6 +242,20 @@ FUPool::dump()
} }
} }
/**
 * Switch-out hook for the functional unit pool. The body is intentionally
 * empty: no action is taken here, and the pool's state is reset in
 * takeOverFrom() instead.
 */
void
FUPool::switchOut()
{
}
/**
 * Resets the pool when taking over: marks every functional unit as not
 * busy and empties the list of units waiting to be freed.
 */
void
FUPool::takeOverFrom()
{
    int fu = 0;
    while (fu < numFU) {
        unitBusy[fu] = false;
        ++fu;
    }

    unitsToBeFreed.clear();
}
// //
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////

View file

@ -154,6 +154,9 @@ class FUPool : public SimObject
unsigned getIssueLatency(OpClass capability) { unsigned getIssueLatency(OpClass capability) {
return maxIssueLatencies[capability]; return maxIssueLatencies[capability];
} }
void switchOut();
void takeOverFrom();
}; };
#endif // __CPU_O3_FU_POOL_HH__ #endif // __CPU_O3_FU_POOL_HH__

View file

@ -157,6 +157,12 @@ class DefaultIEW
/** Sets pointer to the scoreboard. */ /** Sets pointer to the scoreboard. */
void setScoreboard(Scoreboard *sb_ptr); void setScoreboard(Scoreboard *sb_ptr);
void switchOut();
void takeOverFrom();
bool isSwitchedOut() { return switchedOut; }
/** Sets page table pointer within LSQ. */ /** Sets page table pointer within LSQ. */
// void setPageTable(PageTable *pt_ptr); // void setPageTable(PageTable *pt_ptr);
@ -420,6 +426,8 @@ class DefaultIEW
/** Maximum size of the skid buffer. */ /** Maximum size of the skid buffer. */
unsigned skidBufferMax; unsigned skidBufferMax;
bool switchedOut;
/** Stat for total number of idle cycles. */ /** Stat for total number of idle cycles. */
Stats::Scalar<> iewIdleCycles; Stats::Scalar<> iewIdleCycles;
/** Stat for total number of squashing cycles. */ /** Stat for total number of squashing cycles. */

View file

@ -55,13 +55,13 @@ DefaultIEW<Impl>::LdWritebackEvent::process()
//iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
iewStage->wakeCPU(); if (inst->isSquashed() || iewStage->isSwitchedOut()) {
if (inst->isSquashed()) {
inst = NULL; inst = NULL;
return; return;
} }
iewStage->wakeCPU();
if (!inst->isExecuted()) { if (!inst->isExecuted()) {
inst->setExecuted(); inst->setExecuted();
@ -101,7 +101,8 @@ DefaultIEW<Impl>::DefaultIEW(Params *params)
issueReadWidth(params->issueWidth), issueReadWidth(params->issueWidth),
issueWidth(params->issueWidth), issueWidth(params->issueWidth),
executeWidth(params->executeWidth), executeWidth(params->executeWidth),
numThreads(params->numberOfThreads) numThreads(params->numberOfThreads),
switchedOut(false)
{ {
DPRINTF(IEW, "executeIntWidth: %i.\n", params->executeIntWidth); DPRINTF(IEW, "executeIntWidth: %i.\n", params->executeIntWidth);
_status = Active; _status = Active;
@ -436,6 +437,53 @@ DefaultIEW<Impl>::setPageTable(PageTable *pt_ptr)
} }
#endif #endif
/**
 * Switches the IEW stage out: raises the switchedOut flag, forwards the
 * switch-out to the instruction queue, the load/store queue and the FU
 * pool, then discards every instruction still held in the per-thread
 * incoming and skid queues.
 */
template <class Impl>
void
DefaultIEW<Impl>::switchOut()
{
    switchedOut = true;

    instQueue.switchOut();
    ldstQueue.switchOut();
    fuPool->switchOut();

    for (int tid = 0; tid < numThreads; ++tid) {
        for (; !insts[tid].empty(); insts[tid].pop()) {
        }
        for (; !skidBuffer[tid].empty(); skidBuffer[tid].pop()) {
        }
    }
}
/**
 * Re-arms the IEW stage after a switch-out: restores the stage, execute
 * and writeback status values, clears the switchedOut flag, lets the
 * sub-components take over, re-runs initStage(), and resets the
 * per-thread dispatch state.
 */
template <class Impl>
void
DefaultIEW<Impl>::takeOverFrom()
{
    _status = Active;
    exeStatus = Running;
    wbStatus = Idle;
    switchedOut = false;

    instQueue.takeOverFrom();
    ldstQueue.takeOverFrom();
    fuPool->takeOverFrom();

    initStage();
    cpu->activityThisCycle();

    for (int tid = 0; tid < numThreads; ++tid) {
        dispatchStatus[tid] = Running;
        stalls[tid].commit = false;
        fetchRedirect[tid] = false;
    }

    updateLSQNextCycle = false;

    // @todo: Fix hardcoded number
    // Advance the issue-to-execute queue six times.
    int remaining = 6;
    while (remaining-- > 0) {
        issueToExecQueue.advance();
    }
}
template<class Impl> template<class Impl>
void void
DefaultIEW<Impl>::squash(unsigned tid) DefaultIEW<Impl>::squash(unsigned tid)

View file

@ -112,6 +112,10 @@ class InstructionQueue
/** Registers statistics. */ /** Registers statistics. */
void regStats(); void regStats();
void resetState();
void resetDependencyGraph();
/** Sets CPU pointer. */ /** Sets CPU pointer. */
void setCPU(FullCPU *_cpu) { cpu = _cpu; } void setCPU(FullCPU *_cpu) { cpu = _cpu; }
@ -127,6 +131,12 @@ class InstructionQueue
/** Sets the global time buffer. */ /** Sets the global time buffer. */
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
void switchOut();
void takeOverFrom();
/** Reports the current value of the switchedOut flag. */
bool isSwitchedOut()
{
    return switchedOut;
}
/** Number of entries needed for given amount of threads. */ /** Number of entries needed for given amount of threads. */
int entryAmount(int num_threads); int entryAmount(int num_threads);
@ -385,6 +395,8 @@ class InstructionQueue
*/ */
unsigned commitToIEWDelay; unsigned commitToIEWDelay;
bool switchedOut;
////////////////////////////////// //////////////////////////////////
// Variables needed for squashing // Variables needed for squashing
////////////////////////////////// //////////////////////////////////
@ -507,7 +519,7 @@ class InstructionQueue
Stats::Scalar<> iqSquashedNonSpecRemoved; Stats::Scalar<> iqSquashedNonSpecRemoved;
Stats::VectorDistribution<> queue_res_dist; Stats::VectorDistribution<> queue_res_dist;
Stats::Vector<> n_issued_dist; Stats::Distribution<> n_issued_dist;
Stats::VectorDistribution<> issue_delay_dist; Stats::VectorDistribution<> issue_delay_dist;
Stats::Vector<> stat_fu_busy; Stats::Vector<> stat_fu_busy;

View file

@ -82,16 +82,10 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
{ {
assert(fuPool); assert(fuPool);
switchedOut = false;
numThreads = params->numberOfThreads; numThreads = params->numberOfThreads;
//Initialize thread IQ counts
for (int i = 0; i <numThreads; i++) {
count[i] = 0;
}
// Initialize the number of free IQ entries.
freeEntries = numEntries;
// Set the number of physical registers as the number of int + float // Set the number of physical registers as the number of int + float
numPhysRegs = numPhysIntRegs + numPhysFloatRegs; numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
@ -101,6 +95,13 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
//dependency graph. //dependency graph.
dependGraph = new DependencyEntry[numPhysRegs]; dependGraph = new DependencyEntry[numPhysRegs];
// Initialize all the head pointers to point to NULL, and all the
// entries as unready.
for (int i = 0; i < numPhysRegs; ++i) {
dependGraph[i].next = NULL;
dependGraph[i].inst = NULL;
}
// Resize the register scoreboard. // Resize the register scoreboard.
regScoreboard.resize(numPhysRegs); regScoreboard.resize(numPhysRegs);
@ -110,27 +111,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
memDepUnit[i].setIQ(this); memDepUnit[i].setIQ(this);
} }
// Initialize all the head pointers to point to NULL, and all the resetState();
// entries as unready.
// Note that in actuality, the registers corresponding to the logical
// registers start off as ready. However this doesn't matter for the
// IQ as the instruction should have been correctly told if those
// registers are ready in rename. Thus it can all be initialized as
// unready.
for (int i = 0; i < numPhysRegs; ++i) {
dependGraph[i].next = NULL;
dependGraph[i].inst = NULL;
regScoreboard[i] = false;
}
for (int i = 0; i < numThreads; ++i) {
squashedSeqNum[i] = 0;
}
for (int i = 0; i < Num_OpClasses; ++i) {
queueOnList[i] = false;
readyIt[i] = listOrder.end();
}
string policy = params->smtIQPolicy; string policy = params->smtIQPolicy;
@ -184,30 +165,7 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
template <class Impl> template <class Impl>
InstructionQueue<Impl>::~InstructionQueue() InstructionQueue<Impl>::~InstructionQueue()
{ {
// Clear the dependency graph resetDependencyGraph();
DependencyEntry *curr;
DependencyEntry *prev;
for (int i = 0; i < numPhysRegs; ++i) {
curr = dependGraph[i].next;
while (curr) {
DependencyEntry::mem_alloc_counter--;
prev = curr;
curr = prev->next;
prev->inst = NULL;
delete prev;
}
if (dependGraph[i].inst) {
dependGraph[i].inst = NULL;
}
dependGraph[i].next = NULL;
}
assert(DependencyEntry::mem_alloc_counter == 0); assert(DependencyEntry::mem_alloc_counter == 0);
delete [] dependGraph; delete [] dependGraph;
@ -307,10 +265,10 @@ InstructionQueue<Impl>::regStats()
queue_res_dist.subname(i, opClassStrings[i]); queue_res_dist.subname(i, opClassStrings[i]);
} }
n_issued_dist n_issued_dist
.init(totalWidth + 1) .init(0,totalWidth,1)
.name(name() + ".ISSUE:issued_per_cycle") .name(name() + ".ISSUE:issued_per_cycle")
.desc("Number of insts issued each cycle") .desc("Number of insts issued each cycle")
.flags(total | pdf | dist) .flags(pdf)
; ;
/* /*
dist_unissued dist_unissued
@ -400,6 +358,71 @@ InstructionQueue<Impl>::regStats()
} }
} }
/**
 * Returns the queue's bookkeeping to its empty state: per-thread counts
 * and instruction lists, the free-entry count, the register scoreboard,
 * the per-thread squashed sequence numbers, the per-op-class ready
 * queues, and the non-speculative and ordering lists.
 */
template <class Impl>
void
InstructionQueue<Impl>::resetState()
{
    // Per-thread IQ occupancy and instruction lists.
    for (int tid = 0; tid < numThreads; ++tid) {
        count[tid] = 0;
        instList[tid].clear();
    }

    // Every IQ entry is free again.
    freeEntries = numEntries;

    // The registers that back the logical registers are really ready at
    // start-up, but rename tells each instruction which of its sources
    // are ready, so the IQ may treat every register as unready here.
    for (int reg = 0; reg < numPhysRegs; ++reg) {
        regScoreboard[reg] = false;
    }

    for (int tid = 0; tid < numThreads; ++tid) {
        squashedSeqNum[tid] = 0;
    }

    for (int op = 0; op < Num_OpClasses; ++op) {
        for (; !readyInsts[op].empty(); readyInsts[op].pop()) {
        }
        queueOnList[op] = false;
        readyIt[op] = listOrder.end();
    }

    nonSpecInsts.clear();
    listOrder.clear();
}
/**
 * Frees every chained dependency entry hanging off each physical
 * register's head node and clears the head node itself. The static
 * allocation counter is decremented once per freed entry.
 */
template <class Impl>
void
InstructionQueue<Impl>::resetDependencyGraph()
{
    for (int reg = 0; reg < numPhysRegs; ++reg) {
        DependencyEntry *node = dependGraph[reg].next;

        while (node != NULL) {
            DependencyEntry *doomed = node;
            node = doomed->next;

            --DependencyEntry::mem_alloc_counter;
            doomed->inst = NULL;
            delete doomed;
        }

        if (dependGraph[reg].inst) {
            dependGraph[reg].inst = NULL;
        }

        dependGraph[reg].next = NULL;
    }
}
template <class Impl> template <class Impl>
void void
InstructionQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr) InstructionQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr)
@ -426,6 +449,25 @@ InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
fromCommit = timeBuffer->getWire(-commitToIEWDelay); fromCommit = timeBuffer->getWire(-commitToIEWDelay);
} }
/**
 * Switches the instruction queue out: resets the queue state and the
 * dependency graph, raises the switchedOut flag, and switches out every
 * thread's memory dependence unit.
 */
template <class Impl>
void
InstructionQueue<Impl>::switchOut()
{
    resetState();
    resetDependencyGraph();
    switchedOut = true;

    int tid = 0;
    while (tid < numThreads) {
        memDepUnit[tid].switchOut();
        ++tid;
    }
}
/** Clears the switchedOut flag so the queue is treated as live again. */
template <class Impl>
void
InstructionQueue<Impl>::takeOverFrom()
{
    switchedOut = false;
}
template <class Impl> template <class Impl>
int int
InstructionQueue<Impl>::entryAmount(int num_threads) InstructionQueue<Impl>::entryAmount(int num_threads)
@ -685,6 +727,10 @@ InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
{ {
// The CPU could have been sleeping until this op completed (*extremely* // The CPU could have been sleeping until this op completed (*extremely*
// long latency op). Wake it if it was. This may be overkill. // long latency op). Wake it if it was. This may be overkill.
if (isSwitchedOut()) {
return;
}
iewStage->wakeCPU(); iewStage->wakeCPU();
fuPool->freeUnit(fu_idx); fuPool->freeUnit(fu_idx);
@ -816,7 +862,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
FUCompletion *execution = new FUCompletion(issuing_inst, FUCompletion *execution = new FUCompletion(issuing_inst,
idx, this); idx, this);
execution->schedule(curTick + issue_latency - 1); execution->schedule(curTick + cpu->cycles(issue_latency - 1));
} else { } else {
i2e_info->insts[exec_queue_slot++] = issuing_inst; i2e_info->insts[exec_queue_slot++] = issuing_inst;
i2e_info->size++; i2e_info->size++;
@ -862,6 +908,8 @@ InstructionQueue<Impl>::scheduleReadyInsts()
} }
} }
n_issued_dist.sample(total_issued);
if (total_issued) { if (total_issued) {
cpu->activityThisCycle(); cpu->activityThisCycle();
} else { } else {

View file

@ -71,6 +71,9 @@ class LSQ {
/** Sets the page table pointer. */ /** Sets the page table pointer. */
// void setPageTable(PageTable *pt_ptr); // void setPageTable(PageTable *pt_ptr);
void switchOut();
void takeOverFrom();
/** Number of entries needed for the given amount of threads.*/ /** Number of entries needed for the given amount of threads.*/
int entryAmount(int num_threads); int entryAmount(int num_threads);
void removeEntries(unsigned tid); void removeEntries(unsigned tid);
@ -271,15 +274,6 @@ class LSQ {
/** Max SQ Size - Used to Enforce Sharing Policies. */ /** Max SQ Size - Used to Enforce Sharing Policies. */
unsigned maxSQEntries; unsigned maxSQEntries;
/** Global Load Count. */
int loads;
/** Global Store Count */
int stores;
/** Global Store To WB Count */
int storesToWB;
/** Number of Threads. */ /** Number of Threads. */
unsigned numThreads; unsigned numThreads;
}; };

View file

@ -33,7 +33,6 @@ using namespace std;
template <class Impl> template <class Impl>
LSQ<Impl>::LSQ(Params *params) LSQ<Impl>::LSQ(Params *params)
: LQEntries(params->LQEntries), SQEntries(params->SQEntries), : LQEntries(params->LQEntries), SQEntries(params->SQEntries),
loads(0), stores(0), storesToWB(0),
numThreads(params->numberOfThreads) numThreads(params->numberOfThreads)
{ {
DPRINTF(LSQ, "Creating LSQ object.\n"); DPRINTF(LSQ, "Creating LSQ object.\n");
@ -143,6 +142,24 @@ LSQ<Impl>::setPageTable(PageTable *pt_ptr)
} }
#endif #endif
/** Switches out the LSQ unit of every thread. */
template <class Impl>
void
LSQ<Impl>::switchOut()
{
    // numThreads is declared unsigned; use a matching index type so the
    // loop bound is not a signed/unsigned comparison.
    for (unsigned tid = 0; tid < numThreads; ++tid) {
        thread[tid].switchOut();
    }
}
/** Lets the LSQ unit of every thread take over after a switch-out. */
template <class Impl>
void
LSQ<Impl>::takeOverFrom()
{
    // numThreads is declared unsigned; use a matching index type so the
    // loop bound is not a signed/unsigned comparison.
    for (unsigned tid = 0; tid < numThreads; ++tid) {
        thread[tid].takeOverFrom();
    }
}
template <class Impl> template <class Impl>
int int
LSQ<Impl>::entryAmount(int num_threads) LSQ<Impl>::entryAmount(int num_threads)

View file

@ -38,6 +38,7 @@
#include "cpu/inst_seq.hh" #include "cpu/inst_seq.hh"
#include "mem/mem_interface.hh" #include "mem/mem_interface.hh"
//#include "mem/page_table.hh" //#include "mem/page_table.hh"
#include "sim/debug.hh"
#include "sim/sim_object.hh" #include "sim/sim_object.hh"
#include "arch/faults.hh" #include "arch/faults.hh"
@ -110,6 +111,12 @@ class LSQUnit {
/** Sets the page table pointer. */ /** Sets the page table pointer. */
// void setPageTable(PageTable *pt_ptr); // void setPageTable(PageTable *pt_ptr);
void switchOut();
void takeOverFrom();
/** Reports the current value of the switchedOut flag. */
bool isSwitchedOut()
{
    return switchedOut;
}
/** Ticks the LSQ unit, which in this case only resets the number of /** Ticks the LSQ unit, which in this case only resets the number of
* used cache ports. * used cache ports.
* @todo: Move the number of used ports up to the LSQ level so it can * @todo: Move the number of used ports up to the LSQ level so it can
@ -278,20 +285,20 @@ class LSQUnit {
/** Whether or not the store is completed. */ /** Whether or not the store is completed. */
bool completed; bool completed;
}; };
/*
enum Status { enum Status {
Running, Running,
Idle, Idle,
DcacheMissStall, DcacheMissStall,
DcacheMissSwitch DcacheMissSwitch
}; };
*/
private: private:
/** The LSQUnit thread id. */ /** The LSQUnit thread id. */
unsigned lsqID; unsigned lsqID;
/** The status of the LSQ unit. */ /** The status of the LSQ unit. */
Status _status; // Status _status;
/** The store queue. */ /** The store queue. */
std::vector<SQEntry> storeQueue; std::vector<SQEntry> storeQueue;
@ -335,6 +342,8 @@ class LSQUnit {
/** The number of used cache ports in this cycle. */ /** The number of used cache ports in this cycle. */
int usedPorts; int usedPorts;
bool switchedOut;
//list<InstSeqNum> mshrSeqNums; //list<InstSeqNum> mshrSeqNums;
//Stats::Scalar<> dcacheStallCycles; //Stats::Scalar<> dcacheStallCycles;
@ -373,7 +382,25 @@ class LSQUnit {
// Will also need how many read/write ports the Dcache has. Or keep track // Will also need how many read/write ports the Dcache has. Or keep track
// of that in stage that is one level up, and only call executeLoad/Store // of that in stage that is one level up, and only call executeLoad/Store
// the appropriate number of times. // the appropriate number of times.
/*
// total number of loads forwarded from LSQ stores
Stats::Vector<> lsq_forw_loads;
// total number of loads ignored due to invalid addresses
Stats::Vector<> inv_addr_loads;
// total number of software prefetches ignored due to invalid addresses
Stats::Vector<> inv_addr_swpfs;
// total non-speculative bogus addresses seen (debug var)
Counter sim_invalid_addrs;
Stats::Vector<> fu_busy; //cumulative fu busy
// ready loads blocked due to memory disambiguation
Stats::Vector<> lsq_blocked_loads;
Stats::Scalar<> lsqInversion;
*/
public: public:
/** Executes the load at the given index. */ /** Executes the load at the given index. */
template <class T> template <class T>
@ -590,7 +617,12 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
} }
DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
loadQueue[load_idx]->readPC()); loadQueue[load_idx]->readPC());
/*
Addr debug_addr = ULL(0xfffffc0000be81a8);
if (req->vaddr == debug_addr) {
debug_break();
}
*/
assert(!req->completionEvent); assert(!req->completionEvent);
req->completionEvent = req->completionEvent =
new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage); new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage);
@ -608,7 +640,7 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
lastDcacheStall = curTick; lastDcacheStall = curTick;
_status = DcacheMissStall; // _status = DcacheMissStall;
} else { } else {
DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
@ -694,7 +726,12 @@ LSQUnit<Impl>::write(MemReqPtr &req, T &data, int store_idx)
storeQueue[store_idx].req = req; storeQueue[store_idx].req = req;
storeQueue[store_idx].size = sizeof(T); storeQueue[store_idx].size = sizeof(T);
storeQueue[store_idx].data = data; storeQueue[store_idx].data = data;
/*
Addr debug_addr = ULL(0xfffffc0000be81a8);
if (req->vaddr == debug_addr) {
debug_break();
}
*/
// This function only writes the data to the store queue, so no fault // This function only writes the data to the store queue, so no fault
// can happen here. // can happen here.
return NoFault; return NoFault;

View file

@ -50,6 +50,9 @@ LSQUnit<Impl>::StoreCompletionEvent::process()
//lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
if (lsqPtr->isSwitchedOut())
return;
lsqPtr->cpu->wakeCPU(); lsqPtr->cpu->wakeCPU();
if (wbEvent) if (wbEvent)
wbEvent->process(); wbEvent->process();
@ -78,6 +81,8 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
{ {
DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
switchedOut = false;
lsqID = id; lsqID = id;
LQEntries = maxLQEntries; LQEntries = maxLQEntries;
@ -138,6 +143,89 @@ LSQUnit<Impl>::setPageTable(PageTable *pt_ptr)
} }
#endif #endif
/**
 * Switches this LSQ unit out. Raises the switchedOut flag, drops every
 * load queue entry, and then walks the store queue from storeWBIdx
 * towards storeTail, passing each store that is eligible for writeback
 * to cpu->write().
 *
 * This function does not modify storesToWB; the loop ends through the
 * index, inst and canWB conditions.
 */
template<class Impl>
void
LSQUnit<Impl>::switchOut()
{
switchedOut = true;
// Release all load queue entries.
for (int i = 0; i < loadQueue.size(); ++i)
loadQueue[i] = NULL;
// Only stores that have an instruction attached and are marked canWB
// are considered; the walk stops at the first one that is not.
while (storesToWB > 0 &&
storeWBIdx != storeTail &&
storeQueue[storeWBIdx].inst &&
storeQueue[storeWBIdx].canWB) {
// Skip zero-sized stores, data prefetches, already committed stores
// and LOCKED requests without writing them.
// NOTE(review): req is dereferenced here before the assert(req) below;
// presumably every entry with a non-zero size has req set -- confirm.
if (storeQueue[storeWBIdx].size == 0 ||
storeQueue[storeWBIdx].inst->isDataPrefetch() ||
storeQueue[storeWBIdx].committed ||
storeQueue[storeWBIdx].req->flags & LOCKED) {
incrStIdx(storeWBIdx);
continue;
}
assert(storeQueue[storeWBIdx].req);
assert(!storeQueue[storeWBIdx].committed);
MemReqPtr req = storeQueue[storeWBIdx].req;
storeQueue[storeWBIdx].committed = true;
// Turn the request into a write with no completion event attached.
req->cmd = Write;
req->completionEvent = NULL;
req->time = curTick;
assert(!req->data);
// NOTE(review): the buffer is a fixed 64 bytes while the copy length is
// req->size; this assumes req->size <= 64 -- confirm against callers.
req->data = new uint8_t[64];
memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size);
DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
"to Addr:%#x, data:%#x [sn:%lli]\n",
storeWBIdx,storeQueue[storeWBIdx].inst->readPC(),
req->paddr, *(req->data),
storeQueue[storeWBIdx].inst->seqNum);
// Dispatch on the store size so cpu->write() receives the data with the
// matching integer width.
switch(storeQueue[storeWBIdx].size) {
case 1:
cpu->write(req, (uint8_t &)storeQueue[storeWBIdx].data);
break;
case 2:
cpu->write(req, (uint16_t &)storeQueue[storeWBIdx].data);
break;
case 4:
cpu->write(req, (uint32_t &)storeQueue[storeWBIdx].data);
break;
case 8:
cpu->write(req, (uint64_t &)storeQueue[storeWBIdx].data);
break;
default:
panic("Unexpected store size!\n");
}
incrStIdx(storeWBIdx);
}
}
/**
 * Resets the unit after a switch-out: clears the switchedOut flag, zeroes
 * the load/store counts and queue indices, and clears the fault,
 * violation and blocked-load tracking state.
 */
template<class Impl>
void
LSQUnit<Impl>::takeOverFrom()
{
    switchedOut = false;

    // Entry counts.
    storesToWB = 0;
    stores = 0;
    loads = 0;

    // Load queue indices.
    loadTail = 0;
    loadHead = 0;

    // Store queue indices.
    storeTail = 0;
    storeWBIdx = 0;
    storeHead = 0;

    usedPorts = 0;

    // No outstanding fault or memory-dependence violation.
    memDepViolator = NULL;
    storeFaultInst = NULL;
    loadFaultInst = NULL;

    blockedLoadSeqNum = 0;

    stalled = false;
    isLoadBlocked = false;
    loadBlockedHandled = false;
}
template<class Impl> template<class Impl>
void void
LSQUnit<Impl>::resizeLQ(unsigned size) LSQUnit<Impl>::resizeLQ(unsigned size)
@ -647,7 +735,7 @@ LSQUnit<Impl>::writebackStores()
lastDcacheStall = curTick; lastDcacheStall = curTick;
_status = DcacheMissStall; // _status = DcacheMissStall;
//mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);

View file

@ -84,6 +84,10 @@ class MemDepUnit {
/** Registers statistics. */ /** Registers statistics. */
void regStats(); void regStats();
void switchOut();
void takeOverFrom();
/** Sets the pointer to the IQ. */ /** Sets the pointer to the IQ. */
void setIQ(InstructionQueue<Impl> *iq_ptr); void setIQ(InstructionQueue<Impl> *iq_ptr);

View file

@ -101,6 +101,26 @@ MemDepUnit<MemDepPred, Impl>::regStats()
.desc("Number of conflicting stores."); .desc("Number of conflicting stores.");
} }
/**
 * Switches the unit out by emptying every per-thread instruction list,
 * the replay list and the dependence hash.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::switchOut()
{
    int tid = 0;
    while (tid < Impl::MaxThreads) {
        instList[tid].clear();
        ++tid;
    }

    instsToReplay.clear();
    memDepHash.clear();
}
/**
 * Resets the unit after a switch-out: clears both barrier flags and
 * their sequence numbers, then clears the dependence predictor.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::takeOverFrom()
{
    storeBarrier = false;
    loadBarrier = false;

    storeBarrierSN = 0;
    loadBarrierSN = 0;

    depPred.clear();
}
template <class MemDepPred, class Impl> template <class MemDepPred, class Impl>
void void
MemDepUnit<MemDepPred, Impl>::setIQ(InstructionQueue<Impl> *iq_ptr) MemDepUnit<MemDepPred, Impl>::setIQ(InstructionQueue<Impl> *iq_ptr)

View file

@ -41,6 +41,15 @@ ReturnAddrStack::init(unsigned _numEntries)
addrStack[i] = 0; addrStack[i] = 0;
} }
/**
 * Empties the return address stack: zeroes every slot and resets the
 * top-of-stack index and the used-entry count.
 */
void
ReturnAddrStack::reset()
{
    for (int slot = 0; slot < numEntries; ++slot) {
        addrStack[slot] = 0;
    }

    tos = 0;
    usedEntries = 0;
}
void void
ReturnAddrStack::push(const Addr &return_addr) ReturnAddrStack::push(const Addr &return_addr)
{ {

View file

@ -47,6 +47,8 @@ class ReturnAddrStack
*/ */
void init(unsigned numEntries); void init(unsigned numEntries);
void reset();
/** Returns the top address on the RAS. */ /** Returns the top address on the RAS. */
Addr top() Addr top()
{ return addrStack[tos]; } { return addrStack[tos]; }

View file

@ -153,6 +153,10 @@ class DefaultRename
/** Sets pointer to the scoreboard. */ /** Sets pointer to the scoreboard. */
void setScoreboard(Scoreboard *_scoreboard); void setScoreboard(Scoreboard *_scoreboard);
void switchOut();
void takeOverFrom();
/** Squashes all instructions in a thread. */ /** Squashes all instructions in a thread. */
void squash(unsigned tid); void squash(unsigned tid);
@ -448,6 +452,7 @@ class DefaultRename
Stats::Scalar<> renameUndoneMaps; Stats::Scalar<> renameUndoneMaps;
Stats::Scalar<> renamedSerializing; Stats::Scalar<> renamedSerializing;
Stats::Scalar<> renamedTempSerializing; Stats::Scalar<> renamedTempSerializing;
Stats::Scalar<> renameSkidInsts;
}; };
#endif // __CPU_O3_RENAME_HH__ #endif // __CPU_O3_RENAME_HH__

View file

@ -151,6 +151,11 @@ DefaultRename<Impl>::regStats()
.desc("count of temporary serializing insts renamed") .desc("count of temporary serializing insts renamed")
.flags(Stats::total) .flags(Stats::total)
; ;
renameSkidInsts
.name(name() + ".RENAME:skidInsts")
.desc("count of insts added to the skid buffer")
.flags(Stats::total)
;
} }
template <class Impl> template <class Impl>
@ -213,8 +218,8 @@ DefaultRename<Impl>::initStage()
// Clear these pointers so they are not accidentally used in // Clear these pointers so they are not accidentally used in
// non-initialization code. // non-initialization code.
iew_ptr = NULL; // iew_ptr = NULL;
commit_ptr = NULL; // commit_ptr = NULL;
} }
template<class Impl> template<class Impl>
@ -253,6 +258,55 @@ DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard)
scoreboard = _scoreboard; scoreboard = _scoreboard;
} }
/**
 * Switches the rename stage out. For each thread, every history buffer
 * entry is undone from the front: the rename map is pointed back at the
 * previous physical register and the newly allocated register is
 * returned to the free list. The thread's instruction and skid buffers
 * are then cleared.
 */
template <class Impl>
void
DefaultRename<Impl>::switchOut()
{
    for (int tid = 0; tid < numThreads; ++tid) {
        typename list<RenameHistory>::iterator entry =
            historyBuffer[tid].begin();

        while (!historyBuffer[tid].empty()) {
            assert(entry != historyBuffer[tid].end());

            DPRINTF(Rename, "[tid:%u]: Removing history entry with sequence "
                    "number %i.\n", tid, entry->instSeqNum);

            // Restore the architected register's previous mapping.
            renameMap[tid]->setEntry(entry->archReg, entry->prevPhysReg);

            // Hand the renamed physical register back to the free list.
            freeList->addReg(entry->newPhysReg);

            // erase() yields the iterator following the removed entry.
            entry = historyBuffer[tid].erase(entry);
        }

        insts[tid].clear();
        skidBuffer[tid].clear();
    }
}
/**
 * Resets the rename stage after a switch-out: marks the stage Inactive,
 * re-runs initStage(), and restores each thread's status, stall flags
 * and serialization bookkeeping to their idle values.
 */
template <class Impl>
void
DefaultRename<Impl>::takeOverFrom()
{
    _status = Inactive;
    initStage();

    int tid = 0;
    while (tid < numThreads) {
        renameStatus[tid] = Idle;

        stalls[tid].iew = false;
        stalls[tid].commit = false;

        serializeInst[tid] = NULL;
        instsInProgress[tid] = 0;
        emptyROB[tid] = true;
        serializeOnNextInst[tid] = false;

        ++tid;
    }
}
template <class Impl> template <class Impl>
void void
DefaultRename<Impl>::squash(unsigned tid) DefaultRename<Impl>::squash(unsigned tid)
@ -393,7 +447,7 @@ DefaultRename<Impl>::rename(bool &status_change, unsigned tid)
} else if (renameStatus[tid] == Unblocking) { } else if (renameStatus[tid] == Unblocking) {
renameInsts(tid); renameInsts(tid);
++renameUnblockCycles; // ++renameUnblockCycles;
if (validInsts()) { if (validInsts()) {
// Add the current inputs to the skid buffer so they can be // Add the current inputs to the skid buffer so they can be
@ -564,6 +618,8 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
} else if (inst->isSerializeAfter() && !inst->isSerializeHandled()) { } else if (inst->isSerializeAfter() && !inst->isSerializeHandled()) {
DPRINTF(Rename, "Serialize after instruction encountered.\n"); DPRINTF(Rename, "Serialize after instruction encountered.\n");
renamedSerializing++;
inst->setSerializeHandled(); inst->setSerializeHandled();
serializeAfter(insts_to_rename, tid); serializeAfter(insts_to_rename, tid);
@ -594,13 +650,12 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
// Increment which instruction we're on. // Increment which instruction we're on.
++toIEWIndex; ++toIEWIndex;
++renameRenamedInsts;
// Decrement how many instructions are available. // Decrement how many instructions are available.
--insts_available; --insts_available;
} }
instsInProgress[tid] += renamed_insts; instsInProgress[tid] += renamed_insts;
renameRenamedInsts += renamed_insts;
// If we wrote to the time buffer, record this. // If we wrote to the time buffer, record this.
if (toIEWIndex) { if (toIEWIndex) {
@ -635,6 +690,8 @@ DefaultRename<Impl>::skidInsert(unsigned tid)
DPRINTF(Rename, "[tid:%u]: Inserting [sn:%lli] PC:%#x into Rename " DPRINTF(Rename, "[tid:%u]: Inserting [sn:%lli] PC:%#x into Rename "
"skidBuffer\n", tid, inst->seqNum, inst->readPC()); "skidBuffer\n", tid, inst->seqNum, inst->readPC());
++renameSkidInsts;
skidBuffer[tid].push_back(inst); skidBuffer[tid].push_back(inst);
} }

View file

@ -97,6 +97,10 @@ class ROB
*/ */
void setActiveThreads(std::list<unsigned>* at_ptr); void setActiveThreads(std::list<unsigned>* at_ptr);
void switchOut();
void takeOverFrom();
/** Function to insert an instruction into the ROB. Note that whatever /** Function to insert an instruction into the ROB. Note that whatever
* calls this function must ensure that there is enough space within the * calls this function must ensure that there is enough space within the
* ROB for the new instruction. * ROB for the new instruction.

View file

@ -121,6 +121,31 @@ ROB<Impl>::setActiveThreads(list<unsigned> *at_ptr)
activeThreads = at_ptr; activeThreads = at_ptr;
} }
/** Switches the ROB out by clearing every thread's instruction list. */
template <class Impl>
void
ROB<Impl>::switchOut()
{
    int thread = 0;
    while (thread < numThreads) {
        instList[thread].clear();
        ++thread;
    }
}
/**
 * Resets the ROB after a switch-out: per-thread squash state and entry
 * counts, the total instruction count, and the global head/tail
 * iterators.
 */
template <class Impl>
void
ROB<Impl>::takeOverFrom()
{
    for (int thread = 0; thread < numThreads; ++thread) {
        doneSquashing[thread] = true;
        threadEntries[thread] = 0;
        squashIt[thread] = instList[thread].end();
    }

    numInstsInROB = 0;

    // The "universal" head and tail start out as invalid positions:
    // the end iterator of thread 0's list.
    head = instList[0].end();
    tail = instList[0].end();
}
template <class Impl> template <class Impl>
void void

View file

@ -30,17 +30,17 @@
#include "cpu/o3/sat_counter.hh" #include "cpu/o3/sat_counter.hh"
/**
 * Default constructor: a counter with every field zeroed.
 *
 * maxVal is initialised explicitly. increment() compares the counter
 * against maxVal, so leaving it unset would make that comparison read an
 * indeterminate value if it ran before setBits() is called.
 */
SatCounter::SatCounter()
    : initialVal(0), maxVal(0), counter(0)
{
}
/**
 * Builds a counter that is `bits` wide, starting (and resetting) at zero.
 * @param bits How many bits the counter will have.
 */
SatCounter::SatCounter(unsigned bits)
    : initialVal(0),
      maxVal((1 << bits) - 1),
      counter(0)
{
}
SatCounter::SatCounter(unsigned bits, unsigned initial_val) SatCounter::SatCounter(unsigned bits, uint8_t initial_val)
: maxVal((1 << bits) - 1), counter(initial_val) : initialVal(initialVal), maxVal((1 << bits) - 1), counter(initial_val)
{ {
// Check to make sure initial value doesn't exceed the max counter value. // Check to make sure initial value doesn't exceed the max counter value.
if (initial_val > maxVal) { if (initial_val > maxVal) {
@ -53,19 +53,3 @@ SatCounter::setBits(unsigned bits)
{ {
maxVal = (1 << bits) - 1; maxVal = (1 << bits) - 1;
} }
void
SatCounter::increment()
{
if (counter < maxVal) {
++counter;
}
}
void
SatCounter::decrement()
{
if (counter > 0) {
--counter;
}
}

View file

@ -57,22 +57,34 @@ class SatCounter
* @param bits How many bits the counter will have. * @param bits How many bits the counter will have.
* @param initial_val Starting value for each counter. * @param initial_val Starting value for each counter.
*/ */
SatCounter(unsigned bits, unsigned initial_val); SatCounter(unsigned bits, uint8_t initial_val);
/** /**
* Sets the number of bits. * Sets the number of bits.
*/ */
void setBits(unsigned bits); void setBits(unsigned bits);
/** Restores the counter to the value stored in initialVal. */
void reset()
{
    counter = initialVal;
}
/** /**
* Increments the counter's current value. * Increments the counter's current value.
*/ */
void increment()
{
    // Saturate: once the counter has reached maxVal it stays there.
    if (counter >= maxVal) {
        return;
    }
    ++counter;
}
/** /**
* Decrements the counter's current value. * Decrements the counter's current value.
*/ */
void decrement()
{
    // Saturate at zero; counter is an unsigned 8-bit value.
    if (counter == 0) {
        return;
    }
    --counter;
}
/** /**
* Read the counter's value. * Read the counter's value.
@ -81,6 +93,7 @@ class SatCounter
{ return counter; } { return counter; }
private: private:
uint8_t initialVal;
uint8_t maxVal; uint8_t maxVal;
uint8_t counter; uint8_t counter;
}; };

View file

@ -60,7 +60,7 @@ struct O3ThreadState : public ThreadState {
{ } { }
#else #else
/**
 * Non-full-system constructor. The memory pointer passed to the
 * ThreadState base is obtained from the process via getMemory().
 */
O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
    : ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid),
      cpu(_cpu),
      inSyscall(0),
      trapPending(0)
{
}